Commit edaa53ca, authored by Zheng Liu, committed by Theodore Ts'o

ext4: change LRU to round-robin in extent status tree shrinker

In this commit we discard the LRU algorithm for inodes with an extent
status tree, because maintaining an LRU list in the extent status tree
shrinker takes significant effort and the shrinker can take a long time
to scan this LRU list in order to reclaim some objects.

We replace the LRU ordering with a simple round-robin.  After that we
never need to keep an LRU list.  That means that the list needn't be
sorted if the shrinker cannot reclaim any objects in the first round.

Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent 2f8e0a7c
...@@ -878,10 +878,9 @@ struct ext4_inode_info { ...@@ -878,10 +878,9 @@ struct ext4_inode_info {
/* extents status tree */ /* extents status tree */
struct ext4_es_tree i_es_tree; struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock; rwlock_t i_es_lock;
struct list_head i_es_lru; struct list_head i_es_list;
unsigned int i_es_all_nr; /* protected by i_es_lock */ unsigned int i_es_all_nr; /* protected by i_es_lock */
unsigned int i_es_lru_nr; /* protected by i_es_lock */ unsigned int i_es_shk_nr; /* protected by i_es_lock */
unsigned long i_touch_when; /* jiffies of last accessing */
/* ialloc */ /* ialloc */
ext4_group_t i_last_alloc_group; ext4_group_t i_last_alloc_group;
...@@ -1322,10 +1321,11 @@ struct ext4_sb_info { ...@@ -1322,10 +1321,11 @@ struct ext4_sb_info {
/* Reclaim extents from extent status tree */ /* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker; struct shrinker s_es_shrinker;
struct list_head s_es_lru; struct list_head s_es_list;
long s_es_nr_inode;
struct ext4_es_stats s_es_stats; struct ext4_es_stats s_es_stats;
struct mb_cache *s_mb_cache; struct mb_cache *s_mb_cache;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */ /* Ratelimit ext4 messages. */
struct ratelimit_state s_err_ratelimit_state; struct ratelimit_state s_err_ratelimit_state;
......
...@@ -4632,7 +4632,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -4632,7 +4632,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
trace_ext4_ext_map_blocks_exit(inode, flags, map, trace_ext4_ext_map_blocks_exit(inode, flags, map,
err ? err : allocated); err ? err : allocated);
ext4_es_lru_add(inode); ext4_es_list_add(inode);
return err ? err : allocated; return err ? err : allocated;
} }
...@@ -5191,7 +5191,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -5191,7 +5191,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
error = ext4_fill_fiemap_extents(inode, start_blk, error = ext4_fill_fiemap_extents(inode, start_blk,
len_blks, fieinfo); len_blks, fieinfo);
} }
ext4_es_lru_add(inode); ext4_es_list_add(inode);
return error; return error;
} }
......
This diff is collapsed.
...@@ -65,14 +65,13 @@ struct ext4_es_tree { ...@@ -65,14 +65,13 @@ struct ext4_es_tree {
}; };
struct ext4_es_stats { struct ext4_es_stats {
unsigned long es_stats_last_sorted;
unsigned long es_stats_shrunk; unsigned long es_stats_shrunk;
unsigned long es_stats_cache_hits; unsigned long es_stats_cache_hits;
unsigned long es_stats_cache_misses; unsigned long es_stats_cache_misses;
u64 es_stats_scan_time; u64 es_stats_scan_time;
u64 es_stats_max_scan_time; u64 es_stats_max_scan_time;
struct percpu_counter es_stats_all_cnt; struct percpu_counter es_stats_all_cnt;
struct percpu_counter es_stats_lru_cnt; struct percpu_counter es_stats_shk_cnt;
}; };
extern int __init ext4_init_es(void); extern int __init ext4_init_es(void);
...@@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, ...@@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_lru_add(struct inode *inode); extern void ext4_es_list_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode); extern void ext4_es_list_del(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */ #endif /* _EXT4_EXTENTS_STATUS_H */
...@@ -486,7 +486,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -486,7 +486,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
/* Lookup extent status tree firstly */ /* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
ext4_es_lru_add(inode); ext4_es_list_add(inode);
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) + map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk; map->m_lblk - es.es_lblk;
...@@ -1388,7 +1388,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1388,7 +1388,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
/* Lookup extent status tree firstly */ /* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, &es)) { if (ext4_es_lookup_extent(inode, iblock, &es)) {
ext4_es_lru_add(inode); ext4_es_list_add(inode);
if (ext4_es_is_hole(&es)) { if (ext4_es_is_hole(&es)) {
retval = 0; retval = 0;
down_read(&EXT4_I(inode)->i_data_sem); down_read(&EXT4_I(inode)->i_data_sem);
......
...@@ -78,8 +78,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) ...@@ -78,8 +78,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
ext4_es_lru_del(inode1); ext4_es_list_del(inode1);
ext4_es_lru_del(inode2); ext4_es_list_del(inode2);
isize = i_size_read(inode1); isize = i_size_read(inode1);
i_size_write(inode1, i_size_read(inode2)); i_size_write(inode1, i_size_read(inode2));
......
...@@ -871,10 +871,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ...@@ -871,10 +871,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_prealloc_lock); spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree); ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock); rwlock_init(&ei->i_es_lock);
INIT_LIST_HEAD(&ei->i_es_lru); INIT_LIST_HEAD(&ei->i_es_list);
ei->i_es_all_nr = 0; ei->i_es_all_nr = 0;
ei->i_es_lru_nr = 0; ei->i_es_shk_nr = 0;
ei->i_touch_when = 0;
ei->i_reserved_data_blocks = 0; ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0; ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0; ei->i_allocated_meta_blocks = 0;
...@@ -963,7 +962,7 @@ void ext4_clear_inode(struct inode *inode) ...@@ -963,7 +962,7 @@ void ext4_clear_inode(struct inode *inode)
dquot_drop(inode); dquot_drop(inode);
ext4_discard_preallocations(inode); ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
ext4_es_lru_del(inode); ext4_es_list_del(inode);
if (EXT4_I(inode)->jinode) { if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode); EXT4_I(inode)->jinode);
......
...@@ -2450,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range, ...@@ -2450,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range,
TRACE_EVENT(ext4_es_shrink, TRACE_EVENT(ext4_es_shrink,
TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
int skip_precached, int nr_skipped, int retried), int nr_skipped, int retried),
TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried), TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
__field( int, nr_shrunk ) __field( int, nr_shrunk )
__field( unsigned long long, scan_time ) __field( unsigned long long, scan_time )
__field( int, skip_precached )
__field( int, nr_skipped ) __field( int, nr_skipped )
__field( int, retried ) __field( int, retried )
), ),
...@@ -2467,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink, ...@@ -2467,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink,
__entry->dev = sb->s_dev; __entry->dev = sb->s_dev;
__entry->nr_shrunk = nr_shrunk; __entry->nr_shrunk = nr_shrunk;
__entry->scan_time = div_u64(scan_time, 1000); __entry->scan_time = div_u64(scan_time, 1000);
__entry->skip_precached = skip_precached;
__entry->nr_skipped = nr_skipped; __entry->nr_skipped = nr_skipped;
__entry->retried = retried; __entry->retried = retried;
), ),
TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d " TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu "
"nr_skipped %d retried %d", "nr_skipped %d retried %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk,
__entry->scan_time, __entry->skip_precached, __entry->scan_time, __entry->nr_skipped, __entry->retried)
__entry->nr_skipped, __entry->retried)
); );
#endif /* _TRACE_EXT4_H */ #endif /* _TRACE_EXT4_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment