Commit edaa53ca authored by Zheng Liu's avatar Zheng Liu Committed by Theodore Ts'o

ext4: change LRU to round-robin in extent status tree shrinker

In this commit we discard the lru algorithm for inodes with extent
status tree because it takes significant effort to maintain a lru list
in extent status tree shrinker and the shrinker can take a long time to
scan this lru list in order to reclaim some objects.

We replace the lru ordering with a simple round-robin.  After that we
never need to keep a lru list.  That means that the list needn't be
sorted if the shrinker can not reclaim any objects in the first round.

Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Signed-off-by: default avatarZheng Liu <wenqing.lz@taobao.com>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 2f8e0a7c
......@@ -878,10 +878,9 @@ struct ext4_inode_info {
/* extents status tree */
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
struct list_head i_es_lru;
struct list_head i_es_list;
unsigned int i_es_all_nr; /* protected by i_es_lock */
unsigned int i_es_lru_nr; /* protected by i_es_lock */
unsigned long i_touch_when; /* jiffies of last accessing */
unsigned int i_es_shk_nr; /* protected by i_es_lock */
/* ialloc */
ext4_group_t i_last_alloc_group;
......@@ -1322,10 +1321,11 @@ struct ext4_sb_info {
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
struct list_head s_es_lru;
struct list_head s_es_list;
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
struct mb_cache *s_mb_cache;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
struct ratelimit_state s_err_ratelimit_state;
......
......@@ -4632,7 +4632,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
trace_ext4_ext_map_blocks_exit(inode, flags, map,
err ? err : allocated);
ext4_es_lru_add(inode);
ext4_es_list_add(inode);
return err ? err : allocated;
}
......@@ -5191,7 +5191,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
error = ext4_fill_fiemap_extents(inode, start_blk,
len_blks, fieinfo);
}
ext4_es_lru_add(inode);
ext4_es_list_add(inode);
return error;
}
......
This diff is collapsed.
......@@ -65,14 +65,13 @@ struct ext4_es_tree {
};
struct ext4_es_stats {
unsigned long es_stats_last_sorted;
unsigned long es_stats_shrunk;
unsigned long es_stats_cache_hits;
unsigned long es_stats_cache_misses;
u64 es_stats_scan_time;
u64 es_stats_max_scan_time;
struct percpu_counter es_stats_all_cnt;
struct percpu_counter es_stats_lru_cnt;
struct percpu_counter es_stats_shk_cnt;
};
extern int __init ext4_init_es(void);
......@@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_lru_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode);
extern void ext4_es_list_add(struct inode *inode);
extern void ext4_es_list_del(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */
......@@ -486,7 +486,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
ext4_es_lru_add(inode);
ext4_es_list_add(inode);
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
......@@ -1388,7 +1388,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, &es)) {
ext4_es_lru_add(inode);
ext4_es_list_add(inode);
if (ext4_es_is_hole(&es)) {
retval = 0;
down_read(&EXT4_I(inode)->i_data_sem);
......
......@@ -78,8 +78,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
ext4_es_lru_del(inode1);
ext4_es_lru_del(inode2);
ext4_es_list_del(inode1);
ext4_es_list_del(inode2);
isize = i_size_read(inode1);
i_size_write(inode1, i_size_read(inode2));
......
......@@ -871,10 +871,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
INIT_LIST_HEAD(&ei->i_es_lru);
INIT_LIST_HEAD(&ei->i_es_list);
ei->i_es_all_nr = 0;
ei->i_es_lru_nr = 0;
ei->i_touch_when = 0;
ei->i_es_shk_nr = 0;
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
......@@ -963,7 +962,7 @@ void ext4_clear_inode(struct inode *inode)
dquot_drop(inode);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
ext4_es_lru_del(inode);
ext4_es_list_del(inode);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
......
......@@ -2450,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range,
TRACE_EVENT(ext4_es_shrink,
TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
int skip_precached, int nr_skipped, int retried),
int nr_skipped, int retried),
TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried),
TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, nr_shrunk )
__field( unsigned long long, scan_time )
__field( int, skip_precached )
__field( int, nr_skipped )
__field( int, retried )
),
......@@ -2467,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink,
__entry->dev = sb->s_dev;
__entry->nr_shrunk = nr_shrunk;
__entry->scan_time = div_u64(scan_time, 1000);
__entry->skip_precached = skip_precached;
__entry->nr_skipped = nr_skipped;
__entry->retried = retried;
),
TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d "
TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu "
"nr_skipped %d retried %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk,
__entry->scan_time, __entry->skip_precached,
__entry->nr_skipped, __entry->retried)
__entry->scan_time, __entry->nr_skipped, __entry->retried)
);
#endif /* _TRACE_EXT4_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment