Commit edaa53ca authored by Zheng Liu's avatar Zheng Liu Committed by Theodore Ts'o

ext4: change LRU to round-robin in extent status tree shrinker

In this commit we discard the lru algorithm for inodes with extent
status tree because it takes significant effort to maintain a lru list
in extent status tree shrinker and the shrinker can take a long time to
scan this lru list in order to reclaim some objects.

We replace the lru ordering with a simple round-robin.  After that we
never need to keep a lru list.  That means that the list needn't be
sorted if the shrinker can not reclaim any objects in the first round.

Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Signed-off-by: default avatarZheng Liu <wenqing.lz@taobao.com>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 2f8e0a7c
...@@ -878,10 +878,9 @@ struct ext4_inode_info { ...@@ -878,10 +878,9 @@ struct ext4_inode_info {
/* extents status tree */ /* extents status tree */
struct ext4_es_tree i_es_tree; struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock; rwlock_t i_es_lock;
struct list_head i_es_lru; struct list_head i_es_list;
unsigned int i_es_all_nr; /* protected by i_es_lock */ unsigned int i_es_all_nr; /* protected by i_es_lock */
unsigned int i_es_lru_nr; /* protected by i_es_lock */ unsigned int i_es_shk_nr; /* protected by i_es_lock */
unsigned long i_touch_when; /* jiffies of last accessing */
/* ialloc */ /* ialloc */
ext4_group_t i_last_alloc_group; ext4_group_t i_last_alloc_group;
...@@ -1322,10 +1321,11 @@ struct ext4_sb_info { ...@@ -1322,10 +1321,11 @@ struct ext4_sb_info {
/* Reclaim extents from extent status tree */ /* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker; struct shrinker s_es_shrinker;
struct list_head s_es_lru; struct list_head s_es_list;
long s_es_nr_inode;
struct ext4_es_stats s_es_stats; struct ext4_es_stats s_es_stats;
struct mb_cache *s_mb_cache; struct mb_cache *s_mb_cache;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */ /* Ratelimit ext4 messages. */
struct ratelimit_state s_err_ratelimit_state; struct ratelimit_state s_err_ratelimit_state;
......
...@@ -4632,7 +4632,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -4632,7 +4632,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
trace_ext4_ext_map_blocks_exit(inode, flags, map, trace_ext4_ext_map_blocks_exit(inode, flags, map,
err ? err : allocated); err ? err : allocated);
ext4_es_lru_add(inode); ext4_es_list_add(inode);
return err ? err : allocated; return err ? err : allocated;
} }
...@@ -5191,7 +5191,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -5191,7 +5191,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
error = ext4_fill_fiemap_extents(inode, start_blk, error = ext4_fill_fiemap_extents(inode, start_blk,
len_blks, fieinfo); len_blks, fieinfo);
} }
ext4_es_lru_add(inode); ext4_es_list_add(inode);
return error; return error;
} }
......
...@@ -149,7 +149,7 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ...@@ -149,7 +149,7 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end); ext4_lblk_t end);
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
int nr_to_scan); int nr_to_scan);
static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct ext4_inode_info *locked_ei); struct ext4_inode_info *locked_ei);
int __init ext4_init_es(void) int __init ext4_init_es(void)
...@@ -298,6 +298,36 @@ void ext4_es_find_delayed_extent_range(struct inode *inode, ...@@ -298,6 +298,36 @@ void ext4_es_find_delayed_extent_range(struct inode *inode,
trace_ext4_es_find_delayed_extent_range_exit(inode, es); trace_ext4_es_find_delayed_extent_range_exit(inode, es);
} }
void ext4_es_list_add(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
if (!list_empty(&ei->i_es_list))
return;
spin_lock(&sbi->s_es_lock);
if (list_empty(&ei->i_es_list)) {
list_add_tail(&ei->i_es_list, &sbi->s_es_list);
sbi->s_es_nr_inode++;
}
spin_unlock(&sbi->s_es_lock);
}
void ext4_es_list_del(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
spin_lock(&sbi->s_es_lock);
if (!list_empty(&ei->i_es_list)) {
list_del_init(&ei->i_es_list);
sbi->s_es_nr_inode--;
WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
}
spin_unlock(&sbi->s_es_lock);
}
static struct extent_status * static struct extent_status *
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
ext4_fsblk_t pblk) ext4_fsblk_t pblk)
...@@ -314,9 +344,9 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, ...@@ -314,9 +344,9 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
* We don't count delayed extent because we never try to reclaim them * We don't count delayed extent because we never try to reclaim them
*/ */
if (!ext4_es_is_delayed(es)) { if (!ext4_es_is_delayed(es)) {
EXT4_I(inode)->i_es_lru_nr++; EXT4_I(inode)->i_es_shk_nr++;
percpu_counter_inc(&EXT4_SB(inode->i_sb)-> percpu_counter_inc(&EXT4_SB(inode->i_sb)->
s_es_stats.es_stats_lru_cnt); s_es_stats.es_stats_shk_cnt);
} }
EXT4_I(inode)->i_es_all_nr++; EXT4_I(inode)->i_es_all_nr++;
...@@ -330,12 +360,12 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) ...@@ -330,12 +360,12 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
EXT4_I(inode)->i_es_all_nr--; EXT4_I(inode)->i_es_all_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
/* Decrease the lru counter when this es is not delayed */ /* Decrease the shrink counter when this es is not delayed */
if (!ext4_es_is_delayed(es)) { if (!ext4_es_is_delayed(es)) {
BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
EXT4_I(inode)->i_es_lru_nr--; EXT4_I(inode)->i_es_shk_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)-> percpu_counter_dec(&EXT4_SB(inode->i_sb)->
s_es_stats.es_stats_lru_cnt); s_es_stats.es_stats_shk_cnt);
} }
kmem_cache_free(ext4_es_cachep, es); kmem_cache_free(ext4_es_cachep, es);
...@@ -683,8 +713,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ...@@ -683,8 +713,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
goto error; goto error;
retry: retry:
err = __es_insert_extent(inode, &newes); err = __es_insert_extent(inode, &newes);
if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
EXT4_I(inode))) 1, EXT4_I(inode)))
goto retry; goto retry;
if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
err = 0; err = 0;
...@@ -841,8 +871,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ...@@ -841,8 +871,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
es->es_lblk = orig_es.es_lblk; es->es_lblk = orig_es.es_lblk;
es->es_len = orig_es.es_len; es->es_len = orig_es.es_len;
if ((err == -ENOMEM) && if ((err == -ENOMEM) &&
__ext4_es_shrink(EXT4_SB(inode->i_sb), 1, __es_shrink(EXT4_SB(inode->i_sb),
EXT4_I(inode))) 1, EXT4_I(inode)))
goto retry; goto retry;
goto out; goto out;
} }
...@@ -914,6 +944,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ...@@ -914,6 +944,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
end = lblk + len - 1; end = lblk + len - 1;
BUG_ON(end < lblk); BUG_ON(end < lblk);
/*
* ext4_clear_inode() depends on us taking i_es_lock unconditionally
* so that we are sure __es_shrink() is done with the inode before it
* is reclaimed.
*/
write_lock(&EXT4_I(inode)->i_es_lock); write_lock(&EXT4_I(inode)->i_es_lock);
err = __es_remove_extent(inode, lblk, end); err = __es_remove_extent(inode, lblk, end);
write_unlock(&EXT4_I(inode)->i_es_lock); write_unlock(&EXT4_I(inode)->i_es_lock);
...@@ -921,114 +956,80 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ...@@ -921,114 +956,80 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
return err; return err;
} }
static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct list_head *b)
{
struct ext4_inode_info *eia, *eib;
eia = list_entry(a, struct ext4_inode_info, i_es_lru);
eib = list_entry(b, struct ext4_inode_info, i_es_lru);
if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
!ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
return 1;
if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
return -1;
if (eia->i_touch_when == eib->i_touch_when)
return 0;
if (time_after(eia->i_touch_when, eib->i_touch_when))
return 1;
else
return -1;
}
static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct ext4_inode_info *locked_ei) struct ext4_inode_info *locked_ei)
{ {
struct ext4_inode_info *ei; struct ext4_inode_info *ei;
struct ext4_es_stats *es_stats; struct ext4_es_stats *es_stats;
struct list_head *cur, *tmp;
LIST_HEAD(skipped);
ktime_t start_time; ktime_t start_time;
u64 scan_time; u64 scan_time;
int nr_to_walk;
int nr_shrunk = 0; int nr_shrunk = 0;
int retried = 0, skip_precached = 1, nr_skipped = 0; int retried = 0, nr_skipped = 0;
es_stats = &sbi->s_es_stats; es_stats = &sbi->s_es_stats;
start_time = ktime_get(); start_time = ktime_get();
spin_lock(&sbi->s_es_lru_lock);
retry: retry:
list_for_each_safe(cur, tmp, &sbi->s_es_lru) { spin_lock(&sbi->s_es_lock);
nr_to_walk = sbi->s_es_nr_inode;
while (nr_to_walk-- > 0) {
int shrunk; int shrunk;
/* if (list_empty(&sbi->s_es_list)) {
* If we have already reclaimed all extents from extent spin_unlock(&sbi->s_es_lock);
* status tree, just stop the loop immediately. goto out;
*/ }
if (percpu_counter_read_positive( ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
&es_stats->es_stats_lru_cnt) == 0) i_es_list);
break; /* Move the inode to the tail */
list_move(&ei->i_es_list, sbi->s_es_list.prev);
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
/* /*
* Skip the inode that is newer than the last_sorted * Normally we try hard to avoid shrinking precached inodes,
* time. Normally we try hard to avoid shrinking * but we will as a last resort.
* precached inodes, but we will as a last resort.
*/ */
if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || if (!retried && ext4_test_inode_state(&ei->vfs_inode,
(skip_precached && ext4_test_inode_state(&ei->vfs_inode, EXT4_STATE_EXT_PRECACHED)) {
EXT4_STATE_EXT_PRECACHED))) {
nr_skipped++; nr_skipped++;
list_move_tail(cur, &skipped);
continue; continue;
} }
if (ei->i_es_lru_nr == 0 || ei == locked_ei || if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
!write_trylock(&ei->i_es_lock)) nr_skipped++;
continue; continue;
}
/*
* Now we hold i_es_lock which protects us from inode reclaim
* freeing inode under us
*/
spin_unlock(&sbi->s_es_lock);
shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
if (ei->i_es_lru_nr == 0)
list_del_init(&ei->i_es_lru);
write_unlock(&ei->i_es_lock); write_unlock(&ei->i_es_lock);
nr_shrunk += shrunk; nr_shrunk += shrunk;
nr_to_scan -= shrunk; nr_to_scan -= shrunk;
if (nr_to_scan == 0) if (nr_to_scan == 0)
break; goto out;
spin_lock(&sbi->s_es_lock);
} }
spin_unlock(&sbi->s_es_lock);
/* Move the newer inodes into the tail of the LRU list. */
list_splice_tail(&skipped, &sbi->s_es_lru);
INIT_LIST_HEAD(&skipped);
/* /*
* If we skipped any inodes, and we weren't able to make any * If we skipped any inodes, and we weren't able to make any
* forward progress, sort the list and try again. * forward progress, try again to scan precached inodes.
*/ */
if ((nr_shrunk == 0) && nr_skipped && !retried) { if ((nr_shrunk == 0) && nr_skipped && !retried) {
retried++; retried++;
list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
es_stats->es_stats_last_sorted = jiffies;
ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
i_es_lru);
/*
* If there are no non-precached inodes left on the
* list, start releasing precached extents.
*/
if (ext4_test_inode_state(&ei->vfs_inode,
EXT4_STATE_EXT_PRECACHED))
skip_precached = 0;
goto retry; goto retry;
} }
spin_unlock(&sbi->s_es_lru_lock);
if (locked_ei && nr_shrunk == 0) if (locked_ei && nr_shrunk == 0)
nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
out:
scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
if (likely(es_stats->es_stats_scan_time)) if (likely(es_stats->es_stats_scan_time))
es_stats->es_stats_scan_time = (scan_time + es_stats->es_stats_scan_time = (scan_time +
...@@ -1043,7 +1044,7 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, ...@@ -1043,7 +1044,7 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
else else
es_stats->es_stats_shrunk = nr_shrunk; es_stats->es_stats_shrunk = nr_shrunk;
trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
nr_skipped, retried); nr_skipped, retried);
return nr_shrunk; return nr_shrunk;
} }
...@@ -1055,7 +1056,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, ...@@ -1055,7 +1056,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
struct ext4_sb_info *sbi; struct ext4_sb_info *sbi;
sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
return nr; return nr;
} }
...@@ -1068,13 +1069,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, ...@@ -1068,13 +1069,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
int nr_to_scan = sc->nr_to_scan; int nr_to_scan = sc->nr_to_scan;
int ret, nr_shrunk; int ret, nr_shrunk;
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
if (!nr_to_scan) if (!nr_to_scan)
return ret; return ret;
nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
return nr_shrunk; return nr_shrunk;
...@@ -1102,28 +1103,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) ...@@ -1102,28 +1103,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
return 0; return 0;
/* here we just find an inode that has the max nr. of objects */ /* here we just find an inode that has the max nr. of objects */
spin_lock(&sbi->s_es_lru_lock); spin_lock(&sbi->s_es_lock);
list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
inode_cnt++; inode_cnt++;
if (max && max->i_es_all_nr < ei->i_es_all_nr) if (max && max->i_es_all_nr < ei->i_es_all_nr)
max = ei; max = ei;
else if (!max) else if (!max)
max = ei; max = ei;
} }
spin_unlock(&sbi->s_es_lru_lock); spin_unlock(&sbi->s_es_lock);
seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
seq_printf(seq, " %lu/%lu cache hits/misses\n", seq_printf(seq, " %lu/%lu cache hits/misses\n",
es_stats->es_stats_cache_hits, es_stats->es_stats_cache_hits,
es_stats->es_stats_cache_misses); es_stats->es_stats_cache_misses);
if (es_stats->es_stats_last_sorted != 0)
seq_printf(seq, " %u ms last sorted interval\n",
jiffies_to_msecs(jiffies -
es_stats->es_stats_last_sorted));
if (inode_cnt) if (inode_cnt)
seq_printf(seq, " %d inodes on lru list\n", inode_cnt); seq_printf(seq, " %d inodes on list\n", inode_cnt);
seq_printf(seq, "average:\n %llu us scan time\n", seq_printf(seq, "average:\n %llu us scan time\n",
div_u64(es_stats->es_stats_scan_time, 1000)); div_u64(es_stats->es_stats_scan_time, 1000));
...@@ -1132,7 +1129,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) ...@@ -1132,7 +1129,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
seq_printf(seq, seq_printf(seq,
"maximum:\n %lu inode (%u objects, %u reclaimable)\n" "maximum:\n %lu inode (%u objects, %u reclaimable)\n"
" %llu us max scan time\n", " %llu us max scan time\n",
max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
div_u64(es_stats->es_stats_max_scan_time, 1000)); div_u64(es_stats->es_stats_max_scan_time, 1000));
return 0; return 0;
...@@ -1181,9 +1178,9 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) ...@@ -1181,9 +1178,9 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
{ {
int err; int err;
INIT_LIST_HEAD(&sbi->s_es_lru); INIT_LIST_HEAD(&sbi->s_es_list);
spin_lock_init(&sbi->s_es_lru_lock); sbi->s_es_nr_inode = 0;
sbi->s_es_stats.es_stats_last_sorted = 0; spin_lock_init(&sbi->s_es_lock);
sbi->s_es_stats.es_stats_shrunk = 0; sbi->s_es_stats.es_stats_shrunk = 0;
sbi->s_es_stats.es_stats_cache_hits = 0; sbi->s_es_stats.es_stats_cache_hits = 0;
sbi->s_es_stats.es_stats_cache_misses = 0; sbi->s_es_stats.es_stats_cache_misses = 0;
...@@ -1192,7 +1189,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) ...@@ -1192,7 +1189,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
if (err) if (err)
return err; return err;
err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL); err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
if (err) if (err)
goto err1; goto err1;
...@@ -1210,7 +1207,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) ...@@ -1210,7 +1207,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
return 0; return 0;
err2: err2:
percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
err1: err1:
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
return err; return err;
...@@ -1221,37 +1218,10 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) ...@@ -1221,37 +1218,10 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
if (sbi->s_proc) if (sbi->s_proc)
remove_proc_entry("es_shrinker_info", sbi->s_proc); remove_proc_entry("es_shrinker_info", sbi->s_proc);
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
unregister_shrinker(&sbi->s_es_shrinker); unregister_shrinker(&sbi->s_es_shrinker);
} }
void ext4_es_lru_add(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ei->i_touch_when = jiffies;
if (!list_empty(&ei->i_es_lru))
return;
spin_lock(&sbi->s_es_lru_lock);
if (list_empty(&ei->i_es_lru))
list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
spin_unlock(&sbi->s_es_lru_lock);
}
void ext4_es_lru_del(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
spin_lock(&sbi->s_es_lru_lock);
if (!list_empty(&ei->i_es_lru))
list_del_init(&ei->i_es_lru);
spin_unlock(&sbi->s_es_lru_lock);
}
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
int nr_to_scan) int nr_to_scan)
{ {
...@@ -1263,7 +1233,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, ...@@ -1263,7 +1233,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST); DEFAULT_RATELIMIT_BURST);
if (ei->i_es_lru_nr == 0) if (ei->i_es_shk_nr == 0)
return 0; return 0;
if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
......
...@@ -65,14 +65,13 @@ struct ext4_es_tree { ...@@ -65,14 +65,13 @@ struct ext4_es_tree {
}; };
struct ext4_es_stats { struct ext4_es_stats {
unsigned long es_stats_last_sorted;
unsigned long es_stats_shrunk; unsigned long es_stats_shrunk;
unsigned long es_stats_cache_hits; unsigned long es_stats_cache_hits;
unsigned long es_stats_cache_misses; unsigned long es_stats_cache_misses;
u64 es_stats_scan_time; u64 es_stats_scan_time;
u64 es_stats_max_scan_time; u64 es_stats_max_scan_time;
struct percpu_counter es_stats_all_cnt; struct percpu_counter es_stats_all_cnt;
struct percpu_counter es_stats_lru_cnt; struct percpu_counter es_stats_shk_cnt;
}; };
extern int __init ext4_init_es(void); extern int __init ext4_init_es(void);
...@@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, ...@@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
extern void ext4_es_lru_add(struct inode *inode); extern void ext4_es_list_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode); extern void ext4_es_list_del(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */ #endif /* _EXT4_EXTENTS_STATUS_H */
...@@ -486,7 +486,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -486,7 +486,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
/* Lookup extent status tree firstly */ /* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
ext4_es_lru_add(inode); ext4_es_list_add(inode);
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) + map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk; map->m_lblk - es.es_lblk;
...@@ -1388,7 +1388,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1388,7 +1388,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
/* Lookup extent status tree firstly */ /* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, &es)) { if (ext4_es_lookup_extent(inode, iblock, &es)) {
ext4_es_lru_add(inode); ext4_es_list_add(inode);
if (ext4_es_is_hole(&es)) { if (ext4_es_is_hole(&es)) {
retval = 0; retval = 0;
down_read(&EXT4_I(inode)->i_data_sem); down_read(&EXT4_I(inode)->i_data_sem);
......
...@@ -78,8 +78,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) ...@@ -78,8 +78,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
ext4_es_lru_del(inode1); ext4_es_list_del(inode1);
ext4_es_lru_del(inode2); ext4_es_list_del(inode2);
isize = i_size_read(inode1); isize = i_size_read(inode1);
i_size_write(inode1, i_size_read(inode2)); i_size_write(inode1, i_size_read(inode2));
......
...@@ -871,10 +871,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ...@@ -871,10 +871,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_prealloc_lock); spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree); ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock); rwlock_init(&ei->i_es_lock);
INIT_LIST_HEAD(&ei->i_es_lru); INIT_LIST_HEAD(&ei->i_es_list);
ei->i_es_all_nr = 0; ei->i_es_all_nr = 0;
ei->i_es_lru_nr = 0; ei->i_es_shk_nr = 0;
ei->i_touch_when = 0;
ei->i_reserved_data_blocks = 0; ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0; ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0; ei->i_allocated_meta_blocks = 0;
...@@ -963,7 +962,7 @@ void ext4_clear_inode(struct inode *inode) ...@@ -963,7 +962,7 @@ void ext4_clear_inode(struct inode *inode)
dquot_drop(inode); dquot_drop(inode);
ext4_discard_preallocations(inode); ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
ext4_es_lru_del(inode); ext4_es_list_del(inode);
if (EXT4_I(inode)->jinode) { if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode); EXT4_I(inode)->jinode);
......
...@@ -2450,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range, ...@@ -2450,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range,
TRACE_EVENT(ext4_es_shrink, TRACE_EVENT(ext4_es_shrink,
TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
int skip_precached, int nr_skipped, int retried), int nr_skipped, int retried),
TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried), TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
__field( int, nr_shrunk ) __field( int, nr_shrunk )
__field( unsigned long long, scan_time ) __field( unsigned long long, scan_time )
__field( int, skip_precached )
__field( int, nr_skipped ) __field( int, nr_skipped )
__field( int, retried ) __field( int, retried )
), ),
...@@ -2467,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink, ...@@ -2467,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink,
__entry->dev = sb->s_dev; __entry->dev = sb->s_dev;
__entry->nr_shrunk = nr_shrunk; __entry->nr_shrunk = nr_shrunk;
__entry->scan_time = div_u64(scan_time, 1000); __entry->scan_time = div_u64(scan_time, 1000);
__entry->skip_precached = skip_precached;
__entry->nr_skipped = nr_skipped; __entry->nr_skipped = nr_skipped;
__entry->retried = retried; __entry->retried = retried;
), ),
TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d " TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu "
"nr_skipped %d retried %d", "nr_skipped %d retried %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk,
__entry->scan_time, __entry->skip_precached, __entry->scan_time, __entry->nr_skipped, __entry->retried)
__entry->nr_skipped, __entry->retried)
); );
#endif /* _TRACE_EXT4_H */ #endif /* _TRACE_EXT4_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment