Commit adfe9357 authored by Kent Overstreet

bcachefs: Tweak btree key cache shrinker so it actually frees

Freeing key cache items is a multi-stage process; we need to wait for an
SRCU grace period to elapse, and we handle this ourselves - partly to
avoid callback overhead, but primarily so that when allocating we can
first allocate from the freed items still waiting for an SRCU grace period.
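
As a rough sketch of that pattern (not the actual bcachefs code - struct
cached_entry and the two helpers below are hypothetical), each freed entry
records the SRCU cookie returned by start_poll_synchronize_srcu(), and the
allocator only reuses an entry once poll_state_synchronize_srcu() reports
that its grace period has elapsed:

#include <linux/list.h>
#include <linux/srcu.h>

struct cached_entry {
	struct list_head	list;
	unsigned long		barrier_seq;	/* SRCU cookie recorded at free time */
};

/* Put an entry on the freed list; newest entries go at the tail: */
static void entry_free(struct srcu_struct *barrier, struct list_head *freed,
		       struct cached_entry *e)
{
	e->barrier_seq = start_poll_synchronize_srcu(barrier);
	list_add_tail(&e->list, freed);
}

/* Reuse the oldest freed entry, but only if its grace period has elapsed: */
static struct cached_entry *entry_alloc_from_freed(struct srcu_struct *barrier,
						   struct list_head *freed)
{
	struct cached_entry *e =
		list_first_entry_or_null(freed, struct cached_entry, list);

	if (e && poll_state_synchronize_srcu(barrier, e->barrier_seq)) {
		list_del(&e->list);
		return e;
	}
	return NULL;
}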

Previously, the shrinker was counting the items on the 'waiting for SRCU
grace period' lists as items scanned, which meant that a large enough
backlog of items waiting for an SRCU grace period could consume the entire
nr_to_scan budget and prevent the shrinker from doing any work at all.
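
To make that failure mode concrete, here is a minimal model of the old
accounting (a hypothetical helper, not the real scan function): if the
backlog on the freed lists alone reaches nr_to_scan, the scan bails out
before it ever looks at the hash table of live entries.

/* Old accounting, modelled: the freed-list backlog consumes the whole budget. */
static bool scan_stalls_on_freed_backlog(unsigned long nr_to_scan,
					 unsigned long nr_freed_waiting_on_srcu)
{
	unsigned long scanned = nr_freed_waiting_on_srcu;

	/* mirrors the "if (scanned >= nr) goto out;" checks removed below */
	return scanned >= nr_to_scan;
}

For example, scan_stalls_on_freed_backlog(128, 1000) returns true: that
shrinker call would return having freed nothing, even if the hash table is
full of clean, unreferenced entries.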

With that fixed, items skipped due to the accessed bit turn out to be the
main reason the shrinker makes no progress. We actually want the key cache
shrinker to run quite aggressively, because reclaimed items will generally
still be found (more compactly) in the btree node cache - so we also tweak
the shrinker to not count accessed-bit skips against nr_to_scan.
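
A simplified sketch of that policy (hypothetical names and flag bits
standing in for the BKEY_CACHED_* flags; the real code is in the diff
below): accessed entries get their bit cleared as a second chance but
deliberately do not consume the nr_to_scan budget, so the walk keeps going
until it finds entries it can actually evict.

#include <linux/bitops.h>

struct entry_model {
	unsigned long	flags;
};

enum { ENTRY_DIRTY, ENTRY_ACCESSED };

static bool entry_trylock_for_evict(struct entry_model *ck);
static void entry_evict(struct entry_model *ck);

/* One hash-table entry per call; only genuine eviction candidates are counted: */
static void scan_one_entry(struct entry_model *ck,
			   unsigned long *scanned, unsigned long *freed)
{
	if (test_bit(ENTRY_DIRTY, &ck->flags))
		return;				/* dirty: can't evict here */

	if (test_bit(ENTRY_ACCESSED, &ck->flags)) {
		clear_bit(ENTRY_ACCESSED, &ck->flags);
		return;				/* second chance, not counted against nr_to_scan */
	}

	if (entry_trylock_for_evict(ck)) {
		entry_evict(ck);
		(*freed)++;
	}
	(*scanned)++;
}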
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 6e4d9bd1
@@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 	 * Newest freed entries are at the end of the list - once we hit one
 	 * that's too new to be freed, we can bail out:
 	 */
-	scanned += bc->nr_freed_nonpcpu;
-
 	list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
 		if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
 						 ck->btree_trans_barrier_seq))
@@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 		bc->nr_freed_nonpcpu--;
 	}
 
-	if (scanned >= nr)
-		goto out;
-
-	scanned += bc->nr_freed_pcpu;
-
 	list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
 		if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
 						 ck->btree_trans_barrier_seq))
@@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 		bc->nr_freed_pcpu--;
 	}
 
-	if (scanned >= nr)
-		goto out;
-
 	rcu_read_lock();
 	tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
 	if (bc->shrink_iter >= tbl->size)
@@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 			next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
 			ck = container_of(pos, struct bkey_cached, hash);
 
-			if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
+			if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
 				goto next;
-
-			if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
+			} else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
 				clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
-			else if (bkey_cached_lock_for_evict(ck)) {
+				goto next;
+			} else if (bkey_cached_lock_for_evict(ck)) {
 				bkey_cached_evict(bc, ck);
 				bkey_cached_free(bc, ck);
 			}
@@ -916,7 +906,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 	} while (scanned < nr && bc->shrink_iter != start);
 
 	rcu_read_unlock();
-out:
 	memalloc_nofs_restore(flags);
 	srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
 	mutex_unlock(&bc->lock);