Commit fe5b37f6 authored by Kent Overstreet's avatar Kent Overstreet

bcachefs: Btree key cache improvements

 - In userspace, we don't have real percpu variables; this patch
   disables the percpu freelists in userspace
 - add some error messages for the asserts in
   bch2_fs_btree_key_cache_exit(); we've been hitting this (only in
   userspace, oddly), perhaps this will help us track down the error.
 - bkey_cached_reuse() should likely be taking the key cache lock, and
   it's a slowpath so it doesn't hurt to
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent dccedaaa
...@@ -112,6 +112,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, ...@@ -112,6 +112,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
if (!ck->c.lock.readers) { if (!ck->c.lock.readers) {
#ifdef __KERNEL__
preempt_disable(); preempt_disable();
f = this_cpu_ptr(bc->pcpu_freed); f = this_cpu_ptr(bc->pcpu_freed);
...@@ -136,6 +137,11 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, ...@@ -136,6 +137,11 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
list_move_tail(&ck->list, &bc->freed_nonpcpu); list_move_tail(&ck->list, &bc->freed_nonpcpu);
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
} }
#else
mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_nonpcpu);
mutex_unlock(&bc->lock);
#endif
} else { } else {
mutex_lock(&bc->lock); mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_pcpu); list_move_tail(&ck->list, &bc->freed_pcpu);
...@@ -174,6 +180,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path) ...@@ -174,6 +180,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id); bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
if (!pcpu_readers) { if (!pcpu_readers) {
#ifdef __KERNEL__
preempt_disable(); preempt_disable();
f = this_cpu_ptr(bc->pcpu_freed); f = this_cpu_ptr(bc->pcpu_freed);
if (f->nr) if (f->nr)
...@@ -196,6 +203,14 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path) ...@@ -196,6 +203,14 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
preempt_enable(); preempt_enable();
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
} }
#else
mutex_lock(&bc->lock);
if (!list_empty(&bc->freed_nonpcpu)) {
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
list_del_init(&ck->list);
}
mutex_unlock(&bc->lock);
#endif
} else { } else {
mutex_lock(&bc->lock); mutex_lock(&bc->lock);
if (!list_empty(&bc->freed_pcpu)) { if (!list_empty(&bc->freed_pcpu)) {
...@@ -254,6 +269,7 @@ bkey_cached_reuse(struct btree_key_cache *c) ...@@ -254,6 +269,7 @@ bkey_cached_reuse(struct btree_key_cache *c)
struct bkey_cached *ck; struct bkey_cached *ck;
unsigned i; unsigned i;
mutex_lock(&c->lock);
rcu_read_lock(); rcu_read_lock();
tbl = rht_dereference_rcu(c->table.tbl, &c->table); tbl = rht_dereference_rcu(c->table.tbl, &c->table);
for (i = 0; i < tbl->size; i++) for (i = 0; i < tbl->size; i++)
...@@ -261,13 +277,14 @@ bkey_cached_reuse(struct btree_key_cache *c) ...@@ -261,13 +277,14 @@ bkey_cached_reuse(struct btree_key_cache *c)
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
bkey_cached_lock_for_evict(ck)) { bkey_cached_lock_for_evict(ck)) {
bkey_cached_evict(c, ck); bkey_cached_evict(c, ck);
rcu_read_unlock(); goto out;
return ck;
} }
} }
ck = NULL;
out:
rcu_read_unlock(); rcu_read_unlock();
mutex_unlock(&c->lock);
return NULL; return ck;
} }
static struct bkey_cached * static struct bkey_cached *
...@@ -873,13 +890,19 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) ...@@ -873,13 +890,19 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
struct bkey_cached *ck, *n; struct bkey_cached *ck, *n;
struct rhash_head *pos; struct rhash_head *pos;
unsigned i; unsigned i;
#ifdef __KERNEL__
int cpu; int cpu;
#endif
if (bc->shrink.list.next) if (bc->shrink.list.next)
unregister_shrinker(&bc->shrink); unregister_shrinker(&bc->shrink);
mutex_lock(&bc->lock); mutex_lock(&bc->lock);
/*
* The loop is needed to guard against racing with rehash:
*/
while (atomic_long_read(&bc->nr_keys)) {
rcu_read_lock(); rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
if (tbl) if (tbl)
...@@ -889,7 +912,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) ...@@ -889,7 +912,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
list_add(&ck->list, &bc->freed_nonpcpu); list_add(&ck->list, &bc->freed_nonpcpu);
} }
rcu_read_unlock(); rcu_read_unlock();
}
#ifdef __KERNEL__
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct btree_key_cache_freelist *f = struct btree_key_cache_freelist *f =
per_cpu_ptr(bc->pcpu_freed, cpu); per_cpu_ptr(bc->pcpu_freed, cpu);
...@@ -899,6 +924,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) ...@@ -899,6 +924,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
list_add(&ck->list, &bc->freed_nonpcpu); list_add(&ck->list, &bc->freed_nonpcpu);
} }
} }
#endif
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
...@@ -914,10 +940,15 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) ...@@ -914,10 +940,15 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
kmem_cache_free(bch2_key_cache, ck); kmem_cache_free(bch2_key_cache, ck);
} }
BUG_ON(atomic_long_read(&bc->nr_dirty) && if (atomic_long_read(&bc->nr_dirty) &&
!bch2_journal_error(&c->journal) && !bch2_journal_error(&c->journal) &&
test_bit(BCH_FS_WAS_RW, &c->flags)); test_bit(BCH_FS_WAS_RW, &c->flags))
BUG_ON(atomic_long_read(&bc->nr_keys)); panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n",
atomic_long_read(&bc->nr_dirty));
if (atomic_long_read(&bc->nr_keys))
panic("btree key cache shutdown error: nr_keys nonzero (%li)\n",
atomic_long_read(&bc->nr_keys));
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
...@@ -939,9 +970,11 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) ...@@ -939,9 +970,11 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
int ret; int ret;
#ifdef __KERNEL__
bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist); bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
if (!bc->pcpu_freed) if (!bc->pcpu_freed)
return -ENOMEM; return -ENOMEM;
#endif
ret = rhashtable_init(&bc->table, &bch2_btree_key_cache_params); ret = rhashtable_init(&bc->table, &bch2_btree_key_cache_params);
if (ret) if (ret)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment