Commit 2531d9ee authored by Kent Overstreet's avatar Kent Overstreet

bcache: Kill unused freelist

This was originally added as at optimization that for various reasons isn't
needed anymore, but it does add a lot of nasty corner cases (and it was
responsible for some recently fixed bugs). Just get rid of it now.
Signed-off-by: default avatarKent Overstreet <kmo@daterainc.com>
parent 0a63b66d
......@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);
if (CACHE_SYNC(&ca->set->sb)) {
ca->need_save_prio = max(ca->need_save_prio,
bucket_disk_gen(b));
WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
}
return ret;
}
......@@ -120,58 +114,46 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
mutex_unlock(&c->bucket_lock);
}
/* Allocation */
/*
* Background allocation thread: scans for buckets to be invalidated,
* invalidates them, rewrites prios/gens (marking them as invalidated on disk),
* then optionally issues discard commands to the newly free buckets, then puts
* them on the various freelists.
*/
static inline bool can_inc_bucket_gen(struct bucket *b)
{
return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX &&
bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
}
bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
unsigned i;
for (i = 0; i < RESERVE_NONE; i++)
if (!fifo_full(&ca->free[i]))
goto add;
return false;
}
add:
b->prio = 0;
if (can_inc_bucket_gen(b) &&
fifo_push(&ca->unused, b - ca->buckets)) {
atomic_inc(&b->pin);
return true;
}
BUG_ON(!ca->set->gc_mark_valid);
return false;
}
static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
return (!GC_MARK(b) ||
GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
!atomic_read(&b->pin) &&
can_inc_bucket_gen(b);
}
static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
size_t bucket = b - ca->buckets;
lockdep_assert_held(&ca->set->bucket_lock);
BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
if (GC_SECTORS_USED(b))
trace_bcache_invalidate(ca, bucket);
trace_bcache_invalidate(ca, b - ca->buckets);
bch_inc_gen(ca, b);
b->prio = INITIAL_PRIO;
atomic_inc(&b->pin);
fifo_push(&ca->free_inc, bucket);
}
static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
__bch_invalidate_one_bucket(ca, b);
fifo_push(&ca->free_inc, b - ca->buckets);
}
/*
......@@ -201,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
ca->heap.used = 0;
for_each_bucket(b, ca) {
/*
* If we fill up the unused list, if we then return before
* adding anything to the free_inc list we'll skip writing
* prios/gens and just go back to allocating from the unused
* list:
*/
if (fifo_full(&ca->unused))
return;
if (!can_invalidate_bucket(ca, b))
continue;
if (!GC_SECTORS_USED(b) &&
bch_bucket_add_unused(ca, b))
if (!bch_can_invalidate_bucket(ca, b))
continue;
if (!heap_full(&ca->heap))
......@@ -239,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca)
return;
}
invalidate_one_bucket(ca, b);
bch_invalidate_one_bucket(ca, b);
}
}
......@@ -255,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)
b = ca->buckets + ca->fifo_last_bucket++;
if (can_invalidate_bucket(ca, b))
invalidate_one_bucket(ca, b);
if (bch_can_invalidate_bucket(ca, b))
bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets) {
ca->invalidate_needs_gc = 1;
......@@ -280,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)
b = ca->buckets + n;
if (can_invalidate_bucket(ca, b))
invalidate_one_bucket(ca, b);
if (bch_can_invalidate_bucket(ca, b))
bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets / 2) {
ca->invalidate_needs_gc = 1;
......@@ -293,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca)
static void invalidate_buckets(struct cache *ca)
{
if (ca->invalidate_needs_gc)
return;
BUG_ON(ca->invalidate_needs_gc);
switch (CACHE_REPLACEMENT(&ca->sb)) {
case CACHE_REPLACEMENT_LRU:
......@@ -354,17 +322,10 @@ static int bch_allocator_thread(void *arg)
* possibly issue discards to them, then we add the bucket to
* the free list:
*/
while (1) {
while (!fifo_empty(&ca->free_inc)) {
long bucket;
if ((!atomic_read(&ca->set->prio_blocked) ||
!CACHE_SYNC(&ca->set->sb)) &&
!fifo_empty(&ca->unused))
fifo_pop(&ca->unused, bucket);
else if (!fifo_empty(&ca->free_inc))
fifo_pop(&ca->free_inc, bucket);
else
break;
if (ca->discard) {
mutex_unlock(&ca->set->bucket_lock);
......@@ -385,9 +346,9 @@ static int bch_allocator_thread(void *arg)
* them to the free_inc list:
*/
retry_invalidate:
allocator_wait(ca, ca->set->gc_mark_valid &&
(ca->need_save_prio > 64 ||
!ca->invalidate_needs_gc));
!ca->invalidate_needs_gc);
invalidate_buckets(ca);
/*
......@@ -395,13 +356,28 @@ static int bch_allocator_thread(void *arg)
* new stuff to them:
*/
allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
if (CACHE_SYNC(&ca->set->sb) &&
(!fifo_empty(&ca->free_inc) ||
ca->need_save_prio > 64))
if (CACHE_SYNC(&ca->set->sb)) {
/*
* This could deadlock if an allocation with a btree
* node locked ever blocked - having the btree node
* locked would block garbage collection, but here we're
* waiting on garbage collection before we invalidate
* and free anything.
*
* But this should be safe since the btree code always
* uses btree_check_reserve() before allocating now, and
* if it fails it blocks without btree nodes locked.
*/
if (!fifo_full(&ca->free_inc))
goto retry_invalidate;
bch_prio_write(ca);
}
}
}
/* Allocation */
long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
{
DEFINE_WAIT(w);
......@@ -447,8 +423,6 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
BUG_ON(i == r);
fifo_for_each(i, &ca->free_inc, iter)
BUG_ON(i == r);
fifo_for_each(i, &ca->unused, iter)
BUG_ON(i == r);
}
b = ca->buckets + r;
......@@ -470,17 +444,19 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
return r;
}
void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
}
void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) {
struct bucket *b = PTR_BUCKET(c, k, i);
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
}
for (i = 0; i < KEY_PTRS(k); i++)
__bch_bucket_free(PTR_CACHE(c, k, i),
PTR_BUCKET(c, k, i));
}
int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
......
......@@ -195,7 +195,6 @@ struct bucket {
atomic_t pin;
uint16_t prio;
uint8_t gen;
uint8_t disk_gen;
uint8_t last_gc; /* Most out of date gen in the btree */
uint8_t gc_gen;
uint16_t gc_mark; /* Bitfield used by GC. See below for field */
......@@ -426,14 +425,9 @@ struct cache {
* their new gen to disk. After prio_write() finishes writing the new
* gens/prios, they'll be moved to the free list (and possibly discarded
* in the process)
*
* unused: GC found nothing pointing into these buckets (possibly
* because all the data they contained was overwritten), so we only
* need to discard them before they can be moved to the free list.
*/
DECLARE_FIFO(long, free)[RESERVE_NR];
DECLARE_FIFO(long, free_inc);
DECLARE_FIFO(long, unused);
size_t fifo_last_bucket;
......@@ -442,12 +436,6 @@ struct cache {
DECLARE_HEAP(struct bucket *, heap);
/*
* max(gen - disk_gen) for all buckets. When it gets too big we have to
* call prio_write() to keep gens from wrapping.
*/
uint8_t need_save_prio;
/*
* If nonzero, we know we aren't going to find any buckets to invalidate
* until a gc finishes - otherwise we could pointlessly burn a ton of
......@@ -848,9 +836,6 @@ static inline bool cached_dev_get(struct cached_dev *dc)
/*
* bucket_gc_gen() returns the difference between the bucket's current gen and
* the oldest gen of any pointer into that bucket in the btree (last_gc).
*
* bucket_disk_gen() returns the difference between the current gen and the gen
* on disk; they're both used to make sure gens don't wrap around.
*/
static inline uint8_t bucket_gc_gen(struct bucket *b)
......@@ -858,13 +843,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
return b->gen - b->last_gc;
}
static inline uint8_t bucket_disk_gen(struct bucket *b)
{
return b->gen - b->disk_gen;
}
#define BUCKET_GC_GEN_MAX 96U
#define BUCKET_DISK_GEN_MAX 64U
#define kobj_attribute_write(n, fn) \
static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
......@@ -897,11 +876,14 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int);
bool bch_bucket_add_unused(struct cache *, struct bucket *);
long bch_bucket_alloc(struct cache *, unsigned, bool);
bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *);
long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned,
......
......@@ -1641,7 +1641,7 @@ static void btree_gc_start(struct cache_set *c)
mutex_unlock(&c->bucket_lock);
}
size_t bch_btree_gc_finish(struct cache_set *c)
static size_t bch_btree_gc_finish(struct cache_set *c)
{
size_t available = 0;
struct bucket *b;
......@@ -1703,9 +1703,6 @@ size_t bch_btree_gc_finish(struct cache_set *c)
if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
available++;
if (!GC_MARK(b))
bch_bucket_add_unused(ca, b);
}
}
......@@ -1836,6 +1833,42 @@ int bch_btree_check(struct cache_set *c)
return btree_root(check_recurse, c, &op);
}
void bch_initial_gc_finish(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
unsigned i;
bch_btree_gc_finish(c);
mutex_lock(&c->bucket_lock);
/*
* We need to put some unused buckets directly on the prio freelist in
* order to get the allocator thread started - it needs freed buckets in
* order to rewrite the prios and gens, and it needs to rewrite prios
* and gens in order to free buckets.
*
* This is only safe for buckets that have no live data in them, which
* there should always be some of.
*/
for_each_cache(ca, c, i) {
for_each_bucket(b, ca) {
if (fifo_full(&ca->free[RESERVE_PRIO]))
break;
if (bch_can_invalidate_bucket(ca, b) &&
!GC_MARK(b)) {
__bch_invalidate_one_bucket(ca, b);
fifo_push(&ca->free[RESERVE_PRIO],
b - ca->buckets);
}
}
}
mutex_unlock(&c->bucket_lock);
}
/* Btree insertion */
static bool btree_insert_key(struct btree *b, struct bkey *k,
......
......@@ -252,7 +252,7 @@ int bch_btree_insert(struct cache_set *, struct keylist *,
atomic_t *, struct bkey *);
int bch_gc_thread_start(struct cache_set *);
size_t bch_btree_gc_finish(struct cache_set *);
void bch_initial_gc_finish(struct cache_set *);
void bch_moving_gc(struct cache_set *);
int bch_btree_check(struct cache_set *);
void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
......
......@@ -541,9 +541,6 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
closure_sync(cl);
}
#define buckets_free(c) "free %zu, free_inc %zu, unused %zu", \
fifo_used(&c->free), fifo_used(&c->free_inc), fifo_used(&c->unused)
void bch_prio_write(struct cache *ca)
{
int i;
......@@ -554,10 +551,6 @@ void bch_prio_write(struct cache *ca)
lockdep_assert_held(&ca->set->bucket_lock);
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets; b++)
b->disk_gen = b->gen;
ca->disk_buckets->seq++;
atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
......@@ -601,14 +594,17 @@ void bch_prio_write(struct cache *ca)
mutex_lock(&ca->set->bucket_lock);
ca->need_save_prio = 0;
/*
* Don't want the old priorities to get garbage collected until after we
* finish writing the new ones, and they're journalled
*/
for (i = 0; i < prio_buckets(ca); i++)
for (i = 0; i < prio_buckets(ca); i++) {
if (ca->prio_last_buckets[i])
__bch_bucket_free(ca,
&ca->buckets[ca->prio_last_buckets[i]]);
ca->prio_last_buckets[i] = ca->prio_buckets[i];
}
}
static void prio_read(struct cache *ca, uint64_t bucket)
......@@ -639,7 +635,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
}
b->prio = le16_to_cpu(d->prio);
b->gen = b->disk_gen = b->last_gc = b->gc_gen = d->gen;
b->gen = b->last_gc = b->gc_gen = d->gen;
}
}
......@@ -1606,7 +1602,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
bch_journal_mark(c, &journal);
bch_btree_gc_finish(c);
bch_initial_gc_finish(c);
pr_debug("btree_check() done");
/*
......@@ -1648,7 +1644,7 @@ static void run_cache_set(struct cache_set *c)
ca->sb.d[j] = ca->sb.first_bucket + j;
}
bch_btree_gc_finish(c);
bch_initial_gc_finish(c);
err = "error starting allocator thread";
for_each_cache(ca, c, i)
......@@ -1794,7 +1790,6 @@ void bch_cache_release(struct kobject *kobj)
vfree(ca->buckets);
free_heap(&ca->heap);
free_fifo(&ca->unused);
free_fifo(&ca->free_inc);
for (i = 0; i < RESERVE_NR; i++)
......@@ -1831,7 +1826,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
!init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
!init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
!(ca->buckets = vzalloc(sizeof(struct bucket) *
ca->sb.nbuckets)) ||
......
......@@ -446,7 +446,6 @@ TRACE_EVENT(bcache_alloc_fail,
__field(dev_t, dev )
__field(unsigned, free )
__field(unsigned, free_inc )
__field(unsigned, unused )
__field(unsigned, blocked )
),
......@@ -454,13 +453,12 @@ TRACE_EVENT(bcache_alloc_fail,
__entry->dev = ca->bdev->bd_dev;
__entry->free = fifo_used(&ca->free[reserve]);
__entry->free_inc = fifo_used(&ca->free_inc);
__entry->unused = fifo_used(&ca->unused);
__entry->blocked = atomic_read(&ca->set->prio_blocked);
),
TP_printk("alloc fail %d,%d free %u free_inc %u unused %u blocked %u",
TP_printk("alloc fail %d,%d free %u free_inc %u blocked %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->free,
__entry->free_inc, __entry->unused, __entry->blocked)
__entry->free_inc, __entry->blocked)
);
/* Background writeback */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment