Commit 67539e85 authored by Kent Overstreet

bcache: Add struct bset_sort_state

More disentangling bset.c from the rest of the bcache code - soon, the
sorting routines won't have any dependencies on any outside structs.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
parent 911c9610
...@@ -187,6 +187,7 @@ ...@@ -187,6 +187,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include "bset.h"
#include "util.h" #include "util.h"
#include "closure.h" #include "closure.h"
...@@ -645,8 +646,7 @@ struct cache_set { ...@@ -645,8 +646,7 @@ struct cache_set {
*/ */
mempool_t *fill_iter; mempool_t *fill_iter;
mempool_t *sort_pool; struct bset_sort_state sort;
unsigned sort_crit_factor;
/* List of buckets we're currently writing data to */ /* List of buckets we're currently writing data to */
struct list_head data_buckets; struct list_head data_buckets;
...@@ -662,7 +662,6 @@ struct cache_set { ...@@ -662,7 +662,6 @@ struct cache_set {
unsigned congested_read_threshold_us; unsigned congested_read_threshold_us;
unsigned congested_write_threshold_us; unsigned congested_write_threshold_us;
struct time_stats sort_time;
struct time_stats btree_gc_time; struct time_stats btree_gc_time;
struct time_stats btree_split_time; struct time_stats btree_split_time;
struct time_stats btree_read_time; struct time_stats btree_read_time;
......
...@@ -952,6 +952,26 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, ...@@ -952,6 +952,26 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
/* Mergesort */ /* Mergesort */
/*
 * Tear down a sort state: destroy its page pool if one was created.
 *
 * A state whose pool pointer is NULL (for example because initialisation
 * never ran or failed) is left untouched, so this is safe to call from
 * cleanup paths unconditionally.
 */
void bch_bset_sort_state_free(struct bset_sort_state *state)
{
	/* Nothing was allocated - nothing to release. */
	if (!state->pool)
		return;

	mempool_destroy(state->pool);
}
/*
 * Initialise a sort state.
 *
 * @state:      state to set up
 * @page_order: page order recorded in the state and used to size the
 *              entries of its page pool
 *
 * Initialises the lock of the embedded time stats, records @page_order,
 * derives crit_factor as the integer square root of (1 << page_order), and
 * creates the page pool via mempool_create_page_pool(1, page_order).
 *
 * Returns 0 on success, or -ENOMEM if the pool could not be created; in
 * that case the other fields have still been filled in and state->pool is
 * NULL.
 */
int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
{
	spin_lock_init(&state->time.lock);

	state->page_order = page_order;
	state->crit_factor = int_sqrt(1 << page_order);

	state->pool = mempool_create_page_pool(1, page_order);

	return state->pool ? 0 : -ENOMEM;
}
static void sort_key_next(struct btree_iter *iter, static void sort_key_next(struct btree_iter *iter,
struct btree_iter_set *i) struct btree_iter_set *i)
{ {
...@@ -1077,22 +1097,24 @@ static void btree_mergesort(struct btree *b, struct bset *out, ...@@ -1077,22 +1097,24 @@ static void btree_mergesort(struct btree *b, struct bset *out,
} }
static void __btree_sort(struct btree *b, struct btree_iter *iter, static void __btree_sort(struct btree *b, struct btree_iter *iter,
unsigned start, unsigned order, bool fixup) unsigned start, unsigned order, bool fixup,
struct bset_sort_state *state)
{ {
uint64_t start_time; uint64_t start_time;
bool remove_stale = !b->written;
bool used_mempool = false; bool used_mempool = false;
struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
order); order);
if (!out) { if (!out) {
out = page_address(mempool_alloc(b->c->sort_pool, GFP_NOIO)); BUG_ON(order > state->page_order);
out = page_address(mempool_alloc(state->pool, GFP_NOIO));
used_mempool = true; used_mempool = true;
order = ilog2(bucket_pages(b->c)); order = ilog2(bucket_pages(b->c));
} }
start_time = local_clock(); start_time = local_clock();
btree_mergesort(b, out, iter, fixup, remove_stale); btree_mergesort(b, out, iter, fixup, false);
b->nsets = start; b->nsets = start;
if (!start && order == b->page_order) { if (!start && order == b->page_order) {
...@@ -1113,18 +1135,18 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, ...@@ -1113,18 +1135,18 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
} }
if (used_mempool) if (used_mempool)
mempool_free(virt_to_page(out), b->c->sort_pool); mempool_free(virt_to_page(out), state->pool);
else else
free_pages((unsigned long) out, order); free_pages((unsigned long) out, order);
if (b->written) bset_build_written_tree(b);
bset_build_written_tree(b);
if (!start) if (!start)
bch_time_stats_update(&b->c->sort_time, start_time); bch_time_stats_update(&state->time, start_time);
} }
void bch_btree_sort_partial(struct btree *b, unsigned start) void bch_btree_sort_partial(struct btree *b, unsigned start,
struct bset_sort_state *state)
{ {
size_t order = b->page_order, keys = 0; size_t order = b->page_order, keys = 0;
struct btree_iter iter; struct btree_iter iter;
...@@ -1148,18 +1170,19 @@ void bch_btree_sort_partial(struct btree *b, unsigned start) ...@@ -1148,18 +1170,19 @@ void bch_btree_sort_partial(struct btree *b, unsigned start)
order = ilog2(order); order = ilog2(order);
} }
__btree_sort(b, &iter, start, order, false); __btree_sort(b, &iter, start, order, false, state);
EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
} }
void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter,
struct bset_sort_state *state)
{ {
BUG_ON(!b->written); __btree_sort(b, iter, 0, b->page_order, true, state);
__btree_sort(b, iter, 0, b->page_order, true);
} }
void bch_btree_sort_into(struct btree *b, struct btree *new) void bch_btree_sort_into(struct btree *b, struct btree *new,
struct bset_sort_state *state)
{ {
uint64_t start_time = local_clock(); uint64_t start_time = local_clock();
...@@ -1168,15 +1191,14 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) ...@@ -1168,15 +1191,14 @@ void bch_btree_sort_into(struct btree *b, struct btree *new)
btree_mergesort(b, new->sets->data, &iter, false, true); btree_mergesort(b, new->sets->data, &iter, false, true);
bch_time_stats_update(&b->c->sort_time, start_time); bch_time_stats_update(&state->time, start_time);
bkey_copy_key(&new->key, &b->key);
new->sets->size = 0; new->sets->size = 0;
} }
#define SORT_CRIT (4096 / sizeof(uint64_t)) #define SORT_CRIT (4096 / sizeof(uint64_t))
void bch_btree_sort_lazy(struct btree *b) void bch_btree_sort_lazy(struct btree *b, struct bset_sort_state *state)
{ {
unsigned crit = SORT_CRIT; unsigned crit = SORT_CRIT;
int i; int i;
...@@ -1185,24 +1207,18 @@ void bch_btree_sort_lazy(struct btree *b) ...@@ -1185,24 +1207,18 @@ void bch_btree_sort_lazy(struct btree *b)
if (!b->nsets) if (!b->nsets)
goto out; goto out;
/* If not a leaf node, always sort */
if (b->level) {
bch_btree_sort(b);
return;
}
for (i = b->nsets - 1; i >= 0; --i) { for (i = b->nsets - 1; i >= 0; --i) {
crit *= b->c->sort_crit_factor; crit *= state->crit_factor;
if (b->sets[i].data->keys < crit) { if (b->sets[i].data->keys < crit) {
bch_btree_sort_partial(b, i); bch_btree_sort_partial(b, i, state);
return; return;
} }
} }
/* Sort if we'd overflow */ /* Sort if we'd overflow */
if (b->nsets + 1 == MAX_BSETS) { if (b->nsets + 1 == MAX_BSETS) {
bch_btree_sort(b); bch_btree_sort(b, state);
return; return;
} }
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#include <linux/slab.h> #include <linux/slab.h>
#include "util.h" /* for time_stats */
/* /*
* BKEYS: * BKEYS:
* *
...@@ -190,6 +192,33 @@ struct bset_tree { ...@@ -190,6 +192,33 @@ struct bset_tree {
struct bset *data; struct bset *data;
}; };
/* Sorting */
/*
 * State shared by the btree sorting routines, set up by
 * bch_bset_sort_state_init() and released by bch_bset_sort_state_free().
 */
struct bset_sort_state {
/*
 * Page pool created with mempool_create_page_pool(1, page_order); the sort
 * code falls back to it when __get_free_pages() fails.
 */
mempool_t *pool;
/* Page order the pool was created with. */
unsigned page_order;
/*
 * int_sqrt(1 << page_order); the lazy sort multiplies its key-count
 * threshold by this factor for each set it examines.
 */
unsigned crit_factor;
/* Sort timing statistics, updated through bch_time_stats_update(). */
struct time_stats time;
};
/* Destroy the state's page pool, if one was created. */
void bch_bset_sort_state_free(struct bset_sort_state *);
/* Initialise the state for the given page order; returns 0 or -ENOMEM. */
int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
/*
 * Sorting entry points. Each takes the sort state to use as an explicit
 * argument instead of reaching into the owning cache set.
 */
void bch_btree_sort_lazy(struct btree *, struct bset_sort_state *);
void bch_btree_sort_into(struct btree *, struct btree *,
struct bset_sort_state *);
void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *,
struct bset_sort_state *);
void bch_btree_sort_partial(struct btree *, unsigned,
struct bset_sort_state *);
/*
 * Convenience wrapper: run bch_btree_sort_partial() on @b with a start
 * index of 0, using @state as the sort state.
 */
static inline void bch_btree_sort(struct btree *b, struct bset_sort_state *state)
{
	bch_btree_sort_partial(b, 0, state);
}
/* Keylists */ /* Keylists */
struct keylist { struct keylist {
...@@ -374,15 +403,6 @@ static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t, ...@@ -374,15 +403,6 @@ static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
}) })
bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
void bch_btree_sort_lazy(struct btree *);
void bch_btree_sort_into(struct btree *, struct btree *);
void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
void bch_btree_sort_partial(struct btree *, unsigned);
static inline void bch_btree_sort(struct btree *b)
{
bch_btree_sort_partial(b, 0);
}
int bch_bset_print_stats(struct cache_set *, char *); int bch_bset_print_stats(struct cache_set *, char *);
......
...@@ -263,7 +263,7 @@ void bch_btree_node_read_done(struct btree *b) ...@@ -263,7 +263,7 @@ void bch_btree_node_read_done(struct btree *b)
if (i->seq == b->sets[0].data->seq) if (i->seq == b->sets[0].data->seq)
goto err; goto err;
bch_btree_sort_and_fix_extents(b, iter); bch_btree_sort_and_fix_extents(b, iter, &b->c->sort);
i = b->sets[0].data; i = b->sets[0].data;
err = "short btree key"; err = "short btree key";
...@@ -476,7 +476,11 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) ...@@ -476,7 +476,11 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
bch_btree_sort_lazy(b); /* If not a leaf node, always sort */
if (b->level && b->nsets)
bch_btree_sort(b, &b->c->sort);
else
bch_btree_sort_lazy(b, &b->c->sort);
/* /*
* do verify if there was more than one set initially (i.e. we did a * do verify if there was more than one set initially (i.e. we did a
...@@ -1125,8 +1129,10 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait) ...@@ -1125,8 +1129,10 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait)
static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
{ {
struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
if (!IS_ERR_OR_NULL(n)) if (!IS_ERR_OR_NULL(n)) {
bch_btree_sort_into(b, n); bch_btree_sort_into(b, n, &b->c->sort);
bkey_copy_key(&n->key, &b->key);
}
return n; return n;
} }
......
...@@ -1351,6 +1351,7 @@ static void cache_set_free(struct closure *cl) ...@@ -1351,6 +1351,7 @@ static void cache_set_free(struct closure *cl)
if (ca) if (ca)
kobject_put(&ca->kobj); kobject_put(&ca->kobj);
bch_bset_sort_state_free(&c->sort);
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
if (c->bio_split) if (c->bio_split)
...@@ -1481,15 +1482,12 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) ...@@ -1481,15 +1482,12 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->btree_pages = max_t(int, c->btree_pages / 4, c->btree_pages = max_t(int, c->btree_pages / 4,
BTREE_MAX_PAGES); BTREE_MAX_PAGES);
c->sort_crit_factor = int_sqrt(c->btree_pages);
sema_init(&c->sb_write_mutex, 1); sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock); mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->try_wait); init_waitqueue_head(&c->try_wait);
init_waitqueue_head(&c->bucket_wait); init_waitqueue_head(&c->bucket_wait);
sema_init(&c->uuid_write_mutex, 1); sema_init(&c->uuid_write_mutex, 1);
spin_lock_init(&c->sort_time.lock);
spin_lock_init(&c->btree_gc_time.lock); spin_lock_init(&c->btree_gc_time.lock);
spin_lock_init(&c->btree_split_time.lock); spin_lock_init(&c->btree_split_time.lock);
spin_lock_init(&c->btree_read_time.lock); spin_lock_init(&c->btree_read_time.lock);
...@@ -1517,12 +1515,11 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) ...@@ -1517,12 +1515,11 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
bucket_pages(c))) || bucket_pages(c))) ||
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
!(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(c->sort_pool = mempool_create_page_pool(1,
ilog2(bucket_pages(c)))) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
bch_journal_alloc(c) || bch_journal_alloc(c) ||
bch_btree_cache_alloc(c) || bch_btree_cache_alloc(c) ||
bch_open_buckets_alloc(c)) bch_open_buckets_alloc(c) ||
bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
goto err; goto err;
c->congested_read_threshold_us = 2000; c->congested_read_threshold_us = 2000;
......
...@@ -490,7 +490,7 @@ SHOW(__bch_cache_set) ...@@ -490,7 +490,7 @@ SHOW(__bch_cache_set)
sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
sysfs_print_time_stats(&c->sort_time, btree_sort, ms, us); sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment