Commit 7635e1a6 authored by Kent Overstreet

bcachefs: Rework open bucket partial list allocation

Now, any open_bucket can go on the partial list: allocating from the
partial list has been moved to its own dedicated function,
open_bucket_add_buckets() -> bucket_alloc_set_partial().

In particular, this means that erasure coded buckets can safely go on
the partial list; the new location works with the "allocate an ec bucket
first, then the rest" logic.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent e53d03fe
...@@ -154,12 +154,9 @@ static void open_bucket_free_unused(struct bch_fs *c, ...@@ -154,12 +154,9 @@ static void open_bucket_free_unused(struct bch_fs *c,
struct write_point *wp, struct write_point *wp,
struct open_bucket *ob) struct open_bucket *ob)
{ {
bool may_realloc = wp->data_type == BCH_DATA_user;
BUG_ON(c->open_buckets_partial_nr >= BUG_ON(c->open_buckets_partial_nr >=
ARRAY_SIZE(c->open_buckets_partial)); ARRAY_SIZE(c->open_buckets_partial));
if (may_realloc) {
spin_lock(&c->freelist_lock); spin_lock(&c->freelist_lock);
ob->on_partial_list = true; ob->on_partial_list = true;
c->open_buckets_partial[c->open_buckets_partial_nr++] = c->open_buckets_partial[c->open_buckets_partial_nr++] =
...@@ -168,9 +165,6 @@ static void open_bucket_free_unused(struct bch_fs *c, ...@@ -168,9 +165,6 @@ static void open_bucket_free_unused(struct bch_fs *c,
closure_wake_up(&c->open_buckets_wait); closure_wake_up(&c->open_buckets_wait);
closure_wake_up(&c->freelist_wait); closure_wake_up(&c->freelist_wait);
} else {
bch2_open_bucket_put(c, ob);
}
} }
/* _only_ for allocating the journal on a new device: */ /* _only_ for allocating the journal on a new device: */
...@@ -256,7 +250,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * ...@@ -256,7 +250,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
ob->valid = true; ob->valid = true;
ob->sectors_free = ca->mi.bucket_size; ob->sectors_free = ca->mi.bucket_size;
ob->alloc_reserve = reserve;
ob->dev = ca->dev_idx; ob->dev = ca->dev_idx;
ob->gen = a->gen; ob->gen = a->gen;
ob->bucket = bucket; ob->bucket = bucket;
...@@ -383,33 +376,6 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc ...@@ -383,33 +376,6 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
return ob; return ob;
} }
static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch_dev *ca,
enum alloc_reserve reserve)
{
struct open_bucket *ob;
int i;
spin_lock(&c->freelist_lock);
for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
ob = c->open_buckets + c->open_buckets_partial[i];
if (ob->dev == ca->dev_idx &&
reserve <= ob->alloc_reserve) {
array_remove_item(c->open_buckets_partial,
c->open_buckets_partial_nr,
i);
ob->on_partial_list = false;
ob->alloc_reserve = reserve;
spin_unlock(&c->freelist_lock);
return ob;
}
}
spin_unlock(&c->freelist_lock);
return NULL;
}
/* /*
* This path is for before the freespace btree is initialized: * This path is for before the freespace btree is initialized:
* *
...@@ -533,7 +499,6 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, ...@@ -533,7 +499,6 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct bch_dev *ca, struct bch_dev *ca,
enum alloc_reserve reserve, enum alloc_reserve reserve,
bool may_alloc_partial,
struct closure *cl, struct closure *cl,
struct bch_dev_usage *usage) struct bch_dev_usage *usage)
{ {
...@@ -572,12 +537,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ...@@ -572,12 +537,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
if (waiting) if (waiting)
closure_wake_up(&c->freelist_wait); closure_wake_up(&c->freelist_wait);
if (may_alloc_partial) {
ob = try_alloc_partial_bucket(c, ca, reserve);
if (ob)
return ob;
}
alloc: alloc:
ob = likely(freespace) ob = likely(freespace)
? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl) ? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
...@@ -597,7 +556,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ...@@ -597,7 +556,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
if (!IS_ERR(ob)) if (!IS_ERR(ob))
trace_and_count(c, bucket_alloc, ca, trace_and_count(c, bucket_alloc, ca,
bch2_alloc_reserves[reserve], bch2_alloc_reserves[reserve],
may_alloc_partial,
ob->bucket, ob->bucket,
usage->d[BCH_DATA_free].buckets, usage->d[BCH_DATA_free].buckets,
avail, avail,
...@@ -609,7 +567,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ...@@ -609,7 +567,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart)) else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
trace_and_count(c, bucket_alloc_fail, ca, trace_and_count(c, bucket_alloc_fail, ca,
bch2_alloc_reserves[reserve], bch2_alloc_reserves[reserve],
may_alloc_partial,
0, 0,
usage->d[BCH_DATA_free].buckets, usage->d[BCH_DATA_free].buckets,
avail, avail,
...@@ -624,7 +581,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ...@@ -624,7 +581,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
enum alloc_reserve reserve, enum alloc_reserve reserve,
bool may_alloc_partial,
struct closure *cl) struct closure *cl)
{ {
struct bch_dev_usage usage; struct bch_dev_usage usage;
...@@ -632,7 +588,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, ...@@ -632,7 +588,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
bch2_trans_do(c, NULL, NULL, 0, bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
may_alloc_partial, cl, &usage))); cl, &usage)));
return ob; return ob;
} }
...@@ -689,12 +645,10 @@ void bch2_dev_stripe_increment(struct bch_dev *ca, ...@@ -689,12 +645,10 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
bch2_dev_stripe_increment_inlined(ca, stripe, &usage); bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
} }
#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0) static int add_new_bucket(struct bch_fs *c,
#define BUCKET_ALLOC_USE_DURABILITY (1 << 1)
static void add_new_bucket(struct bch_fs *c,
struct open_buckets *ptrs, struct open_buckets *ptrs,
struct bch_devs_mask *devs_may_alloc, struct bch_devs_mask *devs_may_alloc,
unsigned nr_replicas,
unsigned *nr_effective, unsigned *nr_effective,
bool *have_cache, bool *have_cache,
unsigned flags, unsigned flags,
...@@ -703,12 +657,21 @@ static void add_new_bucket(struct bch_fs *c, ...@@ -703,12 +657,21 @@ static void add_new_bucket(struct bch_fs *c,
unsigned durability = unsigned durability =
bch_dev_bkey_exists(c, ob->dev)->mi.durability; bch_dev_bkey_exists(c, ob->dev)->mi.durability;
BUG_ON(*nr_effective >= nr_replicas);
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
__clear_bit(ob->dev, devs_may_alloc->d); __clear_bit(ob->dev, devs_may_alloc->d);
*nr_effective += (flags & BUCKET_ALLOC_USE_DURABILITY) *nr_effective += (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
? durability : 1; ? durability : 1;
*have_cache |= !durability; *have_cache |= !durability;
ob_push(c, ptrs, ob); ob_push(c, ptrs, ob);
if (*nr_effective >= nr_replicas)
return 1;
if (ob->ec)
return 1;
return 0;
} }
int bch2_bucket_alloc_set_trans(struct btree_trans *trans, int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
...@@ -718,8 +681,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, ...@@ -718,8 +681,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
unsigned nr_replicas, unsigned nr_replicas,
unsigned *nr_effective, unsigned *nr_effective,
bool *have_cache, bool *have_cache,
enum alloc_reserve reserve,
unsigned flags, unsigned flags,
enum bch_data_type data_type,
enum alloc_reserve reserve,
struct closure *cl) struct closure *cl)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
...@@ -752,8 +716,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, ...@@ -752,8 +716,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue; continue;
} }
ob = bch2_bucket_alloc_trans(trans, ca, reserve, ob = bch2_bucket_alloc_trans(trans, ca, reserve, cl, &usage);
flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
if (!IS_ERR(ob)) if (!IS_ERR(ob))
bch2_dev_stripe_increment_inlined(ca, stripe, &usage); bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
percpu_ref_put(&ca->ref); percpu_ref_put(&ca->ref);
...@@ -765,10 +728,11 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, ...@@ -765,10 +728,11 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue; continue;
} }
add_new_bucket(c, ptrs, devs_may_alloc, ob->data_type = data_type;
nr_effective, have_cache, flags, ob);
if (*nr_effective >= nr_replicas) { if (add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, flags, ob)) {
ret = 0; ret = 0;
break; break;
} }
...@@ -790,7 +754,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, ...@@ -790,7 +754,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct write_point *wp, struct write_point *wp,
struct bch_devs_mask *devs_may_alloc, struct bch_devs_mask *devs_may_alloc,
u16 target, u16 target,
unsigned erasure_code,
unsigned nr_replicas, unsigned nr_replicas,
unsigned *nr_effective, unsigned *nr_effective,
bool *have_cache, bool *have_cache,
...@@ -804,9 +767,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, ...@@ -804,9 +767,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct open_bucket *ob; struct open_bucket *ob;
struct bch_dev *ca; struct bch_dev *ca;
unsigned i, ec_idx; unsigned i, ec_idx;
int ret = 0;
if (!erasure_code)
return 0;
if (nr_replicas < 2) if (nr_replicas < 2)
return 0; return 0;
...@@ -840,53 +801,124 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, ...@@ -840,53 +801,124 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
ob->ec = h->s; ob->ec = h->s;
ec_stripe_new_get(h->s); ec_stripe_new_get(h->s);
add_new_bucket(c, ptrs, devs_may_alloc, ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_effective, have_cache, flags, ob); nr_replicas, nr_effective,
have_cache, flags, ob);
out_put_head: out_put_head:
bch2_ec_stripe_head_put(c, h); bch2_ec_stripe_head_put(c, h);
return 0; return ret;
} }
/* Sector allocator */ /* Sector allocator */
static void get_buckets_from_writepoint(struct bch_fs *c, static bool want_bucket(struct bch_fs *c,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
bool *have_cache, bool ec,
struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
if (!test_bit(ob->dev, devs_may_alloc->d))
return false;
if (ob->data_type != wp->data_type)
return false;
if (!ca->mi.durability &&
(wp->data_type == BCH_DATA_btree || ec || *have_cache))
return false;
if (ec != (ob->ec != NULL))
return false;
return true;
}
static int bucket_alloc_set_writepoint(struct bch_fs *c,
struct open_buckets *ptrs, struct open_buckets *ptrs,
struct write_point *wp, struct write_point *wp,
struct bch_devs_mask *devs_may_alloc, struct bch_devs_mask *devs_may_alloc,
unsigned nr_replicas, unsigned nr_replicas,
unsigned *nr_effective, unsigned *nr_effective,
bool *have_cache, bool *have_cache,
unsigned flags, bool ec, unsigned flags)
bool need_ec)
{ {
struct open_buckets ptrs_skip = { .nr = 0 }; struct open_buckets ptrs_skip = { .nr = 0 };
struct open_bucket *ob; struct open_bucket *ob;
unsigned i; unsigned i;
int ret = 0;
open_bucket_for_each(c, &wp->ptrs, ob, i) { open_bucket_for_each(c, &wp->ptrs, ob, i) {
if (!ret && want_bucket(c, wp, devs_may_alloc,
have_cache, ec, ob))
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, flags, ob);
else
ob_push(c, &ptrs_skip, ob);
}
wp->ptrs = ptrs_skip;
return ret;
}
static int bucket_alloc_set_partial(struct bch_fs *c,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache, bool ec,
enum alloc_reserve reserve,
unsigned flags)
{
int i, ret = 0;
if (!c->open_buckets_partial_nr)
return 0;
spin_lock(&c->freelist_lock);
if (!c->open_buckets_partial_nr)
goto unlock;
for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
struct bch_dev_usage usage;
u64 avail;
if (*nr_effective < nr_replicas && bch2_dev_usage_read_fast(ca, &usage);
test_bit(ob->dev, devs_may_alloc->d) && avail = dev_buckets_free(ca, usage, reserve);
(ca->mi.durability || if (!avail)
(wp->data_type == BCH_DATA_user && !*have_cache)) && continue;
(ob->ec || !need_ec)) {
add_new_bucket(c, ptrs, devs_may_alloc, array_remove_item(c->open_buckets_partial,
nr_effective, have_cache, c->open_buckets_partial_nr,
flags, ob); i);
} else { ob->on_partial_list = false;
ob_push(c, &ptrs_skip, ob);
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, flags, ob);
if (ret)
break;
} }
} }
wp->ptrs = ptrs_skip; unlock:
spin_unlock(&c->freelist_lock);
return ret;
} }
static int open_bucket_add_buckets(struct btree_trans *trans, static int __open_bucket_add_buckets(struct btree_trans *trans,
struct open_buckets *ptrs, struct open_buckets *ptrs,
struct write_point *wp, struct write_point *wp,
struct bch_devs_list *devs_have, struct bch_devs_list *devs_have,
u16 target, u16 target,
unsigned erasure_code, bool erasure_code,
unsigned nr_replicas, unsigned nr_replicas,
unsigned *nr_effective, unsigned *nr_effective,
bool *have_cache, bool *have_cache,
...@@ -898,8 +930,8 @@ static int open_bucket_add_buckets(struct btree_trans *trans, ...@@ -898,8 +930,8 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
struct bch_devs_mask devs; struct bch_devs_mask devs;
struct open_bucket *ob; struct open_bucket *ob;
struct closure *cl = NULL; struct closure *cl = NULL;
int ret;
unsigned i; unsigned i;
int ret;
rcu_read_lock(); rcu_read_lock();
devs = target_rw_devs(c, wp->data_type, target); devs = target_rw_devs(c, wp->data_type, target);
...@@ -912,35 +944,28 @@ static int open_bucket_add_buckets(struct btree_trans *trans, ...@@ -912,35 +944,28 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
open_bucket_for_each(c, ptrs, ob, i) open_bucket_for_each(c, ptrs, ob, i)
__clear_bit(ob->dev, devs.d); __clear_bit(ob->dev, devs.d);
if (erasure_code) { if (erasure_code && ec_open_bucket(c, ptrs))
if (!ec_open_bucket(c, ptrs)) {
get_buckets_from_writepoint(c, ptrs, wp, &devs,
nr_replicas, nr_effective,
have_cache, flags, true);
if (*nr_effective >= nr_replicas)
return 0; return 0;
}
if (!ec_open_bucket(c, ptrs)) { ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
target, erasure_code,
nr_replicas, nr_effective, nr_replicas, nr_effective,
have_cache, reserve, flags, _cl); have_cache, erasure_code, flags);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || if (ret)
bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
return ret; return ret;
if (*nr_effective >= nr_replicas)
return 0;
}
}
get_buckets_from_writepoint(c, ptrs, wp, &devs, ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
nr_replicas, nr_effective, nr_replicas, nr_effective,
have_cache, flags, false); have_cache, erasure_code, reserve, flags);
if (*nr_effective >= nr_replicas) if (ret)
return 0; return ret;
if (erasure_code) {
ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
target,
nr_replicas, nr_effective,
have_cache,
reserve, flags, _cl);
} else {
retry_blocking: retry_blocking:
/* /*
* Try nonblocking first, so that if one device is full we'll try from * Try nonblocking first, so that if one device is full we'll try from
...@@ -948,7 +973,7 @@ static int open_bucket_add_buckets(struct btree_trans *trans, ...@@ -948,7 +973,7 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
*/ */
ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs, ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
nr_replicas, nr_effective, have_cache, nr_replicas, nr_effective, have_cache,
reserve, flags, cl); flags, wp->data_type, reserve, cl);
if (ret && if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_insufficient_devices) && !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
...@@ -957,7 +982,45 @@ static int open_bucket_add_buckets(struct btree_trans *trans, ...@@ -957,7 +982,45 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
goto retry_blocking; goto retry_blocking;
} }
}
return ret;
}
static int open_bucket_add_buckets(struct btree_trans *trans,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_list *devs_have,
u16 target,
unsigned erasure_code,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
{
int ret;
if (erasure_code) {
ret = __open_bucket_add_buckets(trans, ptrs, wp,
devs_have, target, erasure_code,
nr_replicas, nr_effective, have_cache,
reserve, flags, cl);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
return ret; return ret;
if (*nr_effective >= nr_replicas)
return 0;
}
ret = __open_bucket_add_buckets(trans, ptrs, wp,
devs_have, target, false,
nr_replicas, nr_effective, have_cache,
reserve, flags, cl);
return ret < 0 ? ret : 0;
} }
void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca, void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
...@@ -1156,13 +1219,11 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, ...@@ -1156,13 +1219,11 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
struct open_bucket *ob; struct open_bucket *ob;
struct open_buckets ptrs; struct open_buckets ptrs;
unsigned nr_effective, write_points_nr; unsigned nr_effective, write_points_nr;
unsigned ob_flags = 0;
bool have_cache; bool have_cache;
int ret; int ret;
int i; int i;
if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
BUG_ON(!nr_replicas || !nr_replicas_required); BUG_ON(!nr_replicas || !nr_replicas_required);
retry: retry:
...@@ -1173,34 +1234,42 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, ...@@ -1173,34 +1234,42 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
*wp_ret = wp = writepoint_find(trans, write_point.v); *wp_ret = wp = writepoint_find(trans, write_point.v);
if (wp->data_type == BCH_DATA_user)
ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
/* metadata may not allocate on cache devices: */ /* metadata may not allocate on cache devices: */
if (wp->data_type != BCH_DATA_user) if (wp->data_type != BCH_DATA_user)
have_cache = true; have_cache = true;
if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
ob_flags, cl);
} else {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code, target, erasure_code,
nr_replicas, &nr_effective, nr_replicas, &nr_effective,
&have_cache, reserve, &have_cache, reserve,
ob_flags, NULL); flags, NULL);
if (!ret || if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto alloc_done; goto alloc_done;
/* Don't retry from all devices if we're out of open buckets: */
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
goto allocate_blocking;
/*
* Only try to allocate cache (durability = 0 devices) from the
* specified target:
*/
have_cache = true;
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
0, erasure_code, 0, erasure_code,
nr_replicas, &nr_effective, nr_replicas, &nr_effective,
&have_cache, reserve, &have_cache, reserve,
ob_flags, cl); flags, cl);
} else {
allocate_blocking:
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
flags, cl);
} }
alloc_done: alloc_done:
BUG_ON(!ret && nr_effective < nr_replicas); BUG_ON(!ret && nr_effective < nr_replicas);
......
...@@ -31,8 +31,7 @@ void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *); ...@@ -31,8 +31,7 @@ void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
long bch2_bucket_alloc_new_fs(struct bch_dev *); long bch2_bucket_alloc_new_fs(struct bch_dev *);
struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
enum alloc_reserve, bool, enum alloc_reserve, struct closure *);
struct closure *);
static inline void ob_push(struct bch_fs *c, struct open_buckets *obs, static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
struct open_bucket *ob) struct open_bucket *ob)
...@@ -152,8 +151,9 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 ...@@ -152,8 +151,9 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *, int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
struct dev_stripe_state *, struct bch_devs_mask *, struct dev_stripe_state *, struct bch_devs_mask *,
unsigned, unsigned *, bool *, enum alloc_reserve, unsigned, unsigned *, bool *, unsigned,
unsigned, struct closure *); enum bch_data_type, enum alloc_reserve,
struct closure *);
int bch2_alloc_sectors_start_trans(struct btree_trans *, int bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned, unsigned, unsigned,
......
...@@ -53,10 +53,9 @@ struct open_bucket { ...@@ -53,10 +53,9 @@ struct open_bucket {
* the block in the stripe this open_bucket corresponds to: * the block in the stripe this open_bucket corresponds to:
*/ */
u8 ec_idx; u8 ec_idx;
enum bch_data_type data_type:8; enum bch_data_type data_type:6;
unsigned valid:1; unsigned valid:1;
unsigned on_partial_list:1; unsigned on_partial_list:1;
unsigned alloc_reserve:3;
u8 dev; u8 dev;
u8 gen; u8 gen;
......
...@@ -1451,9 +1451,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ ...@@ -1451,9 +1451,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
&devs, &devs,
h->s->nr_parity, h->s->nr_parity,
&nr_have_parity, &nr_have_parity,
&have_cache, &have_cache, 0,
BCH_DATA_parity,
reserve, reserve,
0,
cl); cl);
open_bucket_for_each(c, &buckets, ob, i) { open_bucket_for_each(c, &buckets, ob, i) {
...@@ -1478,9 +1478,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ ...@@ -1478,9 +1478,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
&devs, &devs,
h->s->nr_data, h->s->nr_data,
&nr_have_data, &nr_have_data,
&have_cache, &have_cache, 0,
BCH_DATA_user,
reserve, reserve,
0,
cl); cl);
open_bucket_for_each(c, &buckets, ob, i) { open_bucket_for_each(c, &buckets, ob, i) {
......
...@@ -780,8 +780,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, ...@@ -780,8 +780,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
break; break;
} }
} else { } else {
ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none, ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none, cl);
false, cl);
ret = PTR_ERR_OR_ZERO(ob[nr_got]); ret = PTR_ERR_OR_ZERO(ob[nr_got]);
if (ret) if (ret)
break; break;
......
...@@ -516,7 +516,6 @@ DEFINE_EVENT(bch_fs, gc_gens_end, ...@@ -516,7 +516,6 @@ DEFINE_EVENT(bch_fs, gc_gens_end,
DECLARE_EVENT_CLASS(bucket_alloc, DECLARE_EVENT_CLASS(bucket_alloc,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve, TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
bool user,
u64 bucket, u64 bucket,
u64 free, u64 free,
u64 avail, u64 avail,
...@@ -525,14 +524,13 @@ DECLARE_EVENT_CLASS(bucket_alloc, ...@@ -525,14 +524,13 @@ DECLARE_EVENT_CLASS(bucket_alloc,
struct bucket_alloc_state *s, struct bucket_alloc_state *s,
bool nonblocking, bool nonblocking,
const char *err), const char *err),
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail, TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for, copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err), s, nonblocking, err),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(u8, dev ) __field(u8, dev )
__array(char, reserve, 16 ) __array(char, reserve, 16 )
__field(bool, user )
__field(u64, bucket ) __field(u64, bucket )
__field(u64, free ) __field(u64, free )
__field(u64, avail ) __field(u64, avail )
...@@ -550,7 +548,6 @@ DECLARE_EVENT_CLASS(bucket_alloc, ...@@ -550,7 +548,6 @@ DECLARE_EVENT_CLASS(bucket_alloc,
TP_fast_assign( TP_fast_assign(
__entry->dev = ca->dev_idx; __entry->dev = ca->dev_idx;
strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve)); strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
__entry->user = user;
__entry->bucket = bucket; __entry->bucket = bucket;
__entry->free = free; __entry->free = free;
__entry->avail = avail; __entry->avail = avail;
...@@ -565,9 +562,8 @@ DECLARE_EVENT_CLASS(bucket_alloc, ...@@ -565,9 +562,8 @@ DECLARE_EVENT_CLASS(bucket_alloc,
strscpy(__entry->err, err, sizeof(__entry->err)); strscpy(__entry->err, err, sizeof(__entry->err));
), ),
TP_printk("reserve %s user %u bucket %u:%llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s", TP_printk("reserve %s bucket %u:%llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
__entry->reserve, __entry->reserve,
__entry->user,
__entry->dev, __entry->dev,
__entry->bucket, __entry->bucket,
__entry->free, __entry->free,
...@@ -585,7 +581,6 @@ DECLARE_EVENT_CLASS(bucket_alloc, ...@@ -585,7 +581,6 @@ DECLARE_EVENT_CLASS(bucket_alloc,
DEFINE_EVENT(bucket_alloc, bucket_alloc, DEFINE_EVENT(bucket_alloc, bucket_alloc,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve, TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
bool user,
u64 bucket, u64 bucket,
u64 free, u64 free,
u64 avail, u64 avail,
...@@ -594,14 +589,13 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc, ...@@ -594,14 +589,13 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc,
struct bucket_alloc_state *s, struct bucket_alloc_state *s,
bool nonblocking, bool nonblocking,
const char *err), const char *err),
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail, TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for, copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err) s, nonblocking, err)
); );
DEFINE_EVENT(bucket_alloc, bucket_alloc_fail, DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve, TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
bool user,
u64 bucket, u64 bucket,
u64 free, u64 free,
u64 avail, u64 avail,
...@@ -610,7 +604,7 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc_fail, ...@@ -610,7 +604,7 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
struct bucket_alloc_state *s, struct bucket_alloc_state *s,
bool nonblocking, bool nonblocking,
const char *err), const char *err),
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail, TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for, copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err) s, nonblocking, err)
); );
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment