Commit 0ef837a0 authored by Robbie Litchfield's avatar Robbie Litchfield Committed by Kent Overstreet

bcachefs: Fix unnecessary read amplificaiton when allocating ec stripes

When allocating an erasure coding stripe, bcachefs will always reuse any
partial stripes before reserving a new stripe. This causes unnecessary
read amplification when preparing a stripe for writing. This patch changes
bcachefs to always reserve new stripes first, only relying on stripe reuse
when copygc needs more time to empty buckets from existing stripes.
Signed-off-by: default avatarRobbie Litchfield <blam.kiwi@gmail.com>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 2bb748a6
...@@ -1389,6 +1389,72 @@ static s64 get_existing_stripe(struct bch_fs *c, ...@@ -1389,6 +1389,72 @@ static s64 get_existing_stripe(struct bch_fs *c,
return ret; return ret;
} }
static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
struct ec_stripe_head *h)
{
unsigned i;
s64 idx;
int ret;
idx = get_existing_stripe(c, h);
if (idx < 0) {
bch_err(c, "failed to find an existing stripe");
return -ENOSPC;
}
h->s->have_existing_stripe = true;
ret = get_stripe_key(c, idx, &h->s->existing_stripe);
if (ret) {
bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
return ret;
}
if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
/*
* this is a problem: we have deleted from the
* stripes heap already
*/
BUG();
}
BUG_ON(h->s->existing_stripe.size != h->blocksize);
BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
__set_bit(i, h->s->blocks_gotten);
__set_bit(i, h->s->blocks_allocated);
}
ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
}
bkey_copy(&h->s->new_stripe.key.k_i,
&h->s->existing_stripe.key.k_i);
return 0;
}
static int __bch2_ec_stripe_head_reserve(struct bch_fs *c,
struct ec_stripe_head *h)
{
int ret;
ret = bch2_disk_reservation_get(c, &h->s->res,
h->blocksize,
h->s->nr_parity, 0);
if (ret) {
/*
* This means we need to wait for copygc to
* empty out buckets from existing stripes:
*/
bch_err(c, "failed to reserve stripe");
}
return ret;
}
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target, unsigned target,
unsigned algo, unsigned algo,
...@@ -1397,9 +1463,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, ...@@ -1397,9 +1463,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
struct closure *cl) struct closure *cl)
{ {
struct ec_stripe_head *h; struct ec_stripe_head *h;
unsigned i;
s64 idx;
int ret; int ret;
bool needs_stripe_new;
h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc); h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc);
if (!h) { if (!h) {
...@@ -1407,80 +1472,44 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, ...@@ -1407,80 +1472,44 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
return NULL; return NULL;
} }
if (!h->s) { needs_stripe_new = !h->s;
if (needs_stripe_new) {
if (ec_new_stripe_alloc(c, h)) { if (ec_new_stripe_alloc(c, h)) {
bch2_ec_stripe_head_put(c, h); ret = -ENOMEM;
bch_err(c, "failed to allocate new stripe"); bch_err(c, "failed to allocate new stripe");
return NULL; goto err;
}
idx = get_existing_stripe(c, h);
if (idx >= 0) {
h->s->have_existing_stripe = true;
ret = get_stripe_key(c, idx, &h->s->existing_stripe);
if (ret) {
bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
bch2_ec_stripe_head_put(c, h);
return NULL;
}
if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
/*
* this is a problem: we have deleted from the
* stripes heap already
*/
BUG();
}
BUG_ON(h->s->existing_stripe.size != h->blocksize);
BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
__set_bit(i, h->s->blocks_gotten);
__set_bit(i, h->s->blocks_allocated);
}
ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
}
bkey_copy(&h->s->new_stripe.key.k_i,
&h->s->existing_stripe.key.k_i);
} }
if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) { if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize))
BUG(); BUG();
}
} }
if (!h->s->allocated) { /*
if (!h->s->have_existing_stripe && * Try reserve a new stripe before reusing an
!h->s->res.sectors) { * existing stripe. This will prevent unnecessary
ret = bch2_disk_reservation_get(c, &h->s->res, * read amplification during write oriented workloads.
h->blocksize, */
h->s->nr_parity, 0); ret = 0;
if (ret) { if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe)
/* ret = __bch2_ec_stripe_head_reserve(c, h);
* This means we need to wait for copygc to if (ret && needs_stripe_new)
* empty out buckets from existing stripes: ret = __bch2_ec_stripe_head_reuse(c, h);
*/ if (ret)
bch2_ec_stripe_head_put(c, h); goto err;
h = NULL;
goto out;
}
}
if (!h->s->allocated) {
ret = new_stripe_alloc_buckets(c, h, cl); ret = new_stripe_alloc_buckets(c, h, cl);
if (ret) { if (ret)
bch2_ec_stripe_head_put(c, h); goto err;
h = ERR_PTR(-ret);
goto out;
}
h->s->allocated = true; h->s->allocated = true;
} }
out:
return h; return h;
err:
bch2_ec_stripe_head_put(c, h);
return ERR_PTR(-ret);
} }
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment