Commit d9534cc9 authored by Kent Overstreet

bcachefs: fix buffer overflow in nocow write path

BCH_REPLICAS_MAX isn't the actual maximum number of pointers in an
extent, it's the maximum number of dirty pointers.

We don't have a real restriction on the number of cached pointers, and
we don't want a fixed size array here anyways - so switch to
DARRAY_PREALLOCATED().
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Reported-and-tested-by: Daniel J Blueman <daniel@quora.org>
parent 099dc5c2
...@@ -1216,6 +1216,12 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done) ...@@ -1216,6 +1216,12 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done)
bch2_write_done(cl); bch2_write_done(cl);
} }
/*
 * One entry per extent pointer visited by bch2_nocow_write(): records the
 * bucket that pointer refers to so it can be nocow-locked, checked for
 * staleness and later unlocked. Entries are collected in a darray rather
 * than a fixed-size array.
 */
struct bucket_to_lock {
struct bpos b;	/* bucket position, from PTR_BUCKET_POS(c, ptr) */
unsigned gen;	/* ptr->gen at lookup; compared against bucket_gen() to detect a stale bucket */
struct nocow_lock_bucket *l;	/* result of bucket_nocow_lock() for this bucket; prefetched before locking */
};
static void bch2_nocow_write(struct bch_write_op *op) static void bch2_nocow_write(struct bch_write_op *op)
{ {
struct bch_fs *c = op->c; struct bch_fs *c = op->c;
...@@ -1224,18 +1230,16 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1224,18 +1230,16 @@ static void bch2_nocow_write(struct bch_write_op *op)
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_ptrs_c ptrs; struct bkey_ptrs_c ptrs;
const struct bch_extent_ptr *ptr; const struct bch_extent_ptr *ptr;
struct { DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets;
struct bpos b; struct bucket_to_lock *i;
unsigned gen;
struct nocow_lock_bucket *l;
} buckets[BCH_REPLICAS_MAX];
unsigned nr_buckets = 0;
u32 snapshot; u32 snapshot;
int ret, i; struct bucket_to_lock *stale_at;
int ret;
if (op->flags & BCH_WRITE_MOVE) if (op->flags & BCH_WRITE_MOVE)
return; return;
darray_init(&buckets);
trans = bch2_trans_get(c); trans = bch2_trans_get(c);
retry: retry:
bch2_trans_begin(trans); bch2_trans_begin(trans);
...@@ -1250,7 +1254,7 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1250,7 +1254,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
while (1) { while (1) {
struct bio *bio = &op->wbio.bio; struct bio *bio = &op->wbio.bio;
nr_buckets = 0; buckets.nr = 0;
k = bch2_btree_iter_peek_slot(&iter); k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k); ret = bkey_err(k);
...@@ -1263,26 +1267,26 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1263,26 +1267,26 @@ static void bch2_nocow_write(struct bch_write_op *op)
break; break;
if (bch2_keylist_realloc(&op->insert_keys, if (bch2_keylist_realloc(&op->insert_keys,
op->inline_keys, op->inline_keys,
ARRAY_SIZE(op->inline_keys), ARRAY_SIZE(op->inline_keys),
k.k->u64s)) k.k->u64s))
break; break;
/* Get iorefs before dropping btree locks: */ /* Get iorefs before dropping btree locks: */
ptrs = bch2_bkey_ptrs_c(k); ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr) { bkey_for_each_ptr(ptrs, ptr) {
buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); struct bpos b = PTR_BUCKET_POS(c, ptr);
buckets[nr_buckets].gen = ptr->gen; struct nocow_lock_bucket *l =
buckets[nr_buckets].l = bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b));
bucket_nocow_lock(&c->nocow_locks, prefetch(l);
bucket_to_u64(buckets[nr_buckets].b));
prefetch(buckets[nr_buckets].l);
if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE))) if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
goto err_get_ioref; goto err_get_ioref;
nr_buckets++; /* XXX allocating memory with btree locks held - rare */
darray_push_gfp(&buckets, ((struct bucket_to_lock) {
.b = b, .gen = ptr->gen, .l = l,
}), GFP_KERNEL|__GFP_NOFAIL);
if (ptr->unwritten) if (ptr->unwritten)
op->flags |= BCH_WRITE_CONVERT_UNWRITTEN; op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
...@@ -1296,21 +1300,21 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1296,21 +1300,21 @@ static void bch2_nocow_write(struct bch_write_op *op)
if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN) if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top); bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
for (i = 0; i < nr_buckets; i++) { darray_for_each(buckets, i) {
struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode);
struct nocow_lock_bucket *l = buckets[i].l;
bool stale;
__bch2_bucket_nocow_lock(&c->nocow_locks, l, __bch2_bucket_nocow_lock(&c->nocow_locks, i->l,
bucket_to_u64(buckets[i].b), bucket_to_u64(i->b),
BUCKET_NOCOW_LOCK_UPDATE); BUCKET_NOCOW_LOCK_UPDATE);
rcu_read_lock(); rcu_read_lock();
stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen);
rcu_read_unlock(); rcu_read_unlock();
if (unlikely(stale)) if (unlikely(stale)) {
stale_at = i;
goto err_bucket_stale; goto err_bucket_stale;
}
} }
bio = &op->wbio.bio; bio = &op->wbio.bio;
...@@ -1346,15 +1350,14 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1346,15 +1350,14 @@ static void bch2_nocow_write(struct bch_write_op *op)
if (ret) { if (ret) {
bch_err_inum_offset_ratelimited(c, bch_err_inum_offset_ratelimited(c,
op->pos.inode, op->pos.inode, op->pos.offset << 9,
op->pos.offset << 9, "%s: btree lookup error %s", __func__, bch2_err_str(ret));
"%s: btree lookup error %s",
__func__, bch2_err_str(ret));
op->error = ret; op->error = ret;
op->flags |= BCH_WRITE_DONE; op->flags |= BCH_WRITE_DONE;
} }
bch2_trans_put(trans); bch2_trans_put(trans);
darray_exit(&buckets);
/* fallback to cow write path? */ /* fallback to cow write path? */
if (!(op->flags & BCH_WRITE_DONE)) { if (!(op->flags & BCH_WRITE_DONE)) {
...@@ -1374,24 +1377,21 @@ static void bch2_nocow_write(struct bch_write_op *op) ...@@ -1374,24 +1377,21 @@ static void bch2_nocow_write(struct bch_write_op *op)
} }
return; return;
err_get_ioref: err_get_ioref:
for (i = 0; i < nr_buckets; i++) darray_for_each(buckets, i)
percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref);
/* Fall back to COW path: */ /* Fall back to COW path: */
goto out; goto out;
err_bucket_stale: err_bucket_stale:
while (i >= 0) { darray_for_each(buckets, i) {
bch2_bucket_nocow_unlock(&c->nocow_locks, bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE);
buckets[i].b, if (i == stale_at)
BUCKET_NOCOW_LOCK_UPDATE); break;
--i;
} }
for (i = 0; i < nr_buckets; i++)
percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
/* We can retry this: */ /* We can retry this: */
ret = -BCH_ERR_transaction_restart; ret = -BCH_ERR_transaction_restart;
goto out; goto err_get_ioref;
} }
static void __bch2_write(struct bch_write_op *op) static void __bch2_write(struct bch_write_op *op)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment