Commit 890b74f0 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Fsck for reflink refcounts
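
This adds a gc pass for reflink refcounts: bch2_gc_reflink_start() builds an
in-memory reflink_gc_table with one entry per indirect extent, marking
reflink pointers accumulates refcounts into it, and bch2_gc_reflink_done()
compares the recomputed counts against the values stored in the reflink
btree and repairs any that are wrong.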

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent c0ebe3e4
@@ -391,6 +391,14 @@ struct gc_pos {
unsigned level;
};
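/*
 * One entry per indirect extent, used by gc to recompute reflink refcounts:
 * @offset is the extent's end position, @size its length in sectors, and
 * @refcount the count accumulated while marking reflink pointers.
 */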
struct reflink_gc {
u64 offset;
u32 size;
u32 refcount;
};
typedef GENRADIX(struct reflink_gc) reflink_gc_table;
struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
@@ -806,6 +814,9 @@ mempool_t bio_bounce_pages;
/* REFLINK */
u64 reflink_hint;
reflink_gc_table reflink_gc_table;
size_t reflink_gc_nr;
size_t reflink_gc_idx;
/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;
@@ -23,6 +23,7 @@
#include "keylist.h"
#include "move.h"
#include "recovery.h"
#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"
@@ -1285,6 +1286,201 @@ static int bch2_gc_start(struct bch_fs *c,
return 0;
}
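/*
 * gc finish: compare the refcounts recomputed in reflink_gc_table against the
 * refcounts stored in the reflink btree, and rewrite any key whose stored
 * refcount is wrong (deleting it if the correct refcount is zero).
 */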
static int bch2_gc_reflink_done_initial_fn(struct bch_fs *c, struct bkey_s_c k)
{
struct reflink_gc *r;
const __le64 *refcount = bkey_refcount_c(k);
char buf[200];
int ret = 0;
if (!refcount)
return 0;
r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
if (!r)
return -ENOMEM;
if (!r ||
r->offset != k.k->p.offset ||
r->size != k.k->size) {
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
return -EINVAL;
}
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
"reflink key has wrong refcount:\n"
" %s\n"
" should be %u",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
r->refcount)) {
struct bkey_i *new;
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
goto fsck_err;
}
bkey_reassemble(new, k);
if (!r->refcount) {
new->k.type = KEY_TYPE_deleted;
new->k.size = 0;
} else {
*bkey_refcount(new) = cpu_to_le64(r->refcount);
}
ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
if (ret)
kfree(new);
}
fsck_err:
return ret;
}
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
bool metadata_only)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct reflink_gc *r;
size_t idx = 0;
char buf[200];
int ret = 0;
if (metadata_only)
return 0;
if (initial) {
c->reflink_gc_idx = 0;
ret = bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
bch2_gc_reflink_done_initial_fn);
goto out;
}
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
continue;
r = genradix_ptr(&c->reflink_gc_table, idx);
if (!r ||
r->offset != k.k->p.offset ||
r->size != k.k->size) {
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
ret = -EINVAL;
break;
}
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
"reflink key has wrong refcount:\n"
" %s\n"
" should be %u",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
r->refcount)) {
struct bkey_i *new;
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
break;
}
bkey_reassemble(new, k);
if (!r->refcount)
new->k.type = KEY_TYPE_deleted;
else
*bkey_refcount(new) = cpu_to_le64(r->refcount);
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
kfree(new);
if (ret)
break;
}
}
fsck_err:
bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans);
out:
genradix_free(&c->reflink_gc_table);
c->reflink_gc_nr = 0;
return ret;
}
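/*
 * gc start: walk the reflink btree (plus, during initial gc, keys still in
 * the journal) and create a zeroed reflink_gc_table entry for each indirect
 * extent, ready to accumulate refcounts while marking.
 */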
static int bch2_gc_reflink_start_initial_fn(struct bch_fs *c, struct bkey_s_c k)
{
struct reflink_gc *r;
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
return 0;
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
GFP_KERNEL);
if (!r)
return -ENOMEM;
r->offset = k.k->p.offset;
r->size = k.k->size;
r->refcount = 0;
return 0;
}
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
bool metadata_only)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct reflink_gc *r;
int ret;
if (metadata_only)
return 0;
genradix_free(&c->reflink_gc_table);
c->reflink_gc_nr = 0;
if (initial)
return bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
bch2_gc_reflink_start_initial_fn);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
continue;
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
GFP_KERNEL);
if (!r) {
ret = -ENOMEM;
break;
}
r->offset = k.k->p.offset;
r->size = k.k->size;
r->refcount = 0;
}
bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans);
return ret;
}
/**
* bch2_gc - walk _all_ references to buckets, and recompute them:
*
@@ -1319,7 +1515,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
again:
-ret = bch2_gc_start(c, metadata_only);
+ret = bch2_gc_start(c, metadata_only) ?:
+	bch2_gc_reflink_start(c, initial, metadata_only);
if (ret)
goto out;
@@ -1381,7 +1578,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
bch2_journal_block(&c->journal);
percpu_down_write(&c->mark_lock);
-ret = bch2_gc_done(c, initial, metadata_only);
+ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
+	bch2_gc_done(c, initial, metadata_only);
bch2_journal_unblock(&c->journal);
} else {
@@ -14,6 +14,7 @@
#include "ec.h"
#include "error.h"
#include "movinggc.h"
#include "reflink.h"
#include "replicas.h"
#include "trace.h"
@@ -1076,6 +1077,124 @@ static int bch2_mark_stripe(struct bch_fs *c,
return 0;
}
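/*
 * Does the fragment [p_start, p_end) of reflink pointer @p (offsets relative
 * to the start of the pointer) overlap the indirect extent [v_start, v_end)?
 */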
static int __reflink_p_frag_references(struct bkey_s_c_reflink_p p,
u64 p_start, u64 p_end,
u64 v_start, u64 v_end)
{
if (p_start == p_end)
return false;
p_start += le64_to_cpu(p.v->idx);
p_end += le64_to_cpu(p.v->idx);
if (p_end <= v_start)
return false;
if (p_start >= v_end)
return false;
return true;
}
static int reflink_p_frag_references(struct bkey_s_c_reflink_p p,
u64 start, u64 end,
struct bkey_s_c k)
{
return __reflink_p_frag_references(p, start, end,
bkey_start_offset(k.k),
k.k->p.offset);
}
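/*
 * Adjust the gc refcount of the indirect extent containing @idx by the delta
 * implied by @flags, accounting for fragments of @p that survive an
 * overwrite; returns how many of @sectors are covered by that extent, so the
 * caller can advance.
 */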
static int __bch2_mark_reflink_p(struct bch_fs *c,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned sectors,
unsigned front_frag,
unsigned back_frag,
unsigned flags,
size_t *r_idx)
{
struct reflink_gc *r;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
int frags_referenced;
while (1) {
if (*r_idx >= c->reflink_gc_nr)
goto not_found;
r = genradix_ptr(&c->reflink_gc_table, *r_idx);
BUG_ON(!r);
if (r->offset > idx)
break;
(*r_idx)++;
}
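/*
 * Check whether the parts of this reflink pointer outside the overwritten
 * range [front_frag, back_frag) still reference this indirect extent, and
 * adjust the refcount delta accordingly:
 */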
frags_referenced =
__reflink_p_frag_references(p, 0, front_frag,
r->offset - r->size, r->offset) +
__reflink_p_frag_references(p, back_frag, p.k->size,
r->offset - r->size, r->offset);
if (frags_referenced == 2) {
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
add = -add;
} else if (frags_referenced == 1) {
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
add = 0;
}
BUG_ON((s64) r->refcount + add < 0);
r->refcount += add;
return min_t(u64, sectors, r->offset - idx);
not_found:
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
bch2_inconsistent_error(c);
return -EIO;
}
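/*
 * Mark a reflink pointer: binary search reflink_gc_table for the first
 * indirect extent ending past @idx, then walk forward adjusting the gc
 * refcount of every extent the pointer references.
 */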
static int bch2_mark_reflink_p(struct bch_fs *c,
struct bkey_s_c_reflink_p p, unsigned offset,
s64 sectors, unsigned flags)
{
u64 idx = le64_to_cpu(p.v->idx) + offset;
struct reflink_gc *ref;
size_t l, r, m;
unsigned front_frag, back_frag;
s64 ret = 0;
if (sectors < 0)
sectors = -sectors;
BUG_ON(offset + sectors > p.k->size);
front_frag = offset;
back_frag = offset + sectors;
l = 0;
r = c->reflink_gc_nr;
while (l < r) {
m = l + (r - l) / 2;
ref = genradix_ptr(&c->reflink_gc_table, m);
if (ref->offset <= idx)
l = m + 1;
else
r = m;
}
while (sectors) {
ret = __bch2_mark_reflink_p(c, p, idx, sectors,
front_frag, back_frag, flags, &l);
if (ret < 0)
return ret;
idx += ret;
sectors -= ret;
}
return 0;
}
static int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c old,
struct bkey_s_c new,
@@ -1131,6 +1250,10 @@ static int bch2_mark_key_locked(struct bch_fs *c,
fs_usage->persistent_reserved[replicas - 1] += sectors;
break;
}
case KEY_TYPE_reflink_p:
ret = bch2_mark_reflink_p(c, bkey_s_c_to_reflink_p(k),
offset, sectors, flags);
break;
}
preempt_enable();
@@ -1693,35 +1816,6 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
return ret;
}
-static __le64 *bkey_refcount(struct bkey_i *k)
-{
-switch (k->k.type) {
-case KEY_TYPE_reflink_v:
-return &bkey_i_to_reflink_v(k)->v.refcount;
-case KEY_TYPE_indirect_inline_data:
-return &bkey_i_to_indirect_inline_data(k)->v.refcount;
-default:
-return NULL;
-}
-}
-static bool reflink_p_frag_references(struct bkey_s_c_reflink_p p,
-u64 start, u64 end,
-struct bkey_s_c k)
-{
-if (start == end)
-return false;
-start += le64_to_cpu(p.v->idx);
-end += le64_to_cpu(p.v->idx);
-if (end <= bkey_start_offset(k.k))
-return false;
-if (start >= k.k->p.offset)
-return false;
-return true;
-}
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned sectors,
@@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
-refcount = (void *) &r_v->v;
+refcount = bkey_refcount(r_v);
*refcount = 0;
memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
@@ -34,6 +34,30 @@ void bch2_indirect_inline_data_to_text(struct printbuf *,
.val_to_text = bch2_indirect_inline_data_to_text, \
}
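/* Accessors for the refcount field of reflink_v/indirect_inline_data keys: */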
static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
{
switch (k.k->type) {
case KEY_TYPE_reflink_v:
return &bkey_s_c_to_reflink_v(k).v->refcount;
case KEY_TYPE_indirect_inline_data:
return &bkey_s_c_to_indirect_inline_data(k).v->refcount;
default:
return NULL;
}
}
static inline __le64 *bkey_refcount(struct bkey_i *k)
{
switch (k->k.type) {
case KEY_TYPE_reflink_v:
return &bkey_i_to_reflink_v(k)->v.refcount;
case KEY_TYPE_indirect_inline_data:
return &bkey_i_to_indirect_inline_data(k)->v.refcount;
default:
return NULL;
}
}
s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
u64, u64 *, u64, s64 *);