Commit 7bd4d370, authored by Javier González, committed by Jens Axboe

lightnvm: pblk: guarantee line integrity on reads

When a line is recycled during garbage collection, reads can still be
issued to the line. If the line is freed in the middle of this process,
data corruption might occur.

This patch guarantees that a line is not freed while reads that target
it are still in flight. Specifically, we use the existing line reference
to decide when a line is eligible for being freed after the recycle
process.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent a4809fee
...@@ -1460,10 +1460,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line) ...@@ -1460,10 +1460,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
line->emeta = NULL; line->emeta = NULL;
} }
void pblk_line_put(struct kref *ref) static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
{ {
struct pblk_line *line = container_of(ref, struct pblk_line, ref);
struct pblk *pblk = line->pblk;
struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_mgmt *l_mg = &pblk->l_mg;
spin_lock(&line->lock); spin_lock(&line->lock);
...@@ -1481,6 +1479,43 @@ void pblk_line_put(struct kref *ref) ...@@ -1481,6 +1479,43 @@ void pblk_line_put(struct kref *ref)
pblk_rl_free_lines_inc(&pblk->rl, line); pblk_rl_free_lines_inc(&pblk->rl, line);
} }
/*
 * Work handler for a deferred line put.
 *
 * Recovers the pblk_line_ws that embeds @work, runs __pblk_line_put() on the
 * line recorded in it, and then returns the pblk_line_ws to gen_ws_pool.
 */
static void pblk_line_put_ws(struct work_struct *work)
{
	struct pblk_line_ws *ws_ctx = container_of(work, struct pblk_line_ws,
						   ws);
	/* Read pblk out of the context first: it is still needed to reach
	 * the mempool after the context itself has been handed back.
	 */
	struct pblk *pblk = ws_ctx->pblk;

	__pblk_line_put(pblk, ws_ctx->line);
	mempool_free(ws_ctx, pblk->gen_ws_pool);
}
/*
 * kref release callback: perform the line put directly in the caller's
 * context.
 *
 * @ref: the kref embedded in a struct pblk_line.
 */
void pblk_line_put(struct kref *ref)
{
	struct pblk_line *line = container_of(ref, struct pblk_line, ref);

	__pblk_line_put(line->pblk, line);
}
/*
 * kref release callback: defer the line put to the r_end_wq workqueue
 * instead of running it in the caller's context.
 *
 * A pblk_line_ws context is taken from gen_ws_pool with GFP_ATOMIC, filled
 * with the pblk instance and the line, and queued with pblk_line_put_ws() as
 * its handler.
 *
 * NOTE(review): if the allocation fails nothing is queued and the function
 * returns without any indication, so the deferred put for this line never
 * runs -- confirm this is acceptable or add a fallback.
 *
 * @ref: the kref embedded in a struct pblk_line.
 */
void pblk_line_put_wq(struct kref *ref)
{
	struct pblk_line *line = container_of(ref, struct pblk_line, ref);
	struct pblk *pblk = line->pblk;
	struct pblk_line_ws *ws_ctx;

	ws_ctx = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC);
	if (ws_ctx) {
		ws_ctx->pblk = pblk;
		ws_ctx->line = line;
		ws_ctx->priv = NULL;

		INIT_WORK(&ws_ctx->ws, pblk_line_put_ws);
		queue_work(pblk->r_end_wq, &ws_ctx->ws);
	}
}
int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
{ {
struct nvm_rq *rqd; struct nvm_rq *rqd;
...@@ -1878,8 +1913,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, ...@@ -1878,8 +1913,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
int i; int i;
spin_lock(&pblk->trans_lock); spin_lock(&pblk->trans_lock);
for (i = 0; i < nr_secs; i++) for (i = 0; i < nr_secs; i++) {
ppas[i] = pblk_trans_map_get(pblk, blba + i); struct ppa_addr ppa;
ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i);
/* If the L2P entry maps to a line, the reference is valid */
if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
int line_id = pblk_dev_ppa_to_line(ppa);
struct pblk_line *line = &pblk->lines[line_id];
kref_get(&line->ref);
}
}
spin_unlock(&pblk->trans_lock); spin_unlock(&pblk->trans_lock);
} }
......
...@@ -271,15 +271,22 @@ static int pblk_core_init(struct pblk *pblk) ...@@ -271,15 +271,22 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->bb_wq) if (!pblk->bb_wq)
goto free_close_wq; goto free_close_wq;
if (pblk_set_ppaf(pblk)) pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!pblk->r_end_wq)
goto free_bb_wq; goto free_bb_wq;
if (pblk_set_ppaf(pblk))
goto free_r_end_wq;
if (pblk_rwb_init(pblk)) if (pblk_rwb_init(pblk))
goto free_bb_wq; goto free_r_end_wq;
INIT_LIST_HEAD(&pblk->compl_list); INIT_LIST_HEAD(&pblk->compl_list);
return 0; return 0;
free_r_end_wq:
destroy_workqueue(pblk->r_end_wq);
free_bb_wq: free_bb_wq:
destroy_workqueue(pblk->bb_wq); destroy_workqueue(pblk->bb_wq);
free_close_wq: free_close_wq:
...@@ -304,6 +311,9 @@ static void pblk_core_free(struct pblk *pblk) ...@@ -304,6 +311,9 @@ static void pblk_core_free(struct pblk *pblk)
if (pblk->close_wq) if (pblk->close_wq)
destroy_workqueue(pblk->close_wq); destroy_workqueue(pblk->close_wq);
if (pblk->r_end_wq)
destroy_workqueue(pblk->r_end_wq);
if (pblk->bb_wq) if (pblk->bb_wq)
destroy_workqueue(pblk->bb_wq); destroy_workqueue(pblk->bb_wq);
......
...@@ -130,9 +130,34 @@ static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd, ...@@ -130,9 +130,34 @@ static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd,
} }
} }
static void pblk_end_io_read(struct nvm_rq *rqd) static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd)
{
struct ppa_addr *ppa_list;
int i;
ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
for (i = 0; i < rqd->nr_ppas; i++) {
struct ppa_addr ppa = ppa_list[i];
struct pblk_line *line;
line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
kref_put(&line->ref, pblk_line_put_wq);
}
}
/*
 * Finish a read bio: signal completion with bio_endio() and then drop a
 * reference with bio_put().
 *
 * With CONFIG_NVM_DEBUG enabled, a one-time warning is emitted beforehand if
 * the bio carries a non-zero bi_status.
 */
static void pblk_end_user_read(struct bio *bio)
{
#ifdef CONFIG_NVM_DEBUG
	WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
#endif

	bio_endio(bio);
	bio_put(bio);
}
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
bool put_line)
{ {
struct pblk *pblk = rqd->private;
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = rqd->bio; struct bio *bio = rqd->bio;
...@@ -146,15 +171,11 @@ static void pblk_end_io_read(struct nvm_rq *rqd) ...@@ -146,15 +171,11 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
pblk_read_check(pblk, rqd, r_ctx->lba); pblk_read_check(pblk, rqd, r_ctx->lba);
bio_put(bio); bio_put(bio);
if (r_ctx->private) { if (r_ctx->private)
struct bio *orig_bio = r_ctx->private; pblk_end_user_read((struct bio *)r_ctx->private);
#ifdef CONFIG_NVM_DEBUG if (put_line)
WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n"); pblk_read_put_rqd_kref(pblk, rqd);
#endif
bio_endio(orig_bio);
bio_put(orig_bio);
}
#ifdef CONFIG_NVM_DEBUG #ifdef CONFIG_NVM_DEBUG
atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
...@@ -165,6 +186,13 @@ static void pblk_end_io_read(struct nvm_rq *rqd) ...@@ -165,6 +186,13 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
atomic_dec(&pblk->inflight_io); atomic_dec(&pblk->inflight_io);
} }
/*
 * Read completion entry point taking only the request.
 *
 * The pblk instance is taken from rqd->private, and the common completion
 * path is invoked with put_line set to true.
 */
static void pblk_end_io_read(struct nvm_rq *rqd)
{
	__pblk_end_io_read(rqd->private, rqd, true);
}
static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
unsigned int bio_init_idx, unsigned int bio_init_idx,
unsigned long *read_bitmap) unsigned long *read_bitmap)
...@@ -233,8 +261,12 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, ...@@ -233,8 +261,12 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
} }
if (unlikely(nr_secs > 1 && nr_holes == 1)) { if (unlikely(nr_secs > 1 && nr_holes == 1)) {
struct ppa_addr ppa;
ppa = rqd->ppa_addr;
rqd->ppa_list = ppa_ptr; rqd->ppa_list = ppa_ptr;
rqd->dma_ppa_list = dma_ppa_list; rqd->dma_ppa_list = dma_ppa_list;
rqd->ppa_list[0] = ppa;
} }
for (i = 0; i < nr_secs; i++) { for (i = 0; i < nr_secs; i++) {
...@@ -246,6 +278,11 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, ...@@ -246,6 +278,11 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
i = 0; i = 0;
hole = find_first_zero_bit(read_bitmap, nr_secs); hole = find_first_zero_bit(read_bitmap, nr_secs);
do { do {
int line_id = pblk_dev_ppa_to_line(rqd->ppa_list[i]);
struct pblk_line *line = &pblk->lines[line_id];
kref_put(&line->ref, pblk_line_put);
meta_list[hole].lba = lba_list_media[i]; meta_list[hole].lba = lba_list_media[i];
src_bv = new_bio->bi_io_vec[i++]; src_bv = new_bio->bi_io_vec[i++];
...@@ -269,19 +306,17 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, ...@@ -269,19 +306,17 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
bio_put(new_bio); bio_put(new_bio);
/* Complete the original bio and associated request */ /* Complete the original bio and associated request */
bio_endio(bio);
rqd->bio = bio; rqd->bio = bio;
rqd->nr_ppas = nr_secs; rqd->nr_ppas = nr_secs;
rqd->private = pblk;
bio_endio(bio); __pblk_end_io_read(pblk, rqd, false);
pblk_end_io_read(rqd);
return NVM_IO_OK; return NVM_IO_OK;
err: err:
/* Free allocated pages in new bio */ /* Free allocated pages in new bio */
pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
rqd->private = pblk; __pblk_end_io_read(pblk, rqd, false);
pblk_end_io_read(rqd);
return NVM_IO_ERR; return NVM_IO_ERR;
} }
...@@ -314,11 +349,11 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, ...@@ -314,11 +349,11 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
goto retry; goto retry;
} }
WARN_ON(test_and_set_bit(0, read_bitmap));
meta_list[0].lba = cpu_to_le64(lba); meta_list[0].lba = cpu_to_le64(lba);
WARN_ON(test_and_set_bit(0, read_bitmap));
#ifdef CONFIG_NVM_DEBUG #ifdef CONFIG_NVM_DEBUG
atomic_long_inc(&pblk->cache_reads); atomic_long_inc(&pblk->cache_reads);
#endif #endif
} else { } else {
rqd->ppa_addr = ppa; rqd->ppa_addr = ppa;
...@@ -383,7 +418,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) ...@@ -383,7 +418,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
if (bitmap_full(&read_bitmap, nr_secs)) { if (bitmap_full(&read_bitmap, nr_secs)) {
bio_endio(bio); bio_endio(bio);
atomic_inc(&pblk->inflight_io); atomic_inc(&pblk->inflight_io);
pblk_end_io_read(rqd); __pblk_end_io_read(pblk, rqd, false);
return NVM_IO_OK; return NVM_IO_OK;
} }
......
...@@ -636,6 +636,7 @@ struct pblk { ...@@ -636,6 +636,7 @@ struct pblk {
struct workqueue_struct *close_wq; struct workqueue_struct *close_wq;
struct workqueue_struct *bb_wq; struct workqueue_struct *bb_wq;
struct workqueue_struct *r_end_wq;
struct timer_list wtimer; struct timer_list wtimer;
...@@ -741,6 +742,7 @@ int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, ...@@ -741,6 +742,7 @@ int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
void *emeta_buf); void *emeta_buf);
int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
void pblk_line_put(struct kref *ref); void pblk_line_put(struct kref *ref);
void pblk_line_put_wq(struct kref *ref);
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line); u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment