Commit 43f9b25a authored by Artem Bityutskiy

UBI: bugfix: protect from volume removal

When the WL worker is moving an LEB, the volume might go away
occasionally. UBI does not handle these situations correctly.

This patch introduces a new mutex which serializes the wear-levelling
worker and the 'ubi_wl_put_peb()' function. Now, if one puts
an LEB while its PEB is being moved, the caller will wait on the mutex.
And because we unmap all LEBs when removing volumes, this makes
the volume removal function wait until the LEB movement
finishes.

Below is an example of an oops which should be fixed by this patch:

Pid: 9167, comm: io_paral Not tainted (2.6.24-rc5-ubi-2.6.git #2)
EIP: 0060:[<f884a379>] EFLAGS: 00010246 CPU: 0
EIP is at prot_tree_del+0x2a/0x63 [ubi]
EAX: f39a90e0 EBX: 00000000 ECX: 00000000 EDX: 00000134
ESI: f39a90e0 EDI: f39a90e0 EBP: f2d55ddc ESP: f2d55dd4
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process io_paral (pid: 9167, ti=f2d54000 task=f72a8030 task.ti=f2d54000)
Stack: f39a95f8 ef6aae50 f2d55e08 f884a511 f88538e1 f884ecea 00000134 00000000
       f39a9604 f39a95f0 efea8280 00000000 f39a90e0 f2d55e40 f8847261 f8850c3c
       f884eaad 00000001 000000b9 00000134 00000172 000000b9 00000134 00000001
Call Trace:
 [<c0105227>] show_trace_log_lvl+0x1a/0x30
 [<c01052e2>] show_stack_log_lvl+0xa5/0xca
 [<c01053d6>] show_registers+0xcf/0x21b
 [<c0105648>] die+0x126/0x224
 [<c0119a62>] do_page_fault+0x27f/0x60d
 [<c037dd62>] error_code+0x72/0x78
 [<f884a511>] ubi_wl_put_peb+0xf0/0x191 [ubi]
 [<f8847261>] ubi_eba_unmap_leb+0xaf/0xcc [ubi]
 [<f8843c21>] ubi_remove_volume+0x102/0x1e8 [ubi]
 [<f8846077>] ubi_cdev_ioctl+0x22a/0x383 [ubi]
 [<c017d768>] do_ioctl+0x68/0x71
 [<c017d7c6>] vfs_ioctl+0x55/0x271
 [<c017da15>] sys_ioctl+0x33/0x52
 [<c0104152>] sysenter_past_esp+0x5f/0xa5
 =======================
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
parent d2c46855
...@@ -259,6 +259,44 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) ...@@ -259,6 +259,44 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
return 0; return 0;
} }
/**
 * leb_write_trylock - try to lock logical eraseblock for writing.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function locks a logical eraseblock for writing if there is no
 * contention. If there is contention, it does not wait: it drops its use of
 * the lock tree entry again and leaves the eraseblock unlocked. Returns %0 in
 * case of success, %1 in case of contention, and a negative error code in case
 * of failure.
 */
static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
{
int free;
struct ubi_ltree_entry *le;
/*
 * Get the lock tree entry for this LEB.
 * NOTE(review): presumably this also increments @le->users, which is why
 * the contention path below decrements it - confirm in ltree_add_entry().
 */
le = ltree_add_entry(ubi, vol_id, lnum);
if (IS_ERR(le))
return PTR_ERR(le);
/* Non-zero from down_write_trylock() means the lock was taken */
if (down_write_trylock(&le->mutex))
return 0;
/* Contention, cancel: undo our use of the entry under @ubi->ltree_lock */
spin_lock(&ubi->ltree_lock);
le->users -= 1;
ubi_assert(le->users >= 0);
if (le->users == 0) {
/* Nobody else uses the entry - unlink it from the lock tree */
rb_erase(&le->rb, &ubi->ltree);
free = 1;
} else
free = 0;
spin_unlock(&ubi->ltree_lock);
/* The unlinked entry is released only after the spinlock is dropped */
if (free)
kmem_cache_free(ubi_ltree_slab, le);
return 1;
}
/** /**
* leb_write_unlock - unlock logical eraseblock. * leb_write_unlock - unlock logical eraseblock.
* @ubi: UBI device description object * @ubi: UBI device description object
...@@ -923,14 +961,16 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, ...@@ -923,14 +961,16 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
* *
* This function copies logical eraseblock from physical eraseblock @from to * This function copies logical eraseblock from physical eraseblock @from to
* physical eraseblock @to. The @vid_hdr buffer may be changed by this * physical eraseblock @to. The @vid_hdr buffer may be changed by this
* function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation * function. Returns:
* was canceled because bit-flips were detected at the target PEB, and a * o %0 in case of success;
* negative error code in case of failure. * o %1 if the operation was canceled and should be tried later (e.g.,
* because a bit-flip was detected at the target PEB);
* o %2 if the volume is being deleted and this LEB should not be moved.
*/ */
int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
struct ubi_vid_hdr *vid_hdr) struct ubi_vid_hdr *vid_hdr)
{ {
int err, vol_id, lnum, data_size, aldata_size, pnum, idx; int err, vol_id, lnum, data_size, aldata_size, idx;
struct ubi_volume *vol; struct ubi_volume *vol;
uint32_t crc; uint32_t crc;
...@@ -946,57 +986,67 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -946,57 +986,67 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
data_size = aldata_size = data_size = aldata_size =
ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); ubi->leb_size - be32_to_cpu(vid_hdr->data_pad);
/*
* We do not want anybody to write to this logical eraseblock while we
* are moving it, so we lock it.
*/
err = leb_write_lock(ubi, vol_id, lnum);
if (err)
return err;
mutex_lock(&ubi->buf_mutex);
/*
* But the logical eraseblock might have been put by this time.
* Cancel if it is true.
*/
idx = vol_id2idx(ubi, vol_id); idx = vol_id2idx(ubi, vol_id);
spin_lock(&ubi->volumes_lock);
/* /*
* We may race with volume deletion/re-size, so we have to hold * Note, we may race with volume deletion, which means that the volume
* @ubi->volumes_lock. * this logical eraseblock belongs to might be being deleted. Since the
* * volume deletion unmaps all the volume's logical eraseblocks, it will
* Note, it is not a problem if we race with volume deletion or re-size * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish.
* here. If the volume is deleted or re-sized while we are moving an
* eraseblock which belongs to this volume, we'll end up with finding
* out that this LEB was unmapped at the end (see WL), and drop this
* PEB.
*/ */
spin_lock(&ubi->volumes_lock);
vol = ubi->volumes[idx]; vol = ubi->volumes[idx];
if (!vol) { if (!vol) {
dbg_eba("volume %d was removed meanwhile", vol_id); /* No need to do further work, cancel */
dbg_eba("volume %d is being removed, cancel", vol_id);
spin_unlock(&ubi->volumes_lock); spin_unlock(&ubi->volumes_lock);
goto out_unlock; return 2;
} }
pnum = vol->eba_tbl[lnum];
if (pnum != from) {
dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
"PEB %d, cancel", vol_id, lnum, from, pnum);
spin_unlock(&ubi->volumes_lock); spin_unlock(&ubi->volumes_lock);
goto out_unlock;
/*
* We do not want anybody to write to this logical eraseblock while we
* are moving it, so lock it.
*
* Note, we are using non-waiting locking here, because we cannot sleep
* on the LEB, since it may cause deadlocks. Indeed, imagine a task is
* unmapping the LEB which is mapped to the PEB we are going to move
* (@from). This task locks the LEB and goes to sleep in the
* 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are
* holding @ubi->move_mutex and go to sleep on the LEB lock. So, if the
* LEB is already locked, we just do not move it and return %1.
*/
err = leb_write_trylock(ubi, vol_id, lnum);
if (err) {
dbg_eba("contention on LEB %d:%d, cancel", vol_id, lnum);
return err;
} }
spin_unlock(&ubi->volumes_lock);
/* OK, now the LEB is locked and we can safely start moving it */ /*
* The LEB might have been put meanwhile, and the task which put it is
* probably waiting on @ubi->move_mutex. No need to continue the work,
* cancel it.
*/
if (vol->eba_tbl[lnum] != from) {
dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
"PEB %d, cancel", vol_id, lnum, from,
vol->eba_tbl[lnum]);
err = 1;
goto out_unlock_leb;
}
/*
* OK, now the LEB is locked and we can safely start moving it. Since
* this function utilizes the @ubi->peb_buf1 buffer, which is shared
* with some other functions, lock the buffer by taking the
* @ubi->buf_mutex.
*/
mutex_lock(&ubi->buf_mutex);
dbg_eba("read %d bytes of data", aldata_size); dbg_eba("read %d bytes of data", aldata_size);
err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size);
if (err && err != UBI_IO_BITFLIPS) { if (err && err != UBI_IO_BITFLIPS) {
ubi_warn("error %d while reading data from PEB %d", ubi_warn("error %d while reading data from PEB %d",
err, from); err, from);
goto out_unlock; goto out_unlock_buf;
} }
/* /*
...@@ -1032,7 +1082,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -1032,7 +1082,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
if (err) if (err)
goto out_unlock; goto out_unlock_buf;
cond_resched(); cond_resched();
...@@ -1041,13 +1091,15 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -1041,13 +1091,15 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
if (err) { if (err) {
if (err != UBI_IO_BITFLIPS) if (err != UBI_IO_BITFLIPS)
ubi_warn("cannot read VID header back from PEB %d", to); ubi_warn("cannot read VID header back from PEB %d", to);
goto out_unlock; else
err = 1;
goto out_unlock_buf;
} }
if (data_size > 0) { if (data_size > 0) {
err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size);
if (err) if (err)
goto out_unlock; goto out_unlock_buf;
cond_resched(); cond_resched();
...@@ -1061,7 +1113,9 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -1061,7 +1113,9 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
if (err != UBI_IO_BITFLIPS) if (err != UBI_IO_BITFLIPS)
ubi_warn("cannot read data back from PEB %d", ubi_warn("cannot read data back from PEB %d",
to); to);
goto out_unlock; else
err = 1;
goto out_unlock_buf;
} }
cond_resched(); cond_resched();
...@@ -1069,15 +1123,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ...@@ -1069,15 +1123,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) {
ubi_warn("read data back from PEB %d - it is different", ubi_warn("read data back from PEB %d - it is different",
to); to);
goto out_unlock; goto out_unlock_buf;
} }
} }
ubi_assert(vol->eba_tbl[lnum] == from); ubi_assert(vol->eba_tbl[lnum] == from);
vol->eba_tbl[lnum] = to; vol->eba_tbl[lnum] = to;
out_unlock: out_unlock_buf:
mutex_unlock(&ubi->buf_mutex); mutex_unlock(&ubi->buf_mutex);
out_unlock_leb:
leb_write_unlock(ubi, vol_id, lnum); leb_write_unlock(ubi, vol_id, lnum);
return err; return err;
} }
......
...@@ -275,13 +275,13 @@ struct ubi_wl_entry; ...@@ -275,13 +275,13 @@ struct ubi_wl_entry;
* @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from,
* @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
* fields * fields
* @move_mutex: serializes eraseblock moves
* @wl_scheduled: non-zero if the wear-leveling was scheduled * @wl_scheduled: non-zero if the wear-leveling was scheduled
* @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
* physical eraseblock * physical eraseblock
* @abs_ec: absolute erase counter * @abs_ec: absolute erase counter
* @move_from: physical eraseblock from where the data is being moved * @move_from: physical eraseblock from where the data is being moved
* @move_to: physical eraseblock where the data is being moved to * @move_to: physical eraseblock where the data is being moved to
* @move_from_put: if the "from" PEB was put
* @move_to_put: if the "to" PEB was put * @move_to_put: if the "to" PEB was put
* @works: list of pending works * @works: list of pending works
* @works_count: count of pending works * @works_count: count of pending works
...@@ -354,12 +354,12 @@ struct ubi_device { ...@@ -354,12 +354,12 @@ struct ubi_device {
struct rb_root aec; struct rb_root aec;
} prot; } prot;
spinlock_t wl_lock; spinlock_t wl_lock;
struct mutex move_mutex;
int wl_scheduled; int wl_scheduled;
struct ubi_wl_entry **lookuptbl; struct ubi_wl_entry **lookuptbl;
unsigned long long abs_ec; unsigned long long abs_ec;
struct ubi_wl_entry *move_from; struct ubi_wl_entry *move_from;
struct ubi_wl_entry *move_to; struct ubi_wl_entry *move_to;
int move_from_put;
int move_to_put; int move_to_put;
struct list_head works; struct list_head works;
int works_count; int works_count;
...@@ -561,8 +561,10 @@ static inline int ubi_io_write_data(struct ubi_device *ubi, const void *buf, ...@@ -561,8 +561,10 @@ static inline int ubi_io_write_data(struct ubi_device *ubi, const void *buf,
*/ */
static inline void ubi_ro_mode(struct ubi_device *ubi) static inline void ubi_ro_mode(struct ubi_device *ubi)
{ {
if (!ubi->ro_mode) {
ubi->ro_mode = 1; ubi->ro_mode = 1;
ubi_warn("switch to read-only mode"); ubi_warn("switch to read-only mode");
}
} }
/** /**
......
...@@ -249,6 +249,8 @@ static int do_work(struct ubi_device *ubi) ...@@ -249,6 +249,8 @@ static int do_work(struct ubi_device *ubi)
int err; int err;
struct ubi_work *wrk; struct ubi_work *wrk;
cond_resched();
spin_lock(&ubi->wl_lock); spin_lock(&ubi->wl_lock);
if (list_empty(&ubi->works)) { if (list_empty(&ubi->works)) {
...@@ -531,8 +533,12 @@ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) ...@@ -531,8 +533,12 @@ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
* prot_tree_del - remove a physical eraseblock from the protection trees * prot_tree_del - remove a physical eraseblock from the protection trees
* @ubi: UBI device description object * @ubi: UBI device description object
* @pnum: the physical eraseblock to remove * @pnum: the physical eraseblock to remove
*
* This function removes PEB @pnum from the protection trees and returns zero
* in case of success and %-ENODEV if the PEB was not found in the protection
* trees.
*/ */
static void prot_tree_del(struct ubi_device *ubi, int pnum) static int prot_tree_del(struct ubi_device *ubi, int pnum)
{ {
struct rb_node *p; struct rb_node *p;
struct ubi_wl_prot_entry *pe = NULL; struct ubi_wl_prot_entry *pe = NULL;
...@@ -543,7 +549,7 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum) ...@@ -543,7 +549,7 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum)
pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
if (pnum == pe->e->pnum) if (pnum == pe->e->pnum)
break; goto found;
if (pnum < pe->e->pnum) if (pnum < pe->e->pnum)
p = p->rb_left; p = p->rb_left;
...@@ -551,10 +557,14 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum) ...@@ -551,10 +557,14 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum)
p = p->rb_right; p = p->rb_right;
} }
return -ENODEV;
found:
ubi_assert(pe->e->pnum == pnum); ubi_assert(pe->e->pnum == pnum);
rb_erase(&pe->rb_aec, &ubi->prot.aec); rb_erase(&pe->rb_aec, &ubi->prot.aec);
rb_erase(&pe->rb_pnum, &ubi->prot.pnum); rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
kfree(pe); kfree(pe);
return 0;
} }
/** /**
...@@ -726,7 +736,8 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, ...@@ -726,7 +736,8 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
int cancel) int cancel)
{ {
int err, put = 0; int err, put = 0, scrubbing = 0, protect = 0;
struct ubi_wl_prot_entry *pe;
struct ubi_wl_entry *e1, *e2; struct ubi_wl_entry *e1, *e2;
struct ubi_vid_hdr *vid_hdr; struct ubi_vid_hdr *vid_hdr;
...@@ -739,21 +750,17 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -739,21 +750,17 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
if (!vid_hdr) if (!vid_hdr)
return -ENOMEM; return -ENOMEM;
mutex_lock(&ubi->move_mutex);
spin_lock(&ubi->wl_lock); spin_lock(&ubi->wl_lock);
ubi_assert(!ubi->move_from && !ubi->move_to);
ubi_assert(!ubi->move_to_put);
/* if (!ubi->free.rb_node ||
* Only one WL worker at a time is supported at this implementation, so
* make sure a PEB is not being moved already.
*/
if (ubi->move_to || !ubi->free.rb_node ||
(!ubi->used.rb_node && !ubi->scrub.rb_node)) { (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
/* /*
* Only one WL worker at a time is supported at this * No free physical eraseblocks? Well, they must be waiting in
* implementation, so if a LEB is already being moved, cancel. * the queue to be erased. Cancel movement - it will be
* * triggered again when a free physical eraseblock appears.
* No free physical eraseblocks? Well, we cancel wear-leveling
* then. It will be triggered again when a free physical
* eraseblock appears.
* *
* No used physical eraseblocks? They must be temporarily * No used physical eraseblocks? They must be temporarily
* protected from being moved. They will be moved to the * protected from being moved. They will be moved to the
...@@ -762,10 +769,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -762,10 +769,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
*/ */
dbg_wl("cancel WL, a list is empty: free %d, used %d", dbg_wl("cancel WL, a list is empty: free %d, used %d",
!ubi->free.rb_node, !ubi->used.rb_node); !ubi->free.rb_node, !ubi->used.rb_node);
ubi->wl_scheduled = 0; goto out_cancel;
spin_unlock(&ubi->wl_lock);
ubi_free_vid_hdr(ubi, vid_hdr);
return 0;
} }
if (!ubi->scrub.rb_node) { if (!ubi->scrub.rb_node) {
...@@ -780,16 +784,15 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -780,16 +784,15 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
dbg_wl("no WL needed: min used EC %d, max free EC %d", dbg_wl("no WL needed: min used EC %d, max free EC %d",
e1->ec, e2->ec); e1->ec, e2->ec);
ubi->wl_scheduled = 0; goto out_cancel;
spin_unlock(&ubi->wl_lock);
ubi_free_vid_hdr(ubi, vid_hdr);
return 0;
} }
paranoid_check_in_wl_tree(e1, &ubi->used); paranoid_check_in_wl_tree(e1, &ubi->used);
rb_erase(&e1->rb, &ubi->used); rb_erase(&e1->rb, &ubi->used);
dbg_wl("move PEB %d EC %d to PEB %d EC %d", dbg_wl("move PEB %d EC %d to PEB %d EC %d",
e1->pnum, e1->ec, e2->pnum, e2->ec); e1->pnum, e1->ec, e2->pnum, e2->ec);
} else { } else {
/* Perform scrubbing */
scrubbing = 1;
e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
paranoid_check_in_wl_tree(e1, &ubi->scrub); paranoid_check_in_wl_tree(e1, &ubi->scrub);
...@@ -799,8 +802,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -799,8 +802,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
paranoid_check_in_wl_tree(e2, &ubi->free); paranoid_check_in_wl_tree(e2, &ubi->free);
rb_erase(&e2->rb, &ubi->free); rb_erase(&e2->rb, &ubi->free);
ubi_assert(!ubi->move_from && !ubi->move_to);
ubi_assert(!ubi->move_to_put && !ubi->move_from_put);
ubi->move_from = e1; ubi->move_from = e1;
ubi->move_to = e2; ubi->move_to = e2;
spin_unlock(&ubi->wl_lock); spin_unlock(&ubi->wl_lock);
...@@ -810,6 +811,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -810,6 +811,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
* We so far do not know which logical eraseblock our physical * We so far do not know which logical eraseblock our physical
* eraseblock (@e1) belongs to. We have to read the volume identifier * eraseblock (@e1) belongs to. We have to read the volume identifier
* header first. * header first.
*
* Note, we are protected from this PEB being unmapped and erased. The
* 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
* which is being moved was unmapped.
*/ */
err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
...@@ -824,32 +829,51 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -824,32 +829,51 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
* likely have the VID header in place. * likely have the VID header in place.
*/ */
dbg_wl("PEB %d has no VID header", e1->pnum); dbg_wl("PEB %d has no VID header", e1->pnum);
err = 0; goto out_not_moved;
} else { }
ubi_err("error %d while reading VID header from PEB %d", ubi_err("error %d while reading VID header from PEB %d",
err, e1->pnum); err, e1->pnum);
if (err > 0) if (err > 0)
err = -EIO; err = -EIO;
} goto out_error;
goto error;
} }
err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
if (err) { if (err) {
if (err == UBI_IO_BITFLIPS)
err = 0; if (err < 0)
goto error; goto out_error;
if (err == 1)
goto out_not_moved;
/*
* For some reason the LEB was not moved - it might be because
* the volume is being deleted. We should prevent this PEB from
* being selected for wear-levelling movement for some "time",
* so put it to the protection tree.
*/
dbg_wl("cancelled moving PEB %d", e1->pnum);
pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
if (!pe) {
err = -ENOMEM;
goto out_error;
}
protect = 1;
} }
ubi_free_vid_hdr(ubi, vid_hdr); ubi_free_vid_hdr(ubi, vid_hdr);
spin_lock(&ubi->wl_lock); spin_lock(&ubi->wl_lock);
if (protect)
prot_tree_add(ubi, e1, pe, protect);
if (!ubi->move_to_put) if (!ubi->move_to_put)
wl_tree_add(e2, &ubi->used); wl_tree_add(e2, &ubi->used);
else else
put = 1; put = 1;
ubi->move_from = ubi->move_to = NULL; ubi->move_from = ubi->move_to = NULL;
ubi->move_from_put = ubi->move_to_put = 0; ubi->move_to_put = ubi->wl_scheduled = 0;
ubi->wl_scheduled = 0;
spin_unlock(&ubi->wl_lock); spin_unlock(&ubi->wl_lock);
if (put) { if (put) {
...@@ -859,62 +883,67 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ...@@ -859,62 +883,67 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
*/ */
dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
err = schedule_erase(ubi, e2, 0); err = schedule_erase(ubi, e2, 0);
if (err) { if (err)
kmem_cache_free(ubi_wl_entry_slab, e2); goto out_error;
ubi_ro_mode(ubi);
}
} }
if (!protect) {
err = schedule_erase(ubi, e1, 0); err = schedule_erase(ubi, e1, 0);
if (err) { if (err)
kmem_cache_free(ubi_wl_entry_slab, e1); goto out_error;
ubi_ro_mode(ubi);
} }
dbg_wl("done"); dbg_wl("done");
return err; mutex_unlock(&ubi->move_mutex);
return 0;
/* /*
* Some error occurred. @e1 was not changed, so return it back. @e2 * For some reasons the LEB was not moved, might be an error, might be
* might be changed, schedule it for erasure. * something else. @e1 was not changed, so return it back. @e2 might
* be changed, schedule it for erasure.
*/ */
error: out_not_moved:
if (err)
dbg_wl("error %d occurred, cancel operation", err);
ubi_assert(err <= 0);
ubi_free_vid_hdr(ubi, vid_hdr); ubi_free_vid_hdr(ubi, vid_hdr);
spin_lock(&ubi->wl_lock); spin_lock(&ubi->wl_lock);
ubi->wl_scheduled = 0; if (scrubbing)
if (ubi->move_from_put) wl_tree_add(e1, &ubi->scrub);
put = 1;
else else
wl_tree_add(e1, &ubi->used); wl_tree_add(e1, &ubi->used);
ubi->move_from = ubi->move_to = NULL; ubi->move_from = ubi->move_to = NULL;
ubi->move_from_put = ubi->move_to_put = 0; ubi->move_to_put = ubi->wl_scheduled = 0;
spin_unlock(&ubi->wl_lock); spin_unlock(&ubi->wl_lock);
if (put) {
/*
* Well, the target PEB was put meanwhile, schedule it for
* erasure.
*/
dbg_wl("PEB %d was put meanwhile, erase", e1->pnum);
err = schedule_erase(ubi, e1, 0);
if (err) {
kmem_cache_free(ubi_wl_entry_slab, e1);
ubi_ro_mode(ubi);
}
}
err = schedule_erase(ubi, e2, 0); err = schedule_erase(ubi, e2, 0);
if (err) { if (err)
goto out_error;
mutex_unlock(&ubi->move_mutex);
return 0;
out_error:
ubi_err("error %d while moving PEB %d to PEB %d",
err, e1->pnum, e2->pnum);
ubi_free_vid_hdr(ubi, vid_hdr);
spin_lock(&ubi->wl_lock);
ubi->move_from = ubi->move_to = NULL;
ubi->move_to_put = ubi->wl_scheduled = 0;
spin_unlock(&ubi->wl_lock);
kmem_cache_free(ubi_wl_entry_slab, e1);
kmem_cache_free(ubi_wl_entry_slab, e2); kmem_cache_free(ubi_wl_entry_slab, e2);
ubi_ro_mode(ubi); ubi_ro_mode(ubi);
}
yield(); mutex_unlock(&ubi->move_mutex);
return err; return err;
out_cancel:
ubi->wl_scheduled = 0;
spin_unlock(&ubi->wl_lock);
mutex_unlock(&ubi->move_mutex);
ubi_free_vid_hdr(ubi, vid_hdr);
return 0;
} }
/** /**
...@@ -1101,8 +1130,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, ...@@ -1101,8 +1130,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
} }
/** /**
* ubi_wl_put_peb - return a physical eraseblock to the wear-leveling * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
* unit.
* @ubi: UBI device description object * @ubi: UBI device description object
* @pnum: physical eraseblock to return * @pnum: physical eraseblock to return
* @torture: if this physical eraseblock has to be tortured * @torture: if this physical eraseblock has to be tortured
...@@ -1110,7 +1138,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, ...@@ -1110,7 +1138,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
* This function is called to return physical eraseblock @pnum to the pool of * This function is called to return physical eraseblock @pnum to the pool of
* free physical eraseblocks. The @torture flag has to be set if an I/O error * free physical eraseblocks. The @torture flag has to be set if an I/O error
* occurred to this @pnum and it has to be tested. This function returns zero * occurred to this @pnum and it has to be tested. This function returns zero
* in case of success and a negative error code in case of failure. * in case of success, and a negative error code in case of failure.
*/ */
int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
{ {
...@@ -1121,8 +1149,8 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) ...@@ -1121,8 +1149,8 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
ubi_assert(pnum >= 0); ubi_assert(pnum >= 0);
ubi_assert(pnum < ubi->peb_count); ubi_assert(pnum < ubi->peb_count);
retry:
spin_lock(&ubi->wl_lock); spin_lock(&ubi->wl_lock);
e = ubi->lookuptbl[pnum]; e = ubi->lookuptbl[pnum];
if (e == ubi->move_from) { if (e == ubi->move_from) {
/* /*
...@@ -1130,11 +1158,13 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) ...@@ -1130,11 +1158,13 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
* be moved. It will be scheduled for erasure in the * be moved. It will be scheduled for erasure in the
* wear-leveling worker. * wear-leveling worker.
*/ */
dbg_wl("PEB %d is being moved", pnum); dbg_wl("PEB %d is being moved, wait", pnum);
ubi_assert(!ubi->move_from_put);
ubi->move_from_put = 1;
spin_unlock(&ubi->wl_lock); spin_unlock(&ubi->wl_lock);
return 0;
/* Wait for the WL worker by taking the @ubi->move_mutex */
mutex_lock(&ubi->move_mutex);
mutex_unlock(&ubi->move_mutex);
goto retry;
} else if (e == ubi->move_to) { } else if (e == ubi->move_to) {
/* /*
* User is putting the physical eraseblock which was selected * User is putting the physical eraseblock which was selected
...@@ -1157,8 +1187,15 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) ...@@ -1157,8 +1187,15 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
} else if (in_wl_tree(e, &ubi->scrub)) { } else if (in_wl_tree(e, &ubi->scrub)) {
paranoid_check_in_wl_tree(e, &ubi->scrub); paranoid_check_in_wl_tree(e, &ubi->scrub);
rb_erase(&e->rb, &ubi->scrub); rb_erase(&e->rb, &ubi->scrub);
} else } else {
prot_tree_del(ubi, e->pnum); err = prot_tree_del(ubi, e->pnum);
if (err) {
ubi_err("PEB %d not found", pnum);
ubi_ro_mode(ubi);
spin_unlock(&ubi->wl_lock);
return err;
}
}
} }
spin_unlock(&ubi->wl_lock); spin_unlock(&ubi->wl_lock);
...@@ -1212,8 +1249,17 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) ...@@ -1212,8 +1249,17 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
if (in_wl_tree(e, &ubi->used)) { if (in_wl_tree(e, &ubi->used)) {
paranoid_check_in_wl_tree(e, &ubi->used); paranoid_check_in_wl_tree(e, &ubi->used);
rb_erase(&e->rb, &ubi->used); rb_erase(&e->rb, &ubi->used);
} else } else {
prot_tree_del(ubi, pnum); int err;
err = prot_tree_del(ubi, e->pnum);
if (err) {
ubi_err("PEB %d not found", pnum);
ubi_ro_mode(ubi);
spin_unlock(&ubi->wl_lock);
return err;
}
}
wl_tree_add(e, &ubi->scrub); wl_tree_add(e, &ubi->scrub);
spin_unlock(&ubi->wl_lock); spin_unlock(&ubi->wl_lock);
...@@ -1379,6 +1425,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ...@@ -1379,6 +1425,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
ubi->used = ubi->free = ubi->scrub = RB_ROOT; ubi->used = ubi->free = ubi->scrub = RB_ROOT;
ubi->prot.pnum = ubi->prot.aec = RB_ROOT; ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
spin_lock_init(&ubi->wl_lock); spin_lock_init(&ubi->wl_lock);
mutex_init(&ubi->move_mutex);
ubi->max_ec = si->max_ec; ubi->max_ec = si->max_ec;
INIT_LIST_HEAD(&ubi->works); INIT_LIST_HEAD(&ubi->works);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment