Commit 3f8476fe authored by Linus Torvalds

Merge tag 'dm-4.2-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - revert a request-based DM core change that increased IO latency and
   adversely impacted both throughput and system load

 - fix for a use-after-free bug in DM core's device cleanup

 - a couple of DM btree removal fixes (the btree code is used by dm-thinp)

 - a DM thinp fix for an order-5 allocation failure

 - a DM thinp fix so the pool no longer degrades to read-only metadata
   mode after spending longer than the 'no_space_timeout' in
   out-of-data-space mode

 - fix a long-standing oversight in both dm-thinp and dm-cache by
   exporting 'needs_check' in the status output whenever it is set in the
   metadata

 - fix an embarrassing dm-cache busy-loop that caused worker threads to
   eat CPU even when no IO was actively being issued to the cache device

* tag 'dm-4.2-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: avoid calls to prealloc_free_structs() if possible
  dm cache: avoid preallocation if no work in writeback_some_dirty_blocks()
  dm cache: do not wake_worker() in free_migration()
  dm cache: display 'needs_check' in status if it is set
  dm thin: display 'needs_check' in status if it is set
  dm thin: stay in out-of-data-space mode once no_space_timeout expires
  dm: fix use after free crash due to incorrect cleanup sequence
  Revert "dm: only run the queue on completion if congested or no requests pending"
  dm btree: silence lockdep lock inversion in dm_btree_del()
  dm thin: allocate the cell_sort_array dynamically
  dm btree remove: fix bug in redistribute3
Parents: eb254374 665022d7
--- a/Documentation/device-mapper/cache.txt
+++ b/Documentation/device-mapper/cache.txt
@@ -258,6 +258,12 @@ cache metadata mode : ro if read-only, rw if read-write
     no further I/O will be permitted and the status will just
     contain the string 'Fail'. The userspace recovery tools
     should then be used.
+needs_check : 'needs_check' if set, '-' if not set
+    A metadata operation has failed, resulting in the needs_check
+    flag being set in the metadata's superblock. The metadata
+    device must be deactivated and checked/repaired before the
+    cache can be made fully operational again. '-' indicates
+    needs_check is not set.
 
 Messages
 --------
...
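In practice this adds one field to the end of the cache status line, right after the metadata mode. As a purely hypothetical illustration, a healthy device's status line would now end in 'rw -', while one whose metadata superblock carries the flag would end in 'ro needs_check'.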
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -296,7 +296,7 @@ ii) Status
     underlying device. When this is enabled when loading the table,
     it can get disabled if the underlying device doesn't support it.
 
-    ro|rw
+    ro|rw|out_of_data_space
     If the pool encounters certain types of device failures it will
     drop into a read-only metadata mode in which no changes to
     the pool metadata (like allocating new blocks) are permitted.
@@ -314,6 +314,13 @@ ii) Status
     module parameter can be used to change this timeout -- it
     defaults to 60 seconds but may be disabled using a value of 0.
 
+    needs_check
+        A metadata operation has failed, resulting in the needs_check
+        flag being set in the metadata's superblock. The metadata
+        device must be deactivated and checked/repaired before the
+        thin-pool can be made fully operational again. '-' indicates
+        needs_check is not set.
+
 iii) Messages
 
     create_thin <dev id>
...
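The thin-pool status line grows the same trailing field. A hypothetical example (all numbers invented) for a pool that has exhausted its data space while needs_check is set:

    2 885/4161600 1048576/1048576 - out_of_data_space discard_passdown queue_if_no_space needs_check

read as: transaction id, used/total metadata blocks, used/total data blocks, held metadata root ('-' when none is held), pool mode, discard setting, no-space policy, and the new needs_check field.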
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -424,7 +424,6 @@ static void free_migration(struct dm_cache_migration *mg)
         wake_up(&cache->migration_wait);
 
     mempool_free(mg, cache->migration_pool);
-    wake_worker(cache);
 }
 
 static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
@@ -1947,6 +1946,7 @@ static int commit_if_needed(struct cache *cache)
 
 static void process_deferred_bios(struct cache *cache)
 {
+    bool prealloc_used = false;
     unsigned long flags;
     struct bio_list bios;
     struct bio *bio;
@@ -1981,13 +1981,16 @@ static void process_deferred_bios(struct cache *cache)
             process_discard_bio(cache, &structs, bio);
         else
             process_bio(cache, &structs, bio);
+        prealloc_used = true;
     }
 
-    prealloc_free_structs(cache, &structs);
+    if (prealloc_used)
+        prealloc_free_structs(cache, &structs);
 }
 
 static void process_deferred_cells(struct cache *cache)
 {
+    bool prealloc_used = false;
     unsigned long flags;
     struct dm_bio_prison_cell *cell, *tmp;
     struct list_head cells;
@@ -2015,9 +2018,11 @@ static void process_deferred_cells(struct cache *cache)
         }
 
         process_cell(cache, &structs, cell);
+        prealloc_used = true;
     }
 
-    prealloc_free_structs(cache, &structs);
+    if (prealloc_used)
+        prealloc_free_structs(cache, &structs);
 }
 
 static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
@@ -2062,7 +2067,7 @@ static void process_deferred_writethrough_bios(struct cache *cache)
 
 static void writeback_some_dirty_blocks(struct cache *cache)
 {
-    int r = 0;
+    bool prealloc_used = false;
     dm_oblock_t oblock;
     dm_cblock_t cblock;
     struct prealloc structs;
@@ -2072,23 +2077,21 @@ static void writeback_some_dirty_blocks(struct cache *cache)
     memset(&structs, 0, sizeof(structs));
 
     while (spare_migration_bandwidth(cache)) {
-        if (prealloc_data_structs(cache, &structs))
-            break;
+        if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
+            break; /* no work to do */
 
-        r = policy_writeback_work(cache->policy, &oblock, &cblock, busy);
-        if (r)
-            break;
-
-        r = get_cell(cache, oblock, &structs, &old_ocell);
-        if (r) {
+        if (prealloc_data_structs(cache, &structs) ||
+            get_cell(cache, oblock, &structs, &old_ocell)) {
             policy_set_dirty(cache->policy, oblock);
             break;
         }
 
         writeback(cache, &structs, oblock, cblock, old_ocell);
+        prealloc_used = true;
     }
 
-    prealloc_free_structs(cache, &structs);
+    if (prealloc_used)
+        prealloc_free_structs(cache, &structs);
 }
 
 /*----------------------------------------------------------------
@@ -3496,7 +3499,7 @@ static void cache_resume(struct dm_target *ti)
  * <#demotions> <#promotions> <#dirty>
  * <#features> <features>*
  * <#core args> <core args>
- * <policy name> <#policy args> <policy args>* <cache metadata mode>
+ * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
  */
 static void cache_status(struct dm_target *ti, status_type_t type,
                          unsigned status_flags, char *result, unsigned maxlen)
@@ -3582,6 +3585,11 @@ static void cache_status(struct dm_target *ti, status_type_t type,
         else
             DMEMIT("rw ");
 
+        if (dm_cache_metadata_needs_check(cache->cmd))
+            DMEMIT("needs_check ");
+        else
+            DMEMIT("- ");
+
         break;
 
     case STATUSTYPE_TABLE:
@@ -3820,7 +3828,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
     .name = "cache",
-    .version = {1, 7, 0},
+    .version = {1, 8, 0},
     .module = THIS_MODULE,
     .ctr = cache_ctr,
     .dtr = cache_dtr,
...
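The dm-cache hunks above all serve the busy-loop fix from the summary: the worker used to preallocate migration structures before knowing whether any work existed, free them again on every pass, and (via the wake_worker() call removed from free_migration()) immediately re-wake itself, so an idle cache burned CPU. A minimal userspace sketch of the corrected shape -- invented names standing in for the kernel helpers, not kernel code:

/* Check for work before preallocating; free only what was used. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static int work_remaining;              /* stands in for the policy's queue */

static bool get_work(int *item)         /* analogue of policy_writeback_work() */
{
        if (work_remaining <= 0)
                return false;
        *item = work_remaining--;
        return true;
}

static void process_all(void)
{
        char *structs = NULL;           /* analogue of struct prealloc */
        bool prealloc_used = false;
        int item;

        while (get_work(&item)) {       /* idle: we never reach the malloc */
                if (!structs)
                        structs = malloc(4096);  /* allocate only when needed */
                if (!structs)
                        break;
                printf("writing back block %d\n", item);
                prealloc_used = true;
        }

        if (prealloc_used)              /* mirrors the kernel's conditional free */
                free(structs);
}

int main(void)
{
        process_all();                  /* idle worker: no allocation churn */
        work_remaining = 3;
        process_all();                  /* three blocks written back */
        return 0;
}

The kernel version keeps a struct of several preallocated objects alive across iterations; the sketch keeps only the two ordering decisions that matter.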
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <linux/sort.h>
 #include <linux/rbtree.h>
 
@@ -268,7 +269,7 @@ struct pool {
     process_mapping_fn process_prepared_mapping;
     process_mapping_fn process_prepared_discard;
 
-    struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
+    struct dm_bio_prison_cell **cell_sort_array;
 };
 
 static enum pool_mode get_pool_mode(struct pool *pool);
@@ -2281,18 +2282,23 @@ static void do_waker(struct work_struct *ws)
     queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
+static void notify_of_pool_mode_change_to_oods(struct pool *pool);
+
 /*
  * We're holding onto IO to allow userland time to react.  After the
  * timeout either the pool will have been resized (and thus back in
- * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ * PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space.
  */
 static void do_no_space_timeout(struct work_struct *ws)
 {
     struct pool *pool = container_of(to_delayed_work(ws), struct pool,
                                      no_space_timeout);
 
-    if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
-        set_pool_mode(pool, PM_READ_ONLY);
+    if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
+        pool->pf.error_if_no_space = true;
+        notify_of_pool_mode_change_to_oods(pool);
+        error_retry_list(pool);
+    }
 }
 
 /*----------------------------------------------------------------*/
@@ -2370,6 +2376,14 @@ static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
            dm_device_name(pool->pool_md), new_mode);
 }
 
+static void notify_of_pool_mode_change_to_oods(struct pool *pool)
+{
+    if (!pool->pf.error_if_no_space)
+        notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
+    else
+        notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
+}
+
 static bool passdown_enabled(struct pool_c *pt)
 {
     return pt->adjusted_pf.discard_passdown;
@@ -2454,7 +2468,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
          * frequently seeing this mode.
          */
         if (old_mode != new_mode)
-            notify_of_pool_mode_change(pool, "out-of-data-space");
+            notify_of_pool_mode_change_to_oods(pool);
         pool->process_bio = process_bio_read_only;
         pool->process_discard = process_discard_bio;
         pool->process_cell = process_cell_read_only;
@@ -2777,6 +2791,7 @@ static void __pool_destroy(struct pool *pool)
 {
     __pool_table_remove(pool);
 
+    vfree(pool->cell_sort_array);
     if (dm_pool_metadata_close(pool->pmd) < 0)
         DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
 
@@ -2889,6 +2904,13 @@ static struct pool *pool_create(struct mapped_device *pool_md,
         goto bad_mapping_pool;
     }
 
+    pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE);
+    if (!pool->cell_sort_array) {
+        *error = "Error allocating cell sort array";
+        err_p = ERR_PTR(-ENOMEM);
+        goto bad_sort_array;
+    }
+
     pool->ref_count = 1;
     pool->last_commit_jiffies = jiffies;
     pool->pool_md = pool_md;
@@ -2897,6 +2919,8 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
     return pool;
 
+bad_sort_array:
+    mempool_destroy(pool->mapping_pool);
 bad_mapping_pool:
     dm_deferred_set_destroy(pool->all_io_ds);
 bad_all_io_ds:
@@ -3714,6 +3738,7 @@ static void emit_flags(struct pool_features *pf, char *result,
  * Status line is:
  *    <transaction id> <used metadata sectors>/<total metadata sectors>
  *    <used data sectors>/<total data sectors> <held metadata root>
+ *    <pool mode> <discard config> <no space config> <needs_check>
  */
 static void pool_status(struct dm_target *ti, status_type_t type,
                         unsigned status_flags, char *result, unsigned maxlen)
@@ -3815,6 +3840,11 @@ static void pool_status(struct dm_target *ti, status_type_t type,
         else
             DMEMIT("queue_if_no_space ");
 
+        if (dm_pool_metadata_needs_check(pool->pmd))
+            DMEMIT("needs_check ");
+        else
+            DMEMIT("- ");
+
         break;
 
     case STATUSTYPE_TABLE:
@@ -3918,7 +3948,7 @@ static struct target_type pool_target = {
     .name = "thin-pool",
     .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
                 DM_TARGET_IMMUTABLE,
-    .version = {1, 15, 0},
+    .version = {1, 16, 0},
     .module = THIS_MODULE,
     .ctr = pool_ctr,
     .dtr = pool_dtr,
@@ -4305,7 +4335,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
     .name = "thin",
-    .version = {1, 15, 0},
+    .version = {1, 16, 0},
     .module = THIS_MODULE,
     .ctr = thin_ctr,
     .dtr = thin_dtr,
...
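Two separate fixes meet in the dm-thin diff. The out-of-data-space change makes the pool stay in PM_OUT_OF_DATA_SPACE when 'no_space_timeout' expires, flipping to error_if_no_space behaviour instead of degrading to read-only metadata mode. The cell_sort_array change is the order-5 allocation fix; the rough arithmetic, assuming CELL_SORT_ARRAY_SIZE is 8192 (its value in this dm-thin.c) and 8-byte pointers, looks like this:

/* Back-of-envelope for the order-5 kmalloc() failure being avoided. */
#include <stdio.h>

#define CELL_SORT_ARRAY_SIZE 8192

int main(void)
{
        unsigned long array_bytes = CELL_SORT_ARRAY_SIZE * sizeof(void *);

        /*
         * 8192 pointers * 8 bytes = 64 KiB embedded in struct pool, so
         * allocating the whole struct with kmalloc() needs the next
         * power-of-two size, 128 KiB: an order-5 run of physically
         * contiguous pages that a fragmented machine may not have.
         */
        printf("embedded cell_sort_array: %lu KiB\n", array_bytes >> 10);
        return 0;
}

Allocating the array separately with vmalloc() needs only virtually contiguous pages, which is fine here because the array is only ever dereferenced by the CPU.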
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1067,13 +1067,10 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
  */
 static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 {
-    int nr_requests_pending;
-
     atomic_dec(&md->pending[rw]);
 
     /* nudge anyone waiting on suspend queue */
-    nr_requests_pending = md_in_flight(md);
-    if (!nr_requests_pending)
+    if (!md_in_flight(md))
         wake_up(&md->wait);
 
     /*
@@ -1085,8 +1082,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
     if (run_queue) {
         if (md->queue->mq_ops)
             blk_mq_run_hw_queues(md->queue, true);
-        else if (!nr_requests_pending ||
-                 (nr_requests_pending >= md->queue->nr_congestion_on))
+        else
             blk_run_queue_async(md->queue);
     }
 
@@ -2281,8 +2277,6 @@ static void dm_init_old_md_queue(struct mapped_device *md)
 
 static void cleanup_mapped_device(struct mapped_device *md)
 {
-    cleanup_srcu_struct(&md->io_barrier);
-
     if (md->wq)
         destroy_workqueue(md->wq);
     if (md->kworker_task)
@@ -2294,6 +2288,8 @@ static void cleanup_mapped_device(struct mapped_device *md)
     if (md->bs)
         bioset_free(md->bs);
 
+    cleanup_srcu_struct(&md->io_barrier);
+
     if (md->disk) {
         spin_lock(&_minor_lock);
         md->disk->private_data = NULL;
...
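The cleanup_mapped_device() hunk is the use-after-free fix: md->io_barrier was cleaned up while md->wq and md->kworker_task could still be running work that reads it, so the fix moves cleanup_srcu_struct() after those consumers are destroyed. A userspace analogue of the ordering rule (names invented; a mutex stands in for the SRCU struct):

/* Stop the worker that uses shared state before destroying the state. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t io_barrier = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool stop;

static void *worker(void *arg)          /* stands in for md->wq work items */
{
        (void)arg;
        while (!atomic_load(&stop)) {
                pthread_mutex_lock(&io_barrier);   /* state still in use */
                pthread_mutex_unlock(&io_barrier);
        }
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);

        /*
         * Buggy order: destroying io_barrier here would race with the
         * running worker -- the kernel equivalent was the crash on
         * md->io_barrier.  Fixed order below: quiesce, then destroy.
         */
        atomic_store(&stop, true);
        pthread_join(t, NULL);                 /* like destroy_workqueue() */
        pthread_mutex_destroy(&io_barrier);    /* like cleanup_srcu_struct() */

        puts("teardown completed safely");
        return 0;
}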
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -309,8 +309,8 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
     if (s < 0 && nr_center < -s) {
         /* not enough in central node */
-        shift(left, center, nr_center);
-        s = nr_center - target;
+        shift(left, center, -nr_center);
+        s += nr_center;
         shift(left, right, s);
         nr_right += s;
     } else
@@ -323,7 +323,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
     if (s > 0 && nr_center < s) {
         /* not enough in central node */
         shift(center, right, nr_center);
-        s = target - nr_center;
+        s -= nr_center;
         shift(left, right, s);
         nr_left -= s;
     } else
...
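The redistribute3() fix is easiest to check with numbers. Take nr_left = 3, nr_center = 4, nr_right = 23, so target = 30 / 3 = 10 and s = nr_left - target = -7: the left node is short seven entries and the center's four cannot cover it. The old code called shift(left, center, nr_center) with a positive count -- moving entries out of the already-starved left node -- and then set s = nr_center - target = -6 rather than the true remaining deficit of -3. The corrected arithmetic, simulated with plain integers (a sketch, not the kernel code; a negative shift count moves entries from the second node into the first, as in dm-btree-remove.c):

#include <stdio.h>

int main(void)
{
        int nr_left = 3, nr_center = 4, nr_right = 23;
        int target = (nr_left + nr_center + nr_right) / 3;  /* 10 */
        int s = nr_left - target;                           /* -7 */

        if (s < 0 && nr_center < -s) {
                /* Center cannot cover the deficit: drain it all into the
                 * left node, then take the remainder from the right. */
                nr_left += nr_center;   /* shift(left, center, -nr_center) */
                s += nr_center;         /* s = -3: still short three */
                nr_center = 0;
                nr_left -= s;           /* shift(left, right, s) pulls ... */
                nr_right += s;          /* ... three entries from the right */
        }

        printf("left=%d center=%d right=%d\n", nr_left, nr_center, nr_right);
        /* Prints left=10 center=0 right=20: left holds exactly target. */
        return 0;
}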
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -255,7 +255,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
     int r;
     struct del_stack *s;
 
-    s = kmalloc(sizeof(*s), GFP_KERNEL);
+    s = kmalloc(sizeof(*s), GFP_NOIO);
     if (!s)
         return -ENOMEM;
     s->info = info;
...
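The GFP_NOIO change is the lockdep-inversion fix from the shortlog: dm_btree_del() can be reached with block-layer locks held, and a GFP_KERNEL allocation may enter memory reclaim, which may itself issue IO back into the same stack and try to retake those locks. GFP_NOIO forbids that recursion. A contrived userspace sketch of the inversion's shape (all names invented):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t io_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for reclaim that may issue IO (the GFP_KERNEL behaviour). */
static void reclaim_via_io(void)
{
        if (pthread_mutex_trylock(&io_lock) != 0) {
                puts("inversion: reclaim needs io_lock, caller holds it");
                return;
        }
        pthread_mutex_unlock(&io_lock);
}

static void *alloc_with(int may_do_io)
{
        if (may_do_io)          /* GFP_KERNEL analogue: may recurse into IO */
                reclaim_via_io();
        return NULL;            /* the allocation itself is elided */
}

int main(void)
{
        pthread_mutex_lock(&io_lock);   /* caller already inside the IO path */
        alloc_with(1);                  /* reports the would-be deadlock */
        alloc_with(0);                  /* GFP_NOIO analogue: never touches io_lock */
        pthread_mutex_unlock(&io_lock);
        return 0;
}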