Commit 8715c6d3 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-6.2/dm-changes' of...

Merge tag 'for-6.2/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Fix use-after-free races due to missing resource cleanup during DM
   target destruction in DM targets: thin-pool, cache, integrity and
   clone.

 - Fix ABBA deadlocks in DM thin-pool and cache targets due to their use
   of a bufio client (that has a shrinker whose locking can cause the
   incorrect locking order).

 - Fix DM cache target to set its needs_check flag after first aborting
   the metadata (whereby using reset persistent-data objects to update
   the superblock with, otherwise the superblock update could be dropped
   due to aborting metadata). This was found with code-inspection when
   comparing with the equivalent in DM thinp code.

 - Fix DM thin-pool's presume to continue resuming the device even if
   the pool in is fail mode -- otherwise bios may never be failed up the
   IO stack (which will prevent resetting the thin-pool target via table
   reload)

 - Fix DM thin-pool's metadata to use proper btree root (from previous
   transaction) if metadata commit failed.

 - Add 'waitfor' module param to DM module (dm_mod) to allow dm-init to
   wait for the specified device before continuing with its DM target
   initialization.

* tag 'for-6.2/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm thin: Use last transaction's pmd->root when commit failed
  dm init: add dm-mod.waitfor to wait for asynchronously probed block devices
  dm ioctl: fix a couple ioctl codes
  dm ioctl: a small code cleanup in list_version_get_info
  dm thin: resume even if in FAIL mode
  dm cache: set needs_check flag after aborting metadata
  dm cache: Fix ABBA deadlock between shrink_slab and dm_cache_metadata_abort
  dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata
  dm integrity: Fix UAF in dm_integrity_dtr()
  dm cache: Fix UAF in destroy()
  dm clone: Fix UAF in clone_dtr()
  dm thin: Fix UAF in run_timer_softirq()
parents 8ecd28b7 7991dbff
......@@ -123,3 +123,11 @@ Other examples (per target):
0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
For setups using device-mapper on top of asynchronously probed block
devices (MMC, USB, ..), it may be necessary to tell dm-init to
explicitly wait for them to become available before setting up the
device-mapper tables. This can be done with the "dm-mod.waitfor="
module parameter, which takes a list of devices to wait for::
dm-mod.waitfor=<device1>[,..,<deviceN>]
......@@ -551,11 +551,13 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
return r;
}
static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd,
bool destroy_bm)
{
dm_sm_destroy(cmd->metadata_sm);
dm_tm_destroy(cmd->tm);
dm_block_manager_destroy(cmd->bm);
if (destroy_bm)
dm_block_manager_destroy(cmd->bm);
}
typedef unsigned long (*flags_mutator)(unsigned long);
......@@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
cmd2 = lookup(bdev);
if (cmd2) {
mutex_unlock(&table_lock);
__destroy_persistent_data_objects(cmd);
__destroy_persistent_data_objects(cmd, true);
kfree(cmd);
return cmd2;
}
......@@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
mutex_unlock(&table_lock);
if (!cmd->fail_io)
__destroy_persistent_data_objects(cmd);
__destroy_persistent_data_objects(cmd, true);
kfree(cmd);
}
}
......@@ -1807,14 +1809,52 @@ int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
{
int r;
int r = -EINVAL;
struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
/* fail_io is double-checked with cmd->root_lock held below */
if (unlikely(cmd->fail_io))
return r;
/*
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs cmd root_lock).
* - must take shrinker_rwsem without holding cmd->root_lock
*/
new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_MAX_CONCURRENT_LOCKS);
WRITE_LOCK(cmd);
__destroy_persistent_data_objects(cmd);
r = __create_persistent_data_objects(cmd, false);
if (cmd->fail_io) {
WRITE_UNLOCK(cmd);
goto out;
}
__destroy_persistent_data_objects(cmd, false);
old_bm = cmd->bm;
if (IS_ERR(new_bm)) {
DMERR("could not create block manager during abort");
cmd->bm = NULL;
r = PTR_ERR(new_bm);
goto out_unlock;
}
cmd->bm = new_bm;
r = __open_or_format_metadata(cmd, false);
if (r) {
cmd->bm = NULL;
goto out_unlock;
}
new_bm = NULL;
out_unlock:
if (r)
cmd->fail_io = true;
WRITE_UNLOCK(cmd);
dm_block_manager_destroy(old_bm);
out:
if (new_bm && !IS_ERR(new_bm))
dm_block_manager_destroy(new_bm);
return r;
}
......@@ -907,16 +907,16 @@ static void abort_transaction(struct cache *cache)
if (get_cache_mode(cache) >= CM_READ_ONLY)
return;
if (dm_cache_metadata_set_needs_check(cache->cmd)) {
DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
set_cache_mode(cache, CM_FAIL);
}
DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
if (dm_cache_metadata_abort(cache->cmd)) {
DMERR("%s: failed to abort metadata transaction", dev_name);
set_cache_mode(cache, CM_FAIL);
}
if (dm_cache_metadata_set_needs_check(cache->cmd)) {
DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
set_cache_mode(cache, CM_FAIL);
}
}
static void metadata_operation_failed(struct cache *cache, const char *op, int r)
......@@ -1887,6 +1887,7 @@ static void destroy(struct cache *cache)
if (cache->prison)
dm_bio_prison_destroy_v2(cache->prison);
cancel_delayed_work_sync(&cache->waker);
if (cache->wq)
destroy_workqueue(cache->wq);
......
......@@ -1958,6 +1958,7 @@ static void clone_dtr(struct dm_target *ti)
mempool_exit(&clone->hydration_pool);
dm_kcopyd_client_destroy(clone->kcopyd_client);
cancel_delayed_work_sync(&clone->waker);
destroy_workqueue(clone->wq);
hash_table_exit(clone);
dm_clone_metadata_close(clone->cmd);
......
......@@ -8,6 +8,7 @@
*/
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/device-mapper.h>
#include <linux/init.h>
......@@ -18,12 +19,17 @@
#define DM_MAX_DEVICES 256
#define DM_MAX_TARGETS 256
#define DM_MAX_STR_SIZE 4096
#define DM_MAX_WAITFOR 256
static char *create;
static char *waitfor[DM_MAX_WAITFOR];
/*
* Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
* Table format: <start_sector> <num_sectors> <target_type> <target_args>
* Block devices to wait for to become available before setting up tables:
* dm-mod.waitfor=<device1>[,..,<deviceN>]
*
* See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format
* details.
......@@ -266,7 +272,7 @@ static int __init dm_init_init(void)
struct dm_device *dev;
LIST_HEAD(devices);
char *str;
int r;
int i, r;
if (!create)
return 0;
......@@ -286,6 +292,17 @@ static int __init dm_init_init(void)
DMINFO("waiting for all devices to be available before creating mapped devices");
wait_for_device_probe();
for (i = 0; i < ARRAY_SIZE(waitfor); i++) {
if (waitfor[i]) {
DMINFO("waiting for device %s ...", waitfor[i]);
while (!dm_get_dev_t(waitfor[i]))
msleep(5);
}
}
if (waitfor[0])
DMINFO("all devices available");
list_for_each_entry(dev, &devices, list) {
if (dm_early_create(&dev->dmi, dev->table,
dev->target_args_array))
......@@ -301,3 +318,6 @@ late_initcall(dm_init_init);
module_param(create, charp, 0);
MODULE_PARM_DESC(create, "Create a mapped device in early boot");
module_param_array(waitfor, charp, NULL, 0);
MODULE_PARM_DESC(waitfor, "Devices to wait for before setting up tables");
......@@ -4558,6 +4558,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
BUG_ON(!list_empty(&ic->wait_list));
if (ic->mode == 'B')
cancel_delayed_work_sync(&ic->bitmap_flush_work);
if (ic->metadata_wq)
destroy_workqueue(ic->metadata_wq);
if (ic->wait_wq)
......
......@@ -681,7 +681,7 @@ static void list_version_get_info(struct target_type *tt, void *param)
strcpy(info->vers->name, tt->name);
info->old_vers = info->vers;
info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1);
info->vers = align_ptr((void *)(info->vers + 1) + strlen(tt->name) + 1);
}
static int __list_versions(struct dm_ioctl *param, size_t param_size, const char *name)
......@@ -1788,8 +1788,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
{DM_TARGET_MSG_CMD, 0, target_message},
{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
{DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
{DM_GET_TARGET_VERSION, 0, get_target_version},
{DM_DEV_ARM_POLL_CMD, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
{DM_GET_TARGET_VERSION_CMD, 0, get_target_version},
};
if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
......
......@@ -724,6 +724,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
goto bad_cleanup_data_sm;
}
/*
* For pool metadata opening process, root setting is redundant
* because it will be set again in __begin_transaction(). But dm
* pool aborting process really needs to get last transaction's
* root to avoid accessing broken btree.
*/
pmd->root = le64_to_cpu(disk_super->data_mapping_root);
pmd->details_root = le64_to_cpu(disk_super->device_details_root);
__setup_btree_details(pmd);
dm_bm_unlock(sblock);
......@@ -776,13 +785,15 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f
return r;
}
static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
bool destroy_bm)
{
dm_sm_destroy(pmd->data_sm);
dm_sm_destroy(pmd->metadata_sm);
dm_tm_destroy(pmd->nb_tm);
dm_tm_destroy(pmd->tm);
dm_block_manager_destroy(pmd->bm);
if (destroy_bm)
dm_block_manager_destroy(pmd->bm);
}
static int __begin_transaction(struct dm_pool_metadata *pmd)
......@@ -989,7 +1000,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
}
pmd_write_unlock(pmd);
if (!pmd->fail_io)
__destroy_persistent_data_objects(pmd);
__destroy_persistent_data_objects(pmd, true);
kfree(pmd);
return 0;
......@@ -1860,19 +1871,52 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
{
int r = -EINVAL;
struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
/* fail_io is double-checked with pmd->root_lock held below */
if (unlikely(pmd->fail_io))
return r;
/*
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs pmd root_lock).
* - must take shrinker_rwsem without holding pmd->root_lock
*/
new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
THIN_MAX_CONCURRENT_LOCKS);
pmd_write_lock(pmd);
if (pmd->fail_io)
if (pmd->fail_io) {
pmd_write_unlock(pmd);
goto out;
}
__set_abort_with_changes_flags(pmd);
__destroy_persistent_data_objects(pmd);
r = __create_persistent_data_objects(pmd, false);
__destroy_persistent_data_objects(pmd, false);
old_bm = pmd->bm;
if (IS_ERR(new_bm)) {
DMERR("could not create block manager during abort");
pmd->bm = NULL;
r = PTR_ERR(new_bm);
goto out_unlock;
}
pmd->bm = new_bm;
r = __open_or_format_metadata(pmd, false);
if (r) {
pmd->bm = NULL;
goto out_unlock;
}
new_bm = NULL;
out_unlock:
if (r)
pmd->fail_io = true;
out:
pmd_write_unlock(pmd);
dm_block_manager_destroy(old_bm);
out:
if (new_bm && !IS_ERR(new_bm))
dm_block_manager_destroy(new_bm);
return r;
}
......
......@@ -2889,6 +2889,8 @@ static void __pool_destroy(struct pool *pool)
dm_bio_prison_destroy(pool->prison);
dm_kcopyd_client_destroy(pool->copier);
cancel_delayed_work_sync(&pool->waker);
cancel_delayed_work_sync(&pool->no_space_timeout);
if (pool->wq)
destroy_workqueue(pool->wq);
......@@ -3540,20 +3542,28 @@ static int pool_preresume(struct dm_target *ti)
*/
r = bind_control_target(pool, ti);
if (r)
return r;
goto out;
r = maybe_resize_data_dev(ti, &need_commit1);
if (r)
return r;
goto out;
r = maybe_resize_metadata_dev(ti, &need_commit2);
if (r)
return r;
goto out;
if (need_commit1 || need_commit2)
(void) commit(pool);
out:
/*
* When a thin-pool is PM_FAIL, it cannot be rebuilt if
* bio is in deferred list. Therefore need to return 0
* to allow pool_resume() to flush IO.
*/
if (r && get_pool_mode(pool) == PM_FAIL)
r = 0;
return 0;
return r;
}
static void pool_suspend_active_thins(struct pool *pool)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment