Commit a0efc03b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-4.19/dm-fixes' of...

Merge tag 'for-4.19/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - DM verity fix for crash due to using vmalloc'd buffers with the
   asynchronous crypto hash API.

 - Fix to both DM crypt and DM integrity targets to discontinue using
   CRYPTO_TFM_REQ_MAY_SLEEP because its use of GFP_KERNEL can lead to
   deadlock by recursing back into a filesystem.

 - Various DM raid fixes related to reshape and rebuild races.

 - Fix for DM thin-provisioning to avoid data corruption that was a
   side-effect of needing to abort DM thin metadata transaction due to
   running out of metadata space. Fix is to reserve a small amount of
   metadata space so that once it is used the DM thin-pool can finish
   its active transaction before switching to read-only mode.

* tag 'for-4.19/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm thin metadata: try to avoid ever aborting transactions
  dm raid: bump target version, update comments and documentation
  dm raid: fix RAID leg rebuild errors
  dm raid: fix rebuild of specific devices by updating superblock
  dm raid: fix stripe adding reshape deadlock
  dm raid: fix reshape race on small devices
  dm: disable CRYPTO_TFM_REQ_MAY_SLEEP to fix a GFP_KERNEL recursion deadlock
  dm verity: fix crash on bufio buffer that was allocated with vmalloc
parents 0f9aeeac 3ab91828
...@@ -348,3 +348,7 @@ Version History ...@@ -348,3 +348,7 @@ Version History
1.13.1 Fix deadlock caused by early md_stop_writes(). Also fix size an 1.13.1 Fix deadlock caused by early md_stop_writes(). Also fix size an
state races. state races.
1.13.2 Fix raid redundancy validation and avoid keeping raid set frozen 1.13.2 Fix raid redundancy validation and avoid keeping raid set frozen
1.14.0 Fix reshape race on small devices. Fix stripe adding reshape
deadlock/potential data corruption. Update superblock when
specific devices are requested via rebuild. Fix RAID leg
rebuild errors.
...@@ -332,7 +332,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) ...@@ -332,7 +332,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc)
int err; int err;
desc->tfm = essiv->hash_tfm; desc->tfm = essiv->hash_tfm;
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; desc->flags = 0;
err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt); err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt);
shash_desc_zero(desc); shash_desc_zero(desc);
...@@ -606,7 +606,7 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, ...@@ -606,7 +606,7 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
int i, r; int i, r;
desc->tfm = lmk->hash_tfm; desc->tfm = lmk->hash_tfm;
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; desc->flags = 0;
r = crypto_shash_init(desc); r = crypto_shash_init(desc);
if (r) if (r)
...@@ -768,7 +768,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, ...@@ -768,7 +768,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
/* calculate crc32 for every 32bit part and xor it */ /* calculate crc32 for every 32bit part and xor it */
desc->tfm = tcw->crc32_tfm; desc->tfm = tcw->crc32_tfm;
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; desc->flags = 0;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
r = crypto_shash_init(desc); r = crypto_shash_init(desc);
if (r) if (r)
...@@ -1251,7 +1251,7 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc, ...@@ -1251,7 +1251,7 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
* requests if driver request queue is full. * requests if driver request queue is full.
*/ */
skcipher_request_set_callback(ctx->r.req, skcipher_request_set_callback(ctx->r.req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, CRYPTO_TFM_REQ_MAY_BACKLOG,
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); kcryptd_async_done, dmreq_of_req(cc, ctx->r.req));
} }
...@@ -1268,7 +1268,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, ...@@ -1268,7 +1268,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
* requests if driver request queue is full. * requests if driver request queue is full.
*/ */
aead_request_set_callback(ctx->r.req_aead, aead_request_set_callback(ctx->r.req_aead,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, CRYPTO_TFM_REQ_MAY_BACKLOG,
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead));
} }
......
...@@ -532,7 +532,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result ...@@ -532,7 +532,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result
unsigned j, size; unsigned j, size;
desc->tfm = ic->journal_mac; desc->tfm = ic->journal_mac;
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; desc->flags = 0;
r = crypto_shash_init(desc); r = crypto_shash_init(desc);
if (unlikely(r)) { if (unlikely(r)) {
...@@ -676,7 +676,7 @@ static void complete_journal_encrypt(struct crypto_async_request *req, int err) ...@@ -676,7 +676,7 @@ static void complete_journal_encrypt(struct crypto_async_request *req, int err)
static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp) static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
{ {
int r; int r;
skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
complete_journal_encrypt, comp); complete_journal_encrypt, comp);
if (likely(encrypt)) if (likely(encrypt))
r = crypto_skcipher_encrypt(req); r = crypto_skcipher_encrypt(req);
......
This diff is collapsed.
...@@ -188,6 +188,12 @@ struct dm_pool_metadata { ...@@ -188,6 +188,12 @@ struct dm_pool_metadata {
unsigned long flags; unsigned long flags;
sector_t data_block_size; sector_t data_block_size;
/*
* We reserve a section of the metadata for commit overhead.
* All reported space does *not* include this.
*/
dm_block_t metadata_reserve;
/* /*
* Set if a transaction has to be aborted but the attempt to roll back * Set if a transaction has to be aborted but the attempt to roll back
* to the previous (good) transaction failed. The only pool metadata * to the previous (good) transaction failed. The only pool metadata
...@@ -816,6 +822,22 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) ...@@ -816,6 +822,22 @@ static int __commit_transaction(struct dm_pool_metadata *pmd)
return dm_tm_commit(pmd->tm, sblock); return dm_tm_commit(pmd->tm, sblock);
} }
/*
 * Recompute pmd->metadata_reserve, the number of metadata blocks held back
 * for commit overhead.
 *
 * The reserve is one tenth of the metadata space map's block count, capped
 * at max_blocks.  If the block count cannot be read, an error is logged and
 * the cap itself is used as the reserve.
 */
static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
{
	int r;
	dm_block_t total;
	dm_block_t max_blocks = 4096; /* 16M */

	r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total);
	if (r) {
		DMERR("could not get size of metadata device");
		pmd->metadata_reserve = max_blocks;
	} else {
		/*
		 * total is a dm_block_t, not a sector_t, so sector_div() is
		 * the wrong helper here; div_u64() divides a 64-bit value
		 * safely on 32-bit builds as well.
		 */
		total = div_u64(total, 10);
		pmd->metadata_reserve = min(max_blocks, total);
	}
}
struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
sector_t data_block_size, sector_t data_block_size,
bool format_device) bool format_device)
...@@ -849,6 +871,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, ...@@ -849,6 +871,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
return ERR_PTR(r); return ERR_PTR(r);
} }
__set_metadata_reserve(pmd);
return pmd; return pmd;
} }
...@@ -1820,6 +1844,13 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, ...@@ -1820,6 +1844,13 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
down_read(&pmd->root_lock); down_read(&pmd->root_lock);
if (!pmd->fail_io) if (!pmd->fail_io)
r = dm_sm_get_nr_free(pmd->metadata_sm, result); r = dm_sm_get_nr_free(pmd->metadata_sm, result);
if (!r) {
if (*result < pmd->metadata_reserve)
*result = 0;
else
*result -= pmd->metadata_reserve;
}
up_read(&pmd->root_lock); up_read(&pmd->root_lock);
return r; return r;
...@@ -1932,8 +1963,11 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_cou ...@@ -1932,8 +1963,11 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_cou
int r = -EINVAL; int r = -EINVAL;
down_write(&pmd->root_lock); down_write(&pmd->root_lock);
if (!pmd->fail_io) if (!pmd->fail_io) {
r = __resize_space_map(pmd->metadata_sm, new_count); r = __resize_space_map(pmd->metadata_sm, new_count);
if (!r)
__set_metadata_reserve(pmd);
}
up_write(&pmd->root_lock); up_write(&pmd->root_lock);
return r; return r;
......
...@@ -200,7 +200,13 @@ struct dm_thin_new_mapping; ...@@ -200,7 +200,13 @@ struct dm_thin_new_mapping;
enum pool_mode { enum pool_mode {
PM_WRITE, /* metadata may be changed */ PM_WRITE, /* metadata may be changed */
PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */ PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */
/*
* Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY.
*/
PM_OUT_OF_METADATA_SPACE,
PM_READ_ONLY, /* metadata may not be changed */ PM_READ_ONLY, /* metadata may not be changed */
PM_FAIL, /* all I/O fails */ PM_FAIL, /* all I/O fails */
}; };
...@@ -1371,7 +1377,35 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); ...@@ -1371,7 +1377,35 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
static void requeue_bios(struct pool *pool); static void requeue_bios(struct pool *pool);
static void check_for_space(struct pool *pool) static bool is_read_only_pool_mode(enum pool_mode mode)
{
return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY);
}
static bool is_read_only(struct pool *pool)
{
return is_read_only_pool_mode(get_pool_mode(pool));
}
/*
 * Query the free metadata block count and, if the query fails or reports
 * zero free blocks, log the reason and move the pool to
 * PM_OUT_OF_METADATA_SPACE.  Nothing is logged or changed when the pool is
 * already in a read-only mode.
 */
static void check_for_metadata_space(struct pool *pool)
{
	dm_block_t free_blocks;
	const char *why;

	if (dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks))
		why = "Could not get free metadata blocks";
	else if (free_blocks == 0)
		why = "No free metadata blocks";
	else
		return;		/* space is available: nothing to do */

	if (is_read_only(pool))
		return;

	DMERR("%s", why);
	set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
}
static void check_for_data_space(struct pool *pool)
{ {
int r; int r;
dm_block_t nr_free; dm_block_t nr_free;
...@@ -1397,14 +1431,16 @@ static int commit(struct pool *pool) ...@@ -1397,14 +1431,16 @@ static int commit(struct pool *pool)
{ {
int r; int r;
if (get_pool_mode(pool) >= PM_READ_ONLY) if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
return -EINVAL; return -EINVAL;
r = dm_pool_commit_metadata(pool->pmd); r = dm_pool_commit_metadata(pool->pmd);
if (r) if (r)
metadata_operation_failed(pool, "dm_pool_commit_metadata", r); metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
else else {
check_for_space(pool); check_for_metadata_space(pool);
check_for_data_space(pool);
}
return r; return r;
} }
...@@ -1470,6 +1506,19 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) ...@@ -1470,6 +1506,19 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
return r; return r;
} }
r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
if (r) {
metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
return r;
}
if (!free_blocks) {
/* Let's commit before we use up the metadata reserve. */
r = commit(pool);
if (r)
return r;
}
return 0; return 0;
} }
...@@ -1501,6 +1550,7 @@ static blk_status_t should_error_unserviceable_bio(struct pool *pool) ...@@ -1501,6 +1550,7 @@ static blk_status_t should_error_unserviceable_bio(struct pool *pool)
case PM_OUT_OF_DATA_SPACE: case PM_OUT_OF_DATA_SPACE:
return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0; return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
case PM_OUT_OF_METADATA_SPACE:
case PM_READ_ONLY: case PM_READ_ONLY:
case PM_FAIL: case PM_FAIL:
return BLK_STS_IOERR; return BLK_STS_IOERR;
...@@ -2464,8 +2514,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) ...@@ -2464,8 +2514,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
error_retry_list(pool); error_retry_list(pool);
break; break;
case PM_OUT_OF_METADATA_SPACE:
case PM_READ_ONLY: case PM_READ_ONLY:
if (old_mode != new_mode) if (!is_read_only_pool_mode(old_mode))
notify_of_pool_mode_change(pool, "read-only"); notify_of_pool_mode_change(pool, "read-only");
dm_pool_metadata_read_only(pool->pmd); dm_pool_metadata_read_only(pool->pmd);
pool->process_bio = process_bio_read_only; pool->process_bio = process_bio_read_only;
...@@ -3403,6 +3454,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) ...@@ -3403,6 +3454,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
DMINFO("%s: growing the metadata device from %llu to %llu blocks", DMINFO("%s: growing the metadata device from %llu to %llu blocks",
dm_device_name(pool->pool_md), dm_device_name(pool->pool_md),
sb_metadata_dev_size, metadata_dev_size); sb_metadata_dev_size, metadata_dev_size);
if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
set_pool_mode(pool, PM_WRITE);
r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
if (r) { if (r) {
metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r); metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
...@@ -3707,7 +3762,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv, ...@@ -3707,7 +3762,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv,
struct pool_c *pt = ti->private; struct pool_c *pt = ti->private;
struct pool *pool = pt->pool; struct pool *pool = pt->pool;
if (get_pool_mode(pool) >= PM_READ_ONLY) { if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode", DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
dm_device_name(pool->pool_md)); dm_device_name(pool->pool_md));
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -3781,6 +3836,7 @@ static void pool_status(struct dm_target *ti, status_type_t type, ...@@ -3781,6 +3836,7 @@ static void pool_status(struct dm_target *ti, status_type_t type,
dm_block_t nr_blocks_data; dm_block_t nr_blocks_data;
dm_block_t nr_blocks_metadata; dm_block_t nr_blocks_metadata;
dm_block_t held_root; dm_block_t held_root;
enum pool_mode mode;
char buf[BDEVNAME_SIZE]; char buf[BDEVNAME_SIZE];
char buf2[BDEVNAME_SIZE]; char buf2[BDEVNAME_SIZE];
struct pool_c *pt = ti->private; struct pool_c *pt = ti->private;
...@@ -3851,9 +3907,10 @@ static void pool_status(struct dm_target *ti, status_type_t type, ...@@ -3851,9 +3907,10 @@ static void pool_status(struct dm_target *ti, status_type_t type,
else else
DMEMIT("- "); DMEMIT("- ");
if (pool->pf.mode == PM_OUT_OF_DATA_SPACE) mode = get_pool_mode(pool);
if (mode == PM_OUT_OF_DATA_SPACE)
DMEMIT("out_of_data_space "); DMEMIT("out_of_data_space ");
else if (pool->pf.mode == PM_READ_ONLY) else if (is_read_only_pool_mode(mode))
DMEMIT("ro "); DMEMIT("ro ");
else else
DMEMIT("rw "); DMEMIT("rw ");
......
...@@ -99,10 +99,26 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req, ...@@ -99,10 +99,26 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
{ {
struct scatterlist sg; struct scatterlist sg;
if (likely(!is_vmalloc_addr(data))) {
sg_init_one(&sg, data, len); sg_init_one(&sg, data, len);
ahash_request_set_crypt(req, &sg, NULL, len); ahash_request_set_crypt(req, &sg, NULL, len);
return crypto_wait_req(crypto_ahash_update(req), wait); return crypto_wait_req(crypto_ahash_update(req), wait);
} else {
do {
int r;
size_t this_step = min_t(size_t, len, PAGE_SIZE - offset_in_page(data));
flush_kernel_vmap_range((void *)data, this_step);
sg_init_table(&sg, 1);
sg_set_page(&sg, vmalloc_to_page(data), this_step, offset_in_page(data));
ahash_request_set_crypt(req, &sg, NULL, this_step);
r = crypto_wait_req(crypto_ahash_update(req), wait);
if (unlikely(r))
return r;
data += this_step;
len -= this_step;
} while (len);
return 0;
}
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment