Commit 7272c30b authored by Linus Torvalds

Merge tag 'dm-3.6-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull device-mapper updates from Alasdair G Kergon:
- Flip the thin target into new read-only or failed modes if errors
  are detected;
- Handle chunk sizes that are not powers of two in the snapshot and
  thin targets;
- Provide a way for userspace to avoid replacing an already-loaded
  multipath hardware handler while booting;
- Reduce dm_thin_endio_hook slab size to avoid allocation failures;
- Numerous small changes and cleanups to the code.

* tag 'dm-3.6-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (63 commits)
  dm thin: commit before gathering status
  dm thin: add read only and fail io modes
  dm thin metadata: introduce dm_pool_abort_metadata
  dm thin metadata: introduce dm_pool_metadata_set_read_only
  dm persistent data: introduce dm_bm_set_read_only
  dm thin: reduce number of metadata commits
  dm thin metadata: add dm_thin_changed_this_transaction
  dm thin metadata: add format option to dm_pool_metadata_open
  dm thin metadata: tidy up open and format error paths
  dm thin metadata: only check incompat features on open
  dm thin metadata: remove duplicate pmd initialisation
  dm thin metadata: remove create parameter from __create_persistent_data_objects
  dm thin metadata: move __superblock_all_zeroes to __open_or_format_metadata
  dm thin metadata: remove nr_blocks arg from __create_persistent_data_objects
  dm thin metadata: split __open or format metadata
  dm thin metadata: use struct dm_pool_metadata members in __open_or_format_metadata
  dm thin metadata: zero unused superblock uuid
  dm thin metadata: lift __begin_transaction out of __write_initial_superblock
  dm thin metadata: move dm_commit_pool_metadata into __write_initial_superblock
  dm thin metadata: factor out __write_initial_superblock
  ...
parents 6f51f515 1f4e0ff0
......@@ -9,15 +9,14 @@ devices in parallel.
Parameters: <num devs> <chunk size> [<dev path> <offset>]+
<num devs>: Number of underlying devices.
<chunk size>: Size of each chunk of data. Must be a power-of-2 and at
least as large as the system's PAGE_SIZE.
<chunk size>: Size of each chunk of data. Must be at least as
large as the system's PAGE_SIZE.
<dev path>: Full pathname to the underlying block-device, or a
"major:minor" device-number.
<offset>: Starting sector within the device.
One or more underlying devices can be specified. The striped device size must
be a multiple of the chunk size and a multiple of the number of underlying
devices.
be a multiple of the chunk size multiplied by the number of underlying devices.
Example scripts
......
......@@ -231,6 +231,9 @@ i) Constructor
no_discard_passdown: Don't pass discards down to the underlying
data device, but just remove the mapping.
read_only: Don't allow any changes to be made to the pool
metadata.
Data block size must be between 64KB (128 sectors) and 1GB
(2097152 sectors) inclusive.
......@@ -239,7 +242,7 @@ ii) Status
<transaction id> <used metadata blocks>/<total metadata blocks>
<used data blocks>/<total data blocks> <held metadata root>
[no_]discard_passdown ro|rw
transaction id:
A 64-bit number used by userspace to help synchronise with metadata
......@@ -257,6 +260,21 @@ ii) Status
held root. This feature is not yet implemented so '-' is
always returned.
discard_passdown|no_discard_passdown
Whether or not discards are actually being passed down to the
underlying device. Even if this is enabled when loading the table,
it can get disabled if the underlying device doesn't support it.
ro|rw
If the pool encounters certain types of device failures it will
drop into a read-only metadata mode in which no changes to
the pool metadata (like allocating new blocks) are permitted.
In serious cases where even a read-only mode is deemed unsafe
no further I/O will be permitted and the status will just
contain the string 'Fail'. The userspace recovery tools
should then be used.
iii) Messages
create_thin <dev id>
......@@ -329,3 +347,7 @@ regain some space then send the 'trim' message to the pool.
ii) Status
<nr mapped sectors> <highest mapped sector>
If the pool has encountered device errors and failed, the status
will just contain the string 'Fail'. The userspace recovery
tools should then be used.
......@@ -260,15 +260,6 @@ config DM_DEBUG_BLOCK_STACK_TRACING
If unsure, say N.
config DM_DEBUG_SPACE_MAPS
boolean "Extra validation for thin provisioning space maps"
depends on DM_THIN_PROVISIONING
---help---
Enable this for messages that may help debug problems with the
space maps used by thin provisioning.
If unsure, say N.
config DM_MIRROR
tristate "Mirror target"
depends on BLK_DEV_DM
......
This diff is collapsed.
......@@ -295,7 +295,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio,
}
static int delay_status(struct dm_target *ti, status_type_t type,
char *result, unsigned maxlen)
unsigned status_flags, char *result, unsigned maxlen)
{
struct delay_c *dc = ti->private;
int sz = 0;
......
......@@ -142,24 +142,19 @@ EXPORT_SYMBOL(dm_exception_store_type_unregister);
static int set_chunk_size(struct dm_exception_store *store,
const char *chunk_size_arg, char **error)
{
unsigned long chunk_size_ulong;
char *value;
unsigned chunk_size;
chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
if (*chunk_size_arg == '\0' || *value != '\0' ||
chunk_size_ulong > UINT_MAX) {
if (kstrtouint(chunk_size_arg, 10, &chunk_size)) {
*error = "Invalid chunk size";
return -EINVAL;
}
if (!chunk_size_ulong) {
if (!chunk_size) {
store->chunk_size = store->chunk_mask = store->chunk_shift = 0;
return 0;
}
return dm_exception_store_set_chunk_size(store,
(unsigned) chunk_size_ulong,
error);
return dm_exception_store_set_chunk_size(store, chunk_size, error);
}
int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
......
......@@ -333,7 +333,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
}
static int flakey_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
unsigned status_flags, char *result, unsigned maxlen)
{
unsigned sz = 0;
struct flakey_c *fc = ti->private;
......
......@@ -1054,6 +1054,7 @@ static void retrieve_status(struct dm_table *table,
char *outbuf, *outptr;
status_type_t type;
size_t remaining, len, used = 0;
unsigned status_flags = 0;
outptr = outbuf = get_result_buffer(param, param_size, &len);
......@@ -1090,7 +1091,9 @@ static void retrieve_status(struct dm_table *table,
/* Get the status/table string from the target driver */
if (ti->type->status) {
if (ti->type->status(ti, type, outptr, remaining)) {
if (param->flags & DM_NOFLUSH_FLAG)
status_flags |= DM_STATUS_NOFLUSH_FLAG;
if (ti->type->status(ti, type, status_flags, outptr, remaining)) {
param->flags |= DM_BUFFER_FULL_FLAG;
break;
}
......
......@@ -96,7 +96,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio,
}
static int linear_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
unsigned status_flags, char *result, unsigned maxlen)
{
struct linear_c *lc = (struct linear_c *) ti->private;
......
......@@ -85,6 +85,7 @@ struct multipath {
unsigned queue_io:1; /* Must we queue all I/O? */
unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */
unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */
unsigned pg_init_retries; /* Number of times to retry pg_init */
unsigned pg_init_count; /* Number of times pg_init called */
......@@ -568,6 +569,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
int r;
struct pgpath *p;
struct multipath *m = ti->private;
struct request_queue *q = NULL;
const char *attached_handler_name;
/* we need at least a path arg */
if (as->argc < 1) {
......@@ -586,13 +589,37 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
goto bad;
}
if (m->hw_handler_name) {
struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
if (m->retain_attached_hw_handler || m->hw_handler_name)
q = bdev_get_queue(p->path.dev->bdev);
if (m->retain_attached_hw_handler) {
attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
if (attached_handler_name) {
/*
* Reset hw_handler_name to match the attached handler
* and clear any hw_handler_params associated with the
* ignored handler.
*
* NB. This modifies the table line to show the actual
* handler instead of the original table passed in.
*/
kfree(m->hw_handler_name);
m->hw_handler_name = attached_handler_name;
kfree(m->hw_handler_params);
m->hw_handler_params = NULL;
}
}
if (m->hw_handler_name) {
/*
* Increments scsi_dh reference, even when using an
* already-attached handler.
*/
r = scsi_dh_attach(q, m->hw_handler_name);
if (r == -EBUSY) {
/*
* Already attached to different hw_handler,
* Already attached to different hw_handler:
* try to reattach with correct one.
*/
scsi_dh_detach(q);
......@@ -760,7 +787,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
const char *arg_name;
static struct dm_arg _args[] = {
{0, 5, "invalid number of feature args"},
{0, 6, "invalid number of feature args"},
{1, 50, "pg_init_retries must be between 1 and 50"},
{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
};
......@@ -781,6 +808,11 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
continue;
}
if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
m->retain_attached_hw_handler = 1;
continue;
}
if (!strcasecmp(arg_name, "pg_init_retries") &&
(argc >= 1)) {
r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
......@@ -1346,7 +1378,7 @@ static void multipath_resume(struct dm_target *ti)
* num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
*/
static int multipath_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
unsigned status_flags, char *result, unsigned maxlen)
{
int sz = 0;
unsigned long flags;
......@@ -1364,13 +1396,16 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
else {
DMEMIT("%u ", m->queue_if_no_path +
(m->pg_init_retries > 0) * 2 +
(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2);
(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
m->retain_attached_hw_handler);
if (m->queue_if_no_path)
DMEMIT("queue_if_no_path ");
if (m->pg_init_retries)
DMEMIT("pg_init_retries %u ", m->pg_init_retries);
if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
if (m->retain_attached_hw_handler)
DMEMIT("retain_attached_hw_handler ");
}
if (!m->hw_handler_name || type == STATUSTYPE_INFO)
......@@ -1656,7 +1691,7 @@ static int multipath_busy(struct dm_target *ti)
*---------------------------------------------------------------*/
static struct target_type multipath_target = {
.name = "multipath",
.version = {1, 4, 0},
.version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
......
......@@ -101,20 +101,12 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
{
unsigned i;
struct raid_set *rs;
sector_t sectors_per_dev;
if (raid_devs <= raid_type->parity_devs) {
ti->error = "Insufficient number of devices";
return ERR_PTR(-EINVAL);
}
sectors_per_dev = ti->len;
if ((raid_type->level > 1) &&
sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
ti->error = "Target length not divisible by number of data devices";
return ERR_PTR(-EINVAL);
}
rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL);
if (!rs) {
ti->error = "Cannot allocate raid context";
......@@ -128,7 +120,6 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
rs->md.raid_disks = raid_devs;
rs->md.level = raid_type->level;
rs->md.new_level = rs->md.level;
rs->md.dev_sectors = sectors_per_dev;
rs->md.layout = raid_type->algorithm;
rs->md.new_layout = rs->md.layout;
rs->md.delta_disks = 0;
......@@ -143,6 +134,7 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
* rs->md.external
* rs->md.chunk_sectors
* rs->md.new_chunk_sectors
* rs->md.dev_sectors
*/
return rs;
......@@ -353,6 +345,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
{
unsigned i, rebuild_cnt = 0;
unsigned long value, region_size = 0;
sector_t sectors_per_dev = rs->ti->len;
sector_t max_io_len;
char *key;
/*
......@@ -429,13 +423,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
if (!strcasecmp(key, "rebuild")) {
rebuild_cnt++;
if (((rs->raid_type->level != 1) &&
(rebuild_cnt > rs->raid_type->parity_devs)) ||
((rs->raid_type->level == 1) &&
(rebuild_cnt > (rs->md.raid_disks - 1)))) {
rs->ti->error = "Too many rebuild devices specified for given RAID type";
switch (rs->raid_type->level) {
case 1:
if (rebuild_cnt >= rs->md.raid_disks) {
rs->ti->error = "Too many rebuild devices specified";
return -EINVAL;
}
break;
case 4:
case 5:
case 6:
if (rebuild_cnt > rs->raid_type->parity_devs) {
rs->ti->error = "Too many rebuild devices specified for given RAID type";
return -EINVAL;
}
break;
default:
DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
rs->ti->error = "Rebuild not supported for this RAID type";
return -EINVAL;
}
if (value > rs->md.raid_disks) {
rs->ti->error = "Invalid rebuild index given";
return -EINVAL;
......@@ -522,14 +531,19 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
return -EINVAL;
if (rs->md.chunk_sectors)
rs->ti->split_io = rs->md.chunk_sectors;
max_io_len = rs->md.chunk_sectors;
else
rs->ti->split_io = region_size;
max_io_len = region_size;
if (rs->md.chunk_sectors)
rs->ti->split_io = rs->md.chunk_sectors;
else
rs->ti->split_io = region_size;
if (dm_set_target_max_io_len(rs->ti, max_io_len))
return -EINVAL;
if ((rs->raid_type->level > 1) &&
sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) {
rs->ti->error = "Target length not divisible by number of data devices";
return -EINVAL;
}
rs->md.dev_sectors = sectors_per_dev;
/* Assume there are no metadata devices until the drives are parsed */
rs->md.persistent = 0;
......@@ -1067,7 +1081,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_c
}
static int raid_status(struct dm_target *ti, status_type_t type,
char *result, unsigned maxlen)
unsigned status_flags, char *result, unsigned maxlen)
{
struct raid_set *rs = ti->private;
unsigned raid_param_cnt = 1; /* at least 1 for chunksize */
......
......@@ -1081,10 +1081,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ti->private = ms;
ti->split_io = dm_rh_get_region_size(ms->rh);
r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh));
if (r)
goto err_free_context;
ti->num_flush_requests = 1;
ti->num_discard_requests = 1;
ti->discard_zeroes_data_unsupported = 1;
ti->discard_zeroes_data_unsupported = true;
ms->kmirrord_wq = alloc_workqueue("kmirrord",
WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
......@@ -1363,7 +1367,7 @@ static char device_status_char(struct mirror *m)
static int mirror_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
unsigned status_flags, char *result, unsigned maxlen)
{
unsigned int m, sz = 0;
struct mirror_set *ms = (struct mirror_set *) ti->private;
......
......@@ -691,7 +691,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
* Return a minimum chunk size of all snapshots that have the specified origin.
* Return zero if the origin has no snapshots.
*/
static sector_t __minimum_chunk_size(struct origin *o)
static uint32_t __minimum_chunk_size(struct origin *o)
{
struct dm_snapshot *snap;
unsigned chunk_size = 0;
......@@ -701,7 +701,7 @@ static sector_t __minimum_chunk_size(struct origin *o)
chunk_size = min_not_zero(chunk_size,
snap->store->chunk_size);
return chunk_size;
return (uint32_t) chunk_size;
}
/*
......@@ -1172,7 +1172,10 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Chunk size not set";
goto bad_read_metadata;
}
ti->split_io = s->store->chunk_size;
r = dm_set_target_max_io_len(ti, s->store->chunk_size);
if (r)
goto bad_read_metadata;
return 0;
......@@ -1239,7 +1242,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src,
snap_dest->store->snap = snap_dest;
snap_src->store->snap = snap_src;
snap_dest->ti->split_io = snap_dest->store->chunk_size;
snap_dest->ti->max_io_len = snap_dest->store->chunk_size;
snap_dest->valid = snap_src->valid;
/*
......@@ -1817,9 +1820,9 @@ static void snapshot_resume(struct dm_target *ti)
up_write(&s->lock);
}
static sector_t get_origin_minimum_chunksize(struct block_device *bdev)
static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
{
sector_t min_chunksize;
uint32_t min_chunksize;
down_read(&_origins_lock);
min_chunksize = __minimum_chunk_size(__lookup_origin(bdev));
......@@ -1838,15 +1841,15 @@ static void snapshot_merge_resume(struct dm_target *ti)
snapshot_resume(ti);
/*
* snapshot-merge acts as an origin, so set ti->split_io
* snapshot-merge acts as an origin, so set ti->max_io_len
*/
ti->split_io = get_origin_minimum_chunksize(s->origin->bdev);
ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev);
start_merge(s);
}
static int snapshot_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
unsigned status_flags, char *result, unsigned maxlen)
{
unsigned sz = 0;
struct dm_snapshot *snap = ti->private;
......@@ -2073,12 +2076,12 @@ static int origin_write_extent(struct dm_snapshot *merging_snap,
struct origin *o;
/*
* The origin's __minimum_chunk_size() got stored in split_io
* The origin's __minimum_chunk_size() got stored in max_io_len
* by snapshot_merge_resume().
*/
down_read(&_origins_lock);
o = __lookup_origin(merging_snap->origin->bdev);
for (n = 0; n < size; n += merging_snap->ti->split_io)
for (n = 0; n < size; n += merging_snap->ti->max_io_len)
if (__origin_write(&o->snapshots, sector + n, NULL) ==
DM_MAPIO_SUBMITTED)
must_wait = 1;
......@@ -2138,18 +2141,18 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
}
/*
* Set the target "split_io" field to the minimum of all the snapshots'
* Set the target "max_io_len" field to the minimum of all the snapshots'
* chunk sizes.
*/
static void origin_resume(struct dm_target *ti)
{
struct dm_dev *dev = ti->private;
ti->split_io = get_origin_minimum_chunksize(dev->bdev);
ti->max_io_len = get_origin_minimum_chunksize(dev->bdev);
}
static int origin_status(struct dm_target *ti, status_type_t type, char *result,
unsigned int maxlen)
static int origin_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
struct dm_dev *dev = ti->private;
......@@ -2176,7 +2179,6 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return max_size;
bvm->bi_bdev = dev->bdev;
bvm->bi_sector = bvm->bi_sector;
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
......
......@@ -26,14 +26,12 @@ struct stripe {
struct stripe_c {
uint32_t stripes;
int stripes_shift;
sector_t stripes_mask;
/* The size of this target / num. stripes */
sector_t stripe_width;
/* stripe chunk size */
uint32_t chunk_shift;
sector_t chunk_mask;
uint32_t chunk_size;
int chunk_size_shift;
/* Needed for handling events */
struct dm_target *ti;
......@@ -91,7 +89,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
/*
* Construct a striped mapping.
* <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
* <number of stripes> <chunk size> [<dev_path> <offset>]+
*/
static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
......@@ -99,7 +97,6 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
sector_t width;
uint32_t stripes;
uint32_t chunk_size;
char *end;
int r;
unsigned int i;
......@@ -108,34 +105,23 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -EINVAL;
}
stripes = simple_strtoul(argv[0], &end, 10);
if (!stripes || *end) {
if (kstrtouint(argv[0], 10, &stripes) || !stripes) {
ti->error = "Invalid stripe count";
return -EINVAL;
}
chunk_size = simple_strtoul(argv[1], &end, 10);
if (*end) {
if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) {
ti->error = "Invalid chunk_size";
return -EINVAL;
}
/*
* chunk_size is a power of two
*/
if (!is_power_of_2(chunk_size) ||
(chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) {
ti->error = "Invalid chunk size";
return -EINVAL;
}
if (ti->len & (chunk_size - 1)) {
width = ti->len;
if (sector_div(width, chunk_size)) {
ti->error = "Target length not divisible by "
"chunk size";
return -EINVAL;
}
width = ti->len;
if (sector_div(width, stripes)) {
ti->error = "Target length not divisible by "
"number of stripes";
......@@ -167,17 +153,21 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (stripes & (stripes - 1))
sc->stripes_shift = -1;
else {
sc->stripes_shift = ffs(stripes) - 1;
sc->stripes_mask = ((sector_t) stripes) - 1;
}
else
sc->stripes_shift = __ffs(stripes);
r = dm_set_target_max_io_len(ti, chunk_size);
if (r)
return r;
ti->split_io = chunk_size;
ti->num_flush_requests = stripes;
ti->num_discard_requests = stripes;
sc->chunk_shift = ffs(chunk_size) - 1;
sc->chunk_mask = ((sector_t) chunk_size) - 1;
sc->chunk_size = chunk_size;
if (chunk_size & (chunk_size - 1))
sc->chunk_size_shift = -1;
else
sc->chunk_size_shift = __ffs(chunk_size);
/*
* Get the stripe destinations.
......@@ -216,17 +206,29 @@ static void stripe_dtr(struct dm_target *ti)
static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
uint32_t *stripe, sector_t *result)
{
sector_t offset = dm_target_offset(sc->ti, sector);
sector_t chunk = offset >> sc->chunk_shift;
sector_t chunk = dm_target_offset(sc->ti, sector);
sector_t chunk_offset;
if (sc->chunk_size_shift < 0)
chunk_offset = sector_div(chunk, sc->chunk_size);
else {
chunk_offset = chunk & (sc->chunk_size - 1);
chunk >>= sc->chunk_size_shift;
}
if (sc->stripes_shift < 0)
*stripe = sector_div(chunk, sc->stripes);
else {
*stripe = chunk & sc->stripes_mask;
*stripe = chunk & (sc->stripes - 1);
chunk >>= sc->stripes_shift;
}
*result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask);
if (sc->chunk_size_shift < 0)
chunk *= sc->chunk_size;
else
chunk <<= sc->chunk_size_shift;
*result = chunk + chunk_offset;
}
static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
......@@ -237,9 +239,16 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
stripe_map_sector(sc, sector, &stripe, result);
if (stripe == target_stripe)
return;
*result &= ~sc->chunk_mask; /* round down */
/* round down */
sector = *result;
if (sc->chunk_size_shift < 0)
*result -= sector_div(sector, sc->chunk_size);
else
*result = sector & ~(sector_t)(sc->chunk_size - 1);
if (target_stripe < stripe)
*result += sc->chunk_mask + 1; /* next chunk */
*result += sc->chunk_size; /* next chunk */
}
static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
......@@ -302,8 +311,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
*
*/
static int stripe_status(struct dm_target *ti,
status_type_t type, char *result, unsigned int maxlen)
static int stripe_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
struct stripe_c *sc = (struct stripe_c *) ti->private;
char buffer[sc->stripes + 1];
......@@ -324,7 +333,7 @@ static int stripe_status(struct dm_target *ti,
case STATUSTYPE_TABLE:
DMEMIT("%d %llu", sc->stripes,
(unsigned long long)sc->chunk_mask + 1);
(unsigned long long)sc->chunk_size);
for (i = 0; i < sc->stripes; i++)
DMEMIT(" %s %llu", sc->stripe[i].dev->name,
(unsigned long long)sc->stripe[i].physical_start);
......@@ -391,7 +400,7 @@ static void stripe_io_hints(struct dm_target *ti,
struct queue_limits *limits)
{
struct stripe_c *sc = ti->private;
unsigned chunk_size = (sc->chunk_mask + 1) << 9;
unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT;
blk_limits_io_min(limits, chunk_size);
blk_limits_io_opt(limits, chunk_size * sc->stripes);
......@@ -419,7 +428,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
static struct target_type stripe_target = {
.name = "striped",
.version = {1, 4, 0},
.version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = stripe_ctr,
.dtr = stripe_dtr,
......
......@@ -1319,6 +1319,9 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
if (!ti->num_flush_requests)
continue;
if (ti->flush_supported)
return 1;
if (ti->type->iterate_devices &&
ti->type->iterate_devices(ti, device_flush_capable, &flush))
return 1;
......
This diff is collapsed.
......@@ -38,7 +38,8 @@ typedef uint64_t dm_thin_id;
* Reopens or creates a new, empty metadata volume.
*/
struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
sector_t data_block_size);
sector_t data_block_size,
bool format_device);
int dm_pool_metadata_close(struct dm_pool_metadata *pmd);
......@@ -78,6 +79,16 @@ int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd,
*/
int dm_pool_commit_metadata(struct dm_pool_metadata *pmd);
/*
* Discards all uncommitted changes. Rereads the superblock, rolling back
* to the last good transaction. Thin devices remain open.
* dm_thin_aborted_changes() tells you if they had uncommitted changes.
*
* If this call fails it's only useful to call dm_pool_metadata_close().
* All other methods will fail with -EINVAL.
*/
int dm_pool_abort_metadata(struct dm_pool_metadata *pmd);
/*
* Set/get userspace transaction id.
*/
......@@ -119,7 +130,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td);
struct dm_thin_lookup_result {
dm_block_t block;
int shared;
unsigned shared:1;
};
/*
......@@ -147,6 +158,10 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block);
/*
* Queries.
*/
bool dm_thin_changed_this_transaction(struct dm_thin_device *td);
bool dm_thin_aborted_changes(struct dm_thin_device *td);
int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
dm_block_t *highest_mapped);
......@@ -171,6 +186,12 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);
*/
int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
/*
* Flicks the underlying block manager into read only mode, so you know
* that nothing is changing.
*/
void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd);
/*----------------------------------------------------------------*/
#endif
This diff is collapsed.
......@@ -515,7 +515,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio,
* Status: V (valid) or C (corruption found)
*/
static int verity_status(struct dm_target *ti, status_type_t type,
char *result, unsigned maxlen)
unsigned status_flags, char *result, unsigned maxlen)
{
struct dm_verity *v = ti->private;
unsigned sz = 0;
......
......@@ -968,22 +968,41 @@ static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti
static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{
sector_t len = max_io_len_target_boundary(sector, ti);
sector_t offset, max_len;
/*
* Does the target need to split even further ?
* Does the target need to split even further?
*/
if (ti->split_io) {
sector_t boundary;
sector_t offset = dm_target_offset(ti, sector);
boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
- offset;
if (len > boundary)
len = boundary;
if (ti->max_io_len) {
offset = dm_target_offset(ti, sector);
if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
max_len = sector_div(offset, ti->max_io_len);
else
max_len = offset & (ti->max_io_len - 1);
max_len = ti->max_io_len - max_len;
if (len > max_len)
len = max_len;
}
return len;
}
/*
 * Set the maximum I/O length (in sectors) for a target.
 *
 * @ti:  target whose max_io_len field is updated.
 * @len: requested maximum length; must fit in 32 bits because
 *       ti->max_io_len is stored as a uint32_t.
 *
 * Returns 0 on success. If @len exceeds UINT_MAX the value is rejected:
 * an error is logged, ti->error is pointed at a description of the
 * problem, ti->max_io_len is left untouched and -EINVAL is returned.
 */
int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
	/* Common case first: the value fits, so just record it. */
	if (len <= UINT_MAX) {
		ti->max_io_len = (uint32_t) len;
		return 0;
	}

	DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
	      (unsigned long long)len, UINT_MAX);
	ti->error = "Maximum size of target IO is too large";

	return -EINVAL;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
static void __map_bio(struct dm_target *ti, struct bio *clone,
struct dm_target_io *tio)
{
......@@ -1196,7 +1215,10 @@ static int __clone_and_map_discard(struct clone_info *ci)
if (!ti->num_discard_requests)
return -EOPNOTSUPP;
len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
if (!ti->split_discard_requests)
len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
else
len = min(ci->sector_count, max_io_len(ci->sector, ti));
__issue_target_requests(ci, ti, ti->num_discard_requests, len);
......
......@@ -22,6 +22,11 @@
#define DM_SUSPEND_LOCKFS_FLAG (1 << 0)
#define DM_SUSPEND_NOFLUSH_FLAG (1 << 1)
/*
* Status feature flags
*/
#define DM_STATUS_NOFLUSH_FLAG (1 << 0)
/*
* Type of table and mapped_device's mempool
*/
......
obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o
dm-persistent-data-objs := \
dm-block-manager.o \
dm-space-map-checker.o \
dm-space-map-common.o \
dm-space-map-disk.o \
dm-space-map-metadata.o \
......
......@@ -325,11 +325,6 @@ static struct dm_buffer *to_buffer(struct dm_block *b)
return (struct dm_buffer *) b;
}
static struct dm_bufio_client *to_bufio(struct dm_block_manager *bm)
{
return (struct dm_bufio_client *) bm;
}
dm_block_t dm_block_location(struct dm_block *b)
{
return dm_bufio_get_block_number(to_buffer(b));
......@@ -367,34 +362,60 @@ static void dm_block_manager_write_callback(struct dm_buffer *buf)
/*----------------------------------------------------------------
* Public interface
*--------------------------------------------------------------*/
/*
 * Block manager state: wraps a dm-bufio client and adds a read-only flag.
 */
struct dm_block_manager {
/* Client returned by dm_bufio_client_create(); passed to the dm_bufio_* calls. */
struct dm_bufio_client *bufio;
/*
 * Initialised to false on creation. While set, dm_bm_write_lock(),
 * dm_bm_write_lock_zero() and dm_bm_flush_and_unlock() return -EPERM.
 */
bool read_only:1;
};
struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
unsigned block_size,
unsigned cache_size,
unsigned max_held_per_thread)
{
return (struct dm_block_manager *)
dm_bufio_client_create(bdev, block_size, max_held_per_thread,
sizeof(struct buffer_aux),
dm_block_manager_alloc_callback,
dm_block_manager_write_callback);
int r;
struct dm_block_manager *bm;
bm = kmalloc(sizeof(*bm), GFP_KERNEL);
if (!bm) {
r = -ENOMEM;
goto bad;
}
bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
sizeof(struct buffer_aux),
dm_block_manager_alloc_callback,
dm_block_manager_write_callback);
if (IS_ERR(bm->bufio)) {
r = PTR_ERR(bm->bufio);
kfree(bm);
goto bad;
}
bm->read_only = false;
return bm;
bad:
return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
void dm_block_manager_destroy(struct dm_block_manager *bm)
{
return dm_bufio_client_destroy(to_bufio(bm));
dm_bufio_client_destroy(bm->bufio);
kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);
unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
return dm_bufio_get_block_size(to_bufio(bm));
return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);
dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
return dm_bufio_get_device_size(to_bufio(bm));
return dm_bufio_get_device_size(bm->bufio);
}
static int dm_bm_validate_buffer(struct dm_block_manager *bm,
......@@ -406,7 +427,7 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm,
int r;
if (!v)
return 0;
r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(to_bufio(bm)));
r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
if (unlikely(r))
return r;
aux->validator = v;
......@@ -430,7 +451,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
void *p;
int r;
p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result);
p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
if (unlikely(IS_ERR(p)))
return PTR_ERR(p);
......@@ -463,7 +484,10 @@ int dm_bm_write_lock(struct dm_block_manager *bm,
void *p;
int r;
p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result);
if (bm->read_only)
return -EPERM;
p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
if (unlikely(IS_ERR(p)))
return PTR_ERR(p);
......@@ -496,7 +520,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm,
void *p;
int r;
p = dm_bufio_get(to_bufio(bm), b, (struct dm_buffer **) result);
p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
if (unlikely(IS_ERR(p)))
return PTR_ERR(p);
if (unlikely(!p))
......@@ -529,7 +553,10 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm,
struct buffer_aux *aux;
void *p;
p = dm_bufio_new(to_bufio(bm), b, (struct dm_buffer **) result);
if (bm->read_only)
return -EPERM;
p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
if (unlikely(IS_ERR(p)))
return PTR_ERR(p);
......@@ -547,6 +574,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm,
return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);
int dm_bm_unlock(struct dm_block *b)
{
......@@ -565,45 +593,30 @@ int dm_bm_unlock(struct dm_block *b)
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
/*
 * Drop the lock held on @b and hand the underlying buffer to
 * dm_bufio_release_move() together with the destination block @n.
 *
 * A write-locked buffer is marked dirty before its write lock is
 * released; otherwise only the read lock is released.  Always returns 0.
 */
int dm_bm_unlock_move(struct dm_block *b, dm_block_t n)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(to_buffer(b));

	if (!aux->write_locked) {
		bl_up_read(&aux->lock);
	} else {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	}

	dm_bufio_release_move(to_buffer(b), n);

	return 0;
}
int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
struct dm_block *superblock)
{
int r;
r = dm_bufio_write_dirty_buffers(to_bufio(bm));
if (unlikely(r))
return r;
r = dm_bufio_issue_flush(to_bufio(bm));
if (unlikely(r))
if (bm->read_only)
return -EPERM;
r = dm_bufio_write_dirty_buffers(bm->bufio);
if (unlikely(r)) {
dm_bm_unlock(superblock);
return r;
}
dm_bm_unlock(superblock);
r = dm_bufio_write_dirty_buffers(to_bufio(bm));
if (unlikely(r))
return r;
r = dm_bufio_issue_flush(to_bufio(bm));
if (unlikely(r))
return r;
return dm_bufio_write_dirty_buffers(bm->bufio);
}
return 0;
void dm_bm_set_read_only(struct dm_block_manager *bm)
{
bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
......
......@@ -96,14 +96,6 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b,
int dm_bm_unlock(struct dm_block *b);
/*
* An optimisation; we often want to copy a block's contents to a new
* block. eg, as part of the shadowing operation. It's far better for
* bufio to do this move behind the scenes than hold 2 locks and memcpy the
* data.
*/
int dm_bm_unlock_move(struct dm_block *b, dm_block_t n);
/*
* It's a common idiom to have a superblock that should be committed last.
*
......@@ -116,6 +108,19 @@ int dm_bm_unlock_move(struct dm_block *b, dm_block_t n);
int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
struct dm_block *superblock);
/*
* Switches the bm to a read only mode. Once read-only mode
* has been entered the following functions will return -EPERM.
*
* dm_bm_write_lock
* dm_bm_write_lock_zero
* dm_bm_flush_and_unlock
*
* Additionally you should not use dm_bm_unlock_move, however no error will
* be returned if you do.
*/
void dm_bm_set_read_only(struct dm_block_manager *bm);
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor);
/*----------------------------------------------------------------*/
......
/*
* Copyright (C) 2011 Red Hat, Inc.
*
* This file is released under the GPL.
*/
#include "dm-space-map-checker.h"
#include <linux/device-mapper.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#ifdef CONFIG_DM_DEBUG_SPACE_MAPS
#define DM_MSG_PREFIX "space map checker"
/*----------------------------------------------------------------*/
/*
 * In-memory table of per-block reference counts maintained by the checker.
 */
struct count_array {
/* Number of entries in counts[]; valid block indices are 0..nr-1. */
dm_block_t nr;
/* Number of entries whose count is zero (maintained by ca_set_count()). */
dm_block_t nr_free;
/* One reference count per block; left NULL by ca_create() when nr is 0. */
uint32_t *counts;
};
/*
 * Read the recorded reference count of block @b into @count.
 *
 * Returns 0 on success, or -EINVAL (leaving @count untouched) when @b is
 * outside the array.
 */
static int ca_get_count(struct count_array *ca, dm_block_t b, uint32_t *count)
{
	if (b < ca->nr) {
		*count = ca->counts[b];
		return 0;
	}

	return -EINVAL;
}
/*
 * Set @r to 1 if block @b has a recorded reference count above one,
 * and to 0 otherwise.
 *
 * Returns 0 on success, or -EINVAL (leaving @r untouched) when @b is
 * outside the array.
 */
static int ca_count_more_than_one(struct count_array *ca, dm_block_t b, int *r)
{
	if (b < ca->nr) {
		*r = (ca->counts[b] > 1) ? 1 : 0;
		return 0;
	}

	return -EINVAL;
}
/*
 * Overwrite the recorded reference count of block @b with @count and keep
 * nr_free in step.
 *
 * Returns 0 on success, or -EINVAL when @b is outside the array.
 */
static int ca_set_count(struct count_array *ca, dm_block_t b, uint32_t count)
{
	uint32_t *slot;

	if (b >= ca->nr)
		return -EINVAL;

	slot = &ca->counts[b];

	/* Only zero <-> non-zero transitions change the free total. */
	if (*slot && !count)
		ca->nr_free++;
	else if (!*slot && count)
		ca->nr_free--;

	*slot = count;

	return 0;
}
/*
 * Add one to the recorded reference count of block @b.
 *
 * Returns 0 on success, or -EINVAL when @b is outside the array.
 */
static int ca_inc_block(struct count_array *ca, dm_block_t b)
{
	uint32_t bumped;

	if (b >= ca->nr)
		return -EINVAL;

	bumped = ca->counts[b] + 1;

	/* @b was range-checked above, so the result needs no inspection. */
	ca_set_count(ca, b, bumped);

	return 0;
}
/*
 * Subtract one from the recorded reference count of block @b.
 *
 * Triggers BUG() if the count is already zero.  Returns 0 on success, or
 * -EINVAL when @b is outside the array.
 */
static int ca_dec_block(struct count_array *ca, dm_block_t b)
{
	uint32_t cur;

	if (b >= ca->nr)
		return -EINVAL;

	cur = ca->counts[b];
	BUG_ON(cur == 0);

	/* @b was range-checked above, so the result needs no inspection. */
	ca_set_count(ca, b, cur - 1);

	return 0;
}
static int ca_create(struct count_array *ca, struct dm_space_map *sm)
{
int r;
dm_block_t nr_blocks;
r = dm_sm_get_nr_blocks(sm, &nr_blocks);
if (r)
return r;
ca->nr = nr_blocks;
ca->nr_free = nr_blocks;
if (!nr_blocks)
ca->counts = NULL;
else {
ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks);
if (!ca->counts)
return -ENOMEM;
}
return 0;
}
/*
 * Free the counter array owned by @ca.  The ca->counts pointer itself is
 * left unchanged, so callers must not use it again without reassigning it.
 */
static void ca_destroy(struct count_array *ca)
{
vfree(ca->counts);
}
/*
 * Populate @ca with the reference count that @sm reports for every block.
 *
 * @ca must already hold exactly as many entries as @sm has blocks; a
 * mismatch triggers BUG().  Returns 0 on success, or the first error
 * returned by dm_sm_get_nr_blocks() / dm_sm_get_count().
 */
static int ca_load(struct count_array *ca, struct dm_space_map *sm)
{
	int r;
	dm_block_t total, b;

	r = dm_sm_get_nr_blocks(sm, &total);
	if (r)
		return r;

	BUG_ON(ca->nr != total);

	DMWARN("Loading debug space map from disk. This may take some time");

	for (b = 0; b < total; b++) {
		uint32_t count;

		r = dm_sm_get_count(sm, b, &count);
		if (r) {
			DMERR("load failed");
			return r;
		}

		ca_set_count(ca, b, count);
	}

	DMWARN("Load complete");

	return 0;
}
/*
 * Grow @ca by @extra_blocks entries.
 *
 * A new zeroed array is allocated, the existing counts are copied into its
 * start, and the old array is freed.  The added entries count as free.
 *
 * Returns 0 on success or -ENOMEM if the larger array cannot be allocated,
 * in which case @ca is left untouched.
 */
static int ca_extend(struct count_array *ca, dm_block_t extra_blocks)
{
	dm_block_t nr_blocks = ca->nr + extra_blocks;
	size_t bytes = sizeof(*ca->counts) * nr_blocks;
	uint32_t *counts;

	/*
	 * Refuse a wrapped block total or a byte count that does not fit the
	 * allocator's size argument.  Either would give an array shorter
	 * than the ca->nr entries copied into it below.
	 */
	if (nr_blocks < ca->nr || bytes / sizeof(*ca->counts) != nr_blocks)
		return -ENOMEM;

	counts = vzalloc(bytes);
	if (!counts)
		return -ENOMEM;

	if (ca->counts) {
		memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
		ca_destroy(ca);
	}

	ca->nr = nr_blocks;
	ca->nr_free += extra_blocks;
	ca->counts = counts;

	return 0;
}
/*
 * Make @old an exact copy of @new (free total and every count).
 *
 * If @new has more entries than @old, @old is extended first.  @old being
 * larger than @new is a bug.
 *
 * Returns 0 on success, or the error from ca_extend() when @old cannot be
 * grown; @old is then left at its previous size and contents.
 */
static int ca_commit(struct count_array *old, struct count_array *new)
{
	int r;

	if (old->nr != new->nr) {
		BUG_ON(old->nr > new->nr);

		/*
		 * Propagate allocation failure rather than falling through
		 * to the size assertion below with @old still too small.
		 */
		r = ca_extend(old, new->nr - old->nr);
		if (r)
			return r;
	}

	BUG_ON(old->nr != new->nr);
	old->nr_free = new->nr_free;
	memcpy(old->counts, new->counts, sizeof(*old->counts) * old->nr);

	return 0;
}
/*----------------------------------------------------------------*/
/*
 * A space map that forwards every operation to a wrapped space map and
 * mirrors the reference counts in memory so the results can be compared.
 */
struct sm_checker {
/* Embedded interface handed to callers; container_of() recovers the checker. */
struct dm_space_map sm;
/* Counts as of the last commit (refreshed by ca_commit()). */
struct count_array old_counts;
/* Counts including every change made since the last commit. */
struct count_array counts;
/* The wrapped space map; destroyed along with the checker. */
struct dm_space_map *real_sm;
};
/*
 * Destroy the wrapped space map, release both count arrays and free the
 * checker itself.
 */
static void sm_checker_destroy(struct dm_space_map *sm)
{
	struct sm_checker *checker;

	checker = container_of(sm, struct sm_checker, sm);

	dm_sm_destroy(checker->real_sm);
	ca_destroy(&checker->old_counts);
	ca_destroy(&checker->counts);
	kfree(checker);
}
/*
 * Report the wrapped space map's block total in @count.
 *
 * On success the value must equal the size of the committed count array;
 * a mismatch triggers BUG().  Returns the wrapped space map's result.
 */
static int sm_checker_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_checker *checker = container_of(sm, struct sm_checker, sm);
	int r;

	r = dm_sm_get_nr_blocks(checker->real_sm, count);
	if (r)
		return r;

	BUG_ON(checker->old_counts.nr != *count);

	return 0;
}
/*
 * Report the wrapped space map's free-block total in @count.
 *
 * On success the checker recounts free blocks itself: a block is free only
 * if its count is zero in both the committed and the current arrays.  A
 * disagreement is logged with DMERR but does not change the return value.
 *
 * Returns the wrapped space map's result.
 */
static int sm_checker_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
	int r = dm_sm_get_nr_free(smc->real_sm, count);

	if (!r) {
		/*
		 * Slow, but we know it's correct.
		 */
		dm_block_t b, n = 0;

		for (b = 0; b < smc->old_counts.nr; b++)
			if (smc->old_counts.counts[b] == 0 &&
			    smc->counts.counts[b] == 0)
				n++;

		/* Print the full-width values; %u would truncate large totals. */
		if (n != *count)
			DMERR("free block counts differ, checker %llu, sm-disk:%llu",
			      (unsigned long long) n,
			      (unsigned long long) *count);
	}

	return r;
}
/*
 * Allocate a block from the wrapped space map and record it with a
 * reference count of one.
 *
 * On success the returned block must be in range and have a zero count in
 * both the committed and current arrays; anything else triggers BUG().
 * Returns the wrapped space map's result.
 */
static int sm_checker_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_checker *checker = container_of(sm, struct sm_checker, sm);
	int r;

	r = dm_sm_new_block(checker->real_sm, b);
	if (r)
		return r;

	BUG_ON(*b >= checker->old_counts.nr);
	BUG_ON(checker->old_counts.counts[*b] != 0);
	BUG_ON(*b >= checker->counts.nr);
	BUG_ON(checker->counts.counts[*b] != 0);

	ca_set_count(&checker->counts, *b, 1);

	return 0;
}
/*
 * Increment block @b in the wrapped space map and in the checker's own
 * counts.  The two return codes must match, otherwise BUG() is triggered.
 * Returns the wrapped space map's result.
 */
static int sm_checker_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_checker *checker = container_of(sm, struct sm_checker, sm);
	int real_r, shadow_r;

	real_r = dm_sm_inc_block(checker->real_sm, b);
	shadow_r = ca_inc_block(&checker->counts, b);

	BUG_ON(real_r != shadow_r);

	return real_r;
}
/*
 * Decrement block @b in the wrapped space map and in the checker's own
 * counts.  The two return codes must match, otherwise BUG() is triggered.
 * Returns the wrapped space map's result.
 */
static int sm_checker_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_checker *checker = container_of(sm, struct sm_checker, sm);
	int real_r, shadow_r;

	real_r = dm_sm_dec_block(checker->real_sm, b);
	shadow_r = ca_dec_block(&checker->counts, b);

	BUG_ON(real_r != shadow_r);

	return real_r;
}
/*
 * Fetch the reference count of block @b from the wrapped space map into
 * @result and compare it with the checker's own record.
 *
 * Differing return codes, or differing counts on success, trigger BUG().
 * Returns the wrapped space map's result.
 */
static int sm_checker_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result)
{
	struct sm_checker *checker = container_of(sm, struct sm_checker, sm);
	uint32_t expected = 0;
	int real_r, shadow_r;

	real_r = dm_sm_get_count(checker->real_sm, b, result);
	shadow_r = ca_get_count(&checker->counts, b, &expected);

	BUG_ON(real_r != shadow_r);
	BUG_ON(!real_r && *result != expected);

	return real_r;
}
/*
 * Ask the wrapped space map whether block @b has a reference count above
 * one, storing the answer in @result, and compare it with the checker's
 * own record.
 *
 * Differing return codes trigger BUG().  On success BUG() is also
 * triggered if the wrapped space map answers "no" while the checker's
 * record says "yes"; the opposite disagreement is not checked.
 * Returns the wrapped space map's result.
 */
static int sm_checker_count_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result)
{
	struct sm_checker *checker = container_of(sm, struct sm_checker, sm);
	int expected = 0;
	int real_r, shadow_r;

	real_r = dm_sm_count_is_more_than_one(checker->real_sm, b, result);
	shadow_r = ca_count_more_than_one(&checker->counts, b, &expected);

	BUG_ON(real_r != shadow_r);
	BUG_ON(!real_r && !(*result) && expected);

	return real_r;
}
/*
 * Set the reference count of block @b to @count in the wrapped space map
 * and in the checker's own counts.
 *
 * @b being outside the checker's array, or the two return codes differing,
 * triggers BUG().  Returns the wrapped space map's result.
 */
static int sm_checker_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count)
{
	struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
	int r = dm_sm_set_count(smc->real_sm, b, count);
	int r2;

	BUG_ON(b >= smc->counts.nr);

	r2 = ca_set_count(&smc->counts, b, count);
	BUG_ON(r != r2);

	return r;
}
/*
 * Commit the wrapped space map and, if that succeeds, copy the current
 * counts over the committed ones.
 *
 * Returns 0 on success or the first error encountered.
 */
static int sm_checker_commit(struct dm_space_map *sm)
{
	struct sm_checker *checker = container_of(sm, struct sm_checker, sm);
	int r = dm_sm_commit(checker->real_sm);

	if (!r)
		r = ca_commit(&checker->old_counts, &checker->counts);

	return r;
}
/*
 * Extend the wrapped space map by @extra_blocks and, if that succeeds,
 * grow the checker's current count array by the same amount.
 *
 * Returns 0 on success or the first error encountered.
 */
static int sm_checker_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	struct sm_checker *checker = container_of(sm, struct sm_checker, sm);
	int r = dm_sm_extend(checker->real_sm, extra_blocks);

	if (!r)
		r = ca_extend(&checker->counts, extra_blocks);

	return r;
}
/*
 * Pass the root-size query straight through to the wrapped space map.
 */
static int sm_checker_root_size(struct dm_space_map *sm, size_t *result)
{
	struct dm_space_map *real;

	real = container_of(sm, struct sm_checker, sm)->real_sm;

	return dm_sm_root_size(real, result);
}
/*
 * Pass the copy-root request straight through to the wrapped space map.
 */
static int sm_checker_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len)
{
	struct dm_space_map *real;

	real = container_of(sm, struct sm_checker, sm)->real_sm;

	return dm_sm_copy_root(real, copy_to_here_le, len);
}
/*----------------------------------------------------------------*/
/*
 * Template of checker operations; the create functions memcpy this into
 * the space map embedded in each new sm_checker.
 */
static struct dm_space_map ops_ = {
.destroy = sm_checker_destroy,
.get_nr_blocks = sm_checker_get_nr_blocks,
.get_nr_free = sm_checker_get_nr_free,
.inc_block = sm_checker_inc_block,
.dec_block = sm_checker_dec_block,
.new_block = sm_checker_new_block,
.get_count = sm_checker_get_count,
.count_is_more_than_one = sm_checker_count_more_than_one,
.set_count = sm_checker_set_count,
.commit = sm_checker_commit,
.extend = sm_checker_extend,
.root_size = sm_checker_root_size,
.copy_root = sm_checker_copy_root
};
/*
 * Wrap an existing space map @sm in a checker.
 *
 * Both count arrays are sized from @sm, the current counts are loaded from
 * it block by block, and the result is copied into the committed counts.
 *
 * Returns the checker's embedded space map on success.  Returns
 * ERR_PTR(-EINVAL) if @sm is NULL or an error pointer, ERR_PTR(-ENOMEM) if
 * the checker cannot be allocated, or the ERR_PTR-encoded error of the
 * failing step.  @sm itself is not destroyed on failure.
 */
struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
{
	int r;
	struct sm_checker *checker;

	if (IS_ERR_OR_NULL(sm))
		return ERR_PTR(-EINVAL);

	checker = kmalloc(sizeof(*checker), GFP_KERNEL);
	if (!checker)
		return ERR_PTR(-ENOMEM);

	memcpy(&checker->sm, &ops_, sizeof(checker->sm));

	r = ca_create(&checker->old_counts, sm);
	if (r)
		goto bad_free;

	r = ca_create(&checker->counts, sm);
	if (r)
		goto bad_old_counts;

	checker->real_sm = sm;

	r = ca_load(&checker->counts, sm);
	if (r)
		goto bad_counts;

	r = ca_commit(&checker->old_counts, &checker->counts);
	if (r)
		goto bad_counts;

	return &checker->sm;

bad_counts:
	ca_destroy(&checker->counts);
bad_old_counts:
	ca_destroy(&checker->old_counts);
bad_free:
	kfree(checker);
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_sm_checker_create);
/*
 * Wrap a space map @sm in a checker without loading any counts from it.
 *
 * Both count arrays are sized from @sm and start out zeroed.
 *
 * Returns the checker's embedded space map on success.  Returns
 * ERR_PTR(-EINVAL) if @sm is NULL or an error pointer, ERR_PTR(-ENOMEM) if
 * the checker cannot be allocated, or the ERR_PTR-encoded error from
 * ca_create().  @sm itself is not destroyed on failure.
 */
struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
{
	int r;
	struct sm_checker *checker;

	if (IS_ERR_OR_NULL(sm))
		return ERR_PTR(-EINVAL);

	checker = kmalloc(sizeof(*checker), GFP_KERNEL);
	if (!checker)
		return ERR_PTR(-ENOMEM);

	memcpy(&checker->sm, &ops_, sizeof(checker->sm));

	r = ca_create(&checker->old_counts, sm);
	if (r)
		goto bad_free;

	r = ca_create(&checker->counts, sm);
	if (r)
		goto bad_old_counts;

	checker->real_sm = sm;

	return &checker->sm;

bad_old_counts:
	ca_destroy(&checker->old_counts);
bad_free:
	kfree(checker);
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh);
/*----------------------------------------------------------------*/
#else
/*
 * Built when CONFIG_DM_DEBUG_SPACE_MAPS is not defined: no checker is
 * created and @sm is returned unchanged.
 */
struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
{
return sm;
}
EXPORT_SYMBOL_GPL(dm_sm_checker_create);
/*
 * Built when CONFIG_DM_DEBUG_SPACE_MAPS is not defined: no checker is
 * created and @sm is returned unchanged.
 */
struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
{
return sm;
}
EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh);
/*----------------------------------------------------------------*/
#endif
/*
* Copyright (C) 2011 Red Hat, Inc.
*
* This file is released under the GPL.
*/
#ifndef SNAPSHOTS_SPACE_MAP_CHECKER_H
#define SNAPSHOTS_SPACE_MAP_CHECKER_H
#include "dm-space-map.h"
/*----------------------------------------------------------------*/
/*
* This space map wraps a real on-disk space map, and verifies all of its
* operations. It uses a lot of memory, so only use if you have a specific
* problem that you're debugging.
*
* On success, ownership of @sm passes to the returned space map; on
* failure the caller still owns @sm.
*/
struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm);
struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm);
/*----------------------------------------------------------------*/
#endif
......@@ -224,6 +224,7 @@ static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
ll->nr_blocks = 0;
ll->bitmap_root = 0;
ll->ref_count_root = 0;
ll->bitmap_index_changed = false;
return 0;
}
......@@ -476,7 +477,15 @@ int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
int sm_ll_commit(struct ll_disk *ll)
{
return ll->commit(ll);
int r = 0;
if (ll->bitmap_index_changed) {
r = ll->commit(ll);
if (!r)
ll->bitmap_index_changed = false;
}
return r;
}
/*----------------------------------------------------------------*/
......@@ -491,6 +500,7 @@ static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
struct disk_index_entry *ie)
{
ll->bitmap_index_changed = true;
memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
return 0;
}
......
......@@ -78,6 +78,7 @@ struct ll_disk {
open_index_fn open_index;
max_index_entries_fn max_entries;
commit_fn commit;
bool bitmap_index_changed:1;
};
struct disk_sm_root {
......
......@@ -4,7 +4,6 @@
* This file is released under the GPL.
*/
#include "dm-space-map-checker.h"
#include "dm-space-map-common.h"
#include "dm-space-map-disk.h"
#include "dm-space-map.h"
......@@ -252,9 +251,8 @@ static struct dm_space_map ops = {
.copy_root = sm_disk_copy_root
};
static struct dm_space_map *dm_sm_disk_create_real(
struct dm_transaction_manager *tm,
dm_block_t nr_blocks)
struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
dm_block_t nr_blocks)
{
int r;
struct sm_disk *smd;
......@@ -285,27 +283,10 @@ static struct dm_space_map *dm_sm_disk_create_real(
kfree(smd);
return ERR_PTR(r);
}
struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
dm_block_t nr_blocks)
{
struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks);
struct dm_space_map *smc;
if (IS_ERR_OR_NULL(sm))
return sm;
smc = dm_sm_checker_create_fresh(sm);
if (IS_ERR(smc))
dm_sm_destroy(sm);
return smc;
}
EXPORT_SYMBOL_GPL(dm_sm_disk_create);
static struct dm_space_map *dm_sm_disk_open_real(
struct dm_transaction_manager *tm,
void *root_le, size_t len)
struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm,
void *root_le, size_t len)
{
int r;
struct sm_disk *smd;
......@@ -332,13 +313,6 @@ static struct dm_space_map *dm_sm_disk_open_real(
kfree(smd);
return ERR_PTR(r);
}
struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm,
void *root_le, size_t len)
{
return dm_sm_checker_create(
dm_sm_disk_open_real(tm, root_le, len));
}
EXPORT_SYMBOL_GPL(dm_sm_disk_open);
/*----------------------------------------------------------------*/
......@@ -5,7 +5,6 @@
*/
#include "dm-transaction-manager.h"
#include "dm-space-map.h"
#include "dm-space-map-checker.h"
#include "dm-space-map-disk.h"
#include "dm-space-map-metadata.h"
#include "dm-persistent-data-internal.h"
......@@ -220,13 +219,24 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
if (r < 0)
return r;
r = dm_bm_unlock_move(orig_block, new);
if (r < 0) {
/*
* It would be tempting to use dm_bm_unlock_move here, but some
* code, such as the space maps, keeps using the old data structures
* secure in the knowledge they won't be changed until the next
* transaction. Using unlock_move would force a synchronous read
* since the old block would no longer be in the cache.
*/
r = dm_bm_write_lock_zero(tm->bm, new, v, result);
if (r) {
dm_bm_unlock(orig_block);
return r;
}
return dm_bm_write_lock(tm->bm, new, v, result);
memcpy(dm_block_data(*result), dm_block_data(orig_block),
dm_bm_block_size(tm->bm));
dm_bm_unlock(orig_block);
return r;
}
int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
......@@ -311,98 +321,61 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm)
static int dm_tm_create_internal(struct dm_block_manager *bm,
dm_block_t sb_location,
struct dm_block_validator *sb_validator,
size_t root_offset, size_t root_max_len,
struct dm_transaction_manager **tm,
struct dm_space_map **sm,
struct dm_block **sblock,
int create)
int create,
void *sm_root, size_t sm_len)
{
int r;
struct dm_space_map *inner;
inner = dm_sm_metadata_init();
if (IS_ERR(inner))
return PTR_ERR(inner);
*sm = dm_sm_metadata_init();
if (IS_ERR(*sm))
return PTR_ERR(*sm);
*tm = dm_tm_create(bm, inner);
*tm = dm_tm_create(bm, *sm);
if (IS_ERR(*tm)) {
dm_sm_destroy(inner);
dm_sm_destroy(*sm);
return PTR_ERR(*tm);
}
if (create) {
r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location,
sb_validator, sblock);
if (r < 0) {
DMERR("couldn't lock superblock");
goto bad1;
}
r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm),
r = dm_sm_metadata_create(*sm, *tm, dm_bm_nr_blocks(bm),
sb_location);
if (r) {
DMERR("couldn't create metadata space map");
goto bad2;
}
*sm = dm_sm_checker_create(inner);
if (IS_ERR(*sm)) {
r = PTR_ERR(*sm);
goto bad2;
goto bad;
}
} else {
r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location,
sb_validator, sblock);
if (r < 0) {
DMERR("couldn't lock superblock");
goto bad1;
}
r = dm_sm_metadata_open(inner, *tm,
dm_block_data(*sblock) + root_offset,
root_max_len);
r = dm_sm_metadata_open(*sm, *tm, sm_root, sm_len);
if (r) {
DMERR("couldn't open metadata space map");
goto bad2;
}
*sm = dm_sm_checker_create(inner);
if (IS_ERR(*sm)) {
r = PTR_ERR(*sm);
goto bad2;
goto bad;
}
}
return 0;
bad2:
dm_tm_unlock(*tm, *sblock);
bad1:
bad:
dm_tm_destroy(*tm);
dm_sm_destroy(inner);
dm_sm_destroy(*sm);
return r;
}
int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
struct dm_block_validator *sb_validator,
struct dm_transaction_manager **tm,
struct dm_space_map **sm, struct dm_block **sblock)
struct dm_space_map **sm)
{
return dm_tm_create_internal(bm, sb_location, sb_validator,
0, 0, tm, sm, sblock, 1);
return dm_tm_create_internal(bm, sb_location, tm, sm, 1, NULL, 0);
}
EXPORT_SYMBOL_GPL(dm_tm_create_with_sm);
int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
struct dm_block_validator *sb_validator,
size_t root_offset, size_t root_max_len,
void *sm_root, size_t root_len,
struct dm_transaction_manager **tm,
struct dm_space_map **sm, struct dm_block **sblock)
struct dm_space_map **sm)
{
return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset,
root_max_len, tm, sm, sblock, 0);
return dm_tm_create_internal(bm, sb_location, tm, sm, 0, sm_root, root_len);
}
EXPORT_SYMBOL_GPL(dm_tm_open_with_sm);
......
......@@ -115,16 +115,17 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm);
*
* Returns a tm that has an open transaction to write the new disk sm.
* Caller should store the new sm root and commit.
*
* The superblock location is passed so the metadata space map knows it
* shouldn't be used.
*/
int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
struct dm_block_validator *sb_validator,
struct dm_transaction_manager **tm,
struct dm_space_map **sm, struct dm_block **sblock);
struct dm_space_map **sm);
int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
struct dm_block_validator *sb_validator,
size_t root_offset, size_t root_max_len,
void *sm_root, size_t root_len,
struct dm_transaction_manager **tm,
struct dm_space_map **sm, struct dm_block **sblock);
struct dm_space_map **sm);
#endif /* _LINUX_DM_TRANSACTION_MANAGER_H */
......@@ -66,14 +66,13 @@ typedef int (*dm_request_endio_fn) (struct dm_target *ti,
struct request *clone, int error,
union map_info *map_context);
typedef void (*dm_flush_fn) (struct dm_target *ti);
typedef void (*dm_presuspend_fn) (struct dm_target *ti);
typedef void (*dm_postsuspend_fn) (struct dm_target *ti);
typedef int (*dm_preresume_fn) (struct dm_target *ti);
typedef void (*dm_resume_fn) (struct dm_target *ti);
typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
char *result, unsigned int maxlen);
unsigned status_flags, char *result, unsigned maxlen);
typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv);
......@@ -139,7 +138,6 @@ struct target_type {
dm_map_request_fn map_rq;
dm_endio_fn end_io;
dm_request_endio_fn rq_end_io;
dm_flush_fn flush;
dm_presuspend_fn presuspend;
dm_postsuspend_fn postsuspend;
dm_preresume_fn preresume;
......@@ -188,8 +186,8 @@ struct dm_target {
sector_t begin;
sector_t len;
/* Always a power of 2 */
sector_t split_io;
/* If non-zero, maximum size of I/O submitted to a target. */
uint32_t max_io_len;
/*
* A number of zero-length barrier requests that will be submitted
......@@ -213,16 +211,28 @@ struct dm_target {
/* Used to provide an error string from the ctr */
char *error;
/*
* Set if this target needs to receive flushes regardless of
* whether or not its underlying devices have support.
*/
bool flush_supported:1;
/*
* Set if this target needs to receive discards regardless of
* whether or not its underlying devices have support.
*/
unsigned discards_supported:1;
bool discards_supported:1;
/*
* Set if the target requires discard requests to be split
* on max_io_len boundaries.
*/
bool split_discard_requests:1;
/*
* Set if this target does not return zeroes on discarded blocks.
*/
unsigned discard_zeroes_data_unsupported:1;
bool discard_zeroes_data_unsupported:1;
};
/* Each target can link one of these into the table */
......@@ -359,6 +369,11 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback
*/
int dm_table_complete(struct dm_table *t);
/*
* Target may require that it is never sent I/O larger than len.
*/
int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len);
/*
* Table reference counting.
*/
......
......@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
#define DM_VERSION_MINOR 22
#define DM_VERSION_MINOR 23
#define DM_VERSION_PATCHLEVEL 0
#define DM_VERSION_EXTRA "-ioctl (2011-10-19)"
#define DM_VERSION_EXTRA "-ioctl (2012-07-25)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
......@@ -307,6 +307,8 @@ enum {
/*
* Set this to suspend without flushing queued ios.
* Also disables flushing uncommitted changes in the thin target before
* generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT.
*/
#define DM_NOFLUSH_FLAG (1 << 11) /* In */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment