Commit d9b9be02 authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (36 commits)
  dm: set queue ordered mode
  dm: move wait queue declaration
  dm: merge pushback and deferred bio lists
  dm: allow uninterruptible wait for pending io
  dm: merge __flush_deferred_io into caller
  dm: move bio_io_error into __split_and_process_bio
  dm: rename __split_bio
  dm: remove unnecessary struct dm_wq_req
  dm: remove unnecessary work queue context field
  dm: remove unnecessary work queue type field
  dm: bio list add bio_list_add_head
  dm snapshot: persistent fix dtr cleanup
  dm snapshot: move status to exception store
  dm snapshot: move ctr parsing to exception store
  dm snapshot: use DMEMIT macro for status
  dm snapshot: remove dm_snap header
  dm snapshot: remove dm_snap header use
  dm exception store: move cow pointer
  dm exception store: move chunk_fields
  dm exception store: move dm_target pointer
  ...
parents 9b59f031 99360b4c
@@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
bl->tail = bio;
}
static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = bl->head;
bl->head = bio;
if (!bl->tail)
bl->tail = bio;
}
static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
if (!bl2->head)
......
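The new helper above complements bio_list_add(): bio_list_add() appends at the tail of the singly linked list, while bio_list_add_head() pushes at the head and only touches the tail pointer when the list was empty, so a requeued bio is retried before anything queued after it. A minimal user-space sketch of the same head/tail discipline, with simplified node and list types standing in for struct bio and struct bio_list (not the kernel code itself):

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for struct bio / struct bio_list. */
struct node { int id; struct node *next; };
struct list { struct node *head, *tail; };

/* Append at the tail, analogous to bio_list_add(). */
static void list_add_tail_node(struct list *l, struct node *n)
{
	n->next = NULL;
	if (l->tail)
		l->tail->next = n;
	else
		l->head = n;
	l->tail = n;
}

/* Push at the head, analogous to the new bio_list_add_head(). */
static void list_add_head_node(struct list *l, struct node *n)
{
	n->next = l->head;
	l->head = n;
	if (!l->tail)
		l->tail = n;
}

int main(void)
{
	struct node a = { 1 }, b = { 2 }, c = { 3 };
	struct list l = { NULL, NULL };

	list_add_tail_node(&l, &a);   /* list: 1 */
	list_add_tail_node(&l, &b);   /* list: 1 2 */
	list_add_head_node(&l, &c);   /* list: 3 1 2 */

	for (struct node *n = l.head; n; n = n->next)
		printf("%d ", n->id);
	printf("\n");                 /* prints: 3 1 2 */
	return 0;
}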
@@ -16,30 +16,56 @@
* functions in this file help the target record and restore the
* original bio state.
*/
struct dm_bio_vec_details {
#if PAGE_SIZE < 65536
__u16 bv_len;
__u16 bv_offset;
#else
unsigned bv_len;
unsigned bv_offset;
#endif
};
struct dm_bio_details {
sector_t bi_sector;
struct block_device *bi_bdev;
unsigned int bi_size;
unsigned short bi_idx;
unsigned long bi_flags;
struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
};
static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
{
unsigned i;
bd->bi_sector = bio->bi_sector;
bd->bi_bdev = bio->bi_bdev;
bd->bi_size = bio->bi_size;
bd->bi_idx = bio->bi_idx;
bd->bi_flags = bio->bi_flags;
for (i = 0; i < bio->bi_vcnt; i++) {
bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
}
}
static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
{
unsigned i;
bio->bi_sector = bd->bi_sector;
bio->bi_bdev = bd->bi_bdev;
bio->bi_size = bd->bi_size;
bio->bi_idx = bd->bi_idx;
bio->bi_flags = bd->bi_flags;
for (i = 0; i < bio->bi_vcnt; i++) {
bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
}
}
#endif
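As the comment at the top of this header says, the pair lets a target snapshot a bio before remapping it and put the original state back if the I/O has to be retried elsewhere; with this change the per-vector lengths and offsets are saved and restored as well. A rough user-space sketch of the record/remap/restore pattern, using cut-down stand-ins rather than the kernel bio structures:

#include <stdio.h>

/* Cut-down stand-ins for struct bio and struct dm_bio_details. */
struct fake_bio { unsigned long sector; unsigned int size; };
struct fake_details { unsigned long sector; unsigned int size; };

static void record_details(struct fake_details *d, const struct fake_bio *b)
{
	d->sector = b->sector;
	d->size = b->size;
}

static void restore_details(const struct fake_details *d, struct fake_bio *b)
{
	b->sector = d->sector;
	b->size = d->size;
}

int main(void)
{
	struct fake_bio bio = { 1024, 4096 };
	struct fake_details saved;

	record_details(&saved, &bio);   /* snapshot the original state */
	bio.sector += 8;                /* the target remaps the bio */
	/* ... submission to the chosen device fails ... */
	restore_details(&saved, &bio);  /* put the original state back and retry */
	printf("sector=%lu size=%u\n", bio.sector, bio.size); /* 1024 4096 */
	return 0;
}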
@@ -1156,8 +1156,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
crypto_free_ablkcipher(tfm);
bad_cipher:
/* Must zero key material before freeing */
-memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
-kfree(cc);
kzfree(cc);
return -EINVAL;
}
@@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti)
dm_put_device(ti, cc->dev);
/* Must zero key material before freeing */
-memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
-kfree(cc);
kzfree(cc);
}
static int crypt_map(struct dm_target *ti, struct bio *bio,
......
@@ -7,6 +7,7 @@
#include "dm-exception-store.h"
#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
@@ -14,6 +15,257 @@
#define DM_MSG_PREFIX "snapshot exception stores"
static LIST_HEAD(_exception_store_types);
static DEFINE_SPINLOCK(_lock);
static struct dm_exception_store_type *__find_exception_store_type(const char *name)
{
struct dm_exception_store_type *type;
list_for_each_entry(type, &_exception_store_types, list)
if (!strcmp(name, type->name))
return type;
return NULL;
}
static struct dm_exception_store_type *_get_exception_store_type(const char *name)
{
struct dm_exception_store_type *type;
spin_lock(&_lock);
type = __find_exception_store_type(name);
if (type && !try_module_get(type->module))
type = NULL;
spin_unlock(&_lock);
return type;
}
/*
* get_type
* @type_name
*
* Attempt to retrieve the dm_exception_store_type by name. If not already
* available, attempt to load the appropriate module.
*
* Exstore modules are named "dm-exstore-" followed by the 'type_name'.
* Modules may contain multiple types.
* This function will first try the module "dm-exstore-<type_name>",
* then truncate 'type_name' on the last '-' and try again.
*
* For example, if type_name was "clustered-shared", it would search
* 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'.
*
* 'dm-exception-store-<type_name>' is too long of a name in my
* opinion, which is why I've chosen to have the files
* containing exception store implementations be 'dm-exstore-<type_name>'.
* If you want your module to be autoloaded, you will follow this
* naming convention.
*
* Returns: dm_exception_store_type* on success, NULL on failure
*/
static struct dm_exception_store_type *get_type(const char *type_name)
{
char *p, *type_name_dup;
struct dm_exception_store_type *type;
type = _get_exception_store_type(type_name);
if (type)
return type;
type_name_dup = kstrdup(type_name, GFP_KERNEL);
if (!type_name_dup) {
DMERR("No memory left to attempt load for \"%s\"", type_name);
return NULL;
}
while (request_module("dm-exstore-%s", type_name_dup) ||
!(type = _get_exception_store_type(type_name))) {
p = strrchr(type_name_dup, '-');
if (!p)
break;
p[0] = '\0';
}
if (!type)
DMWARN("Module for exstore type \"%s\" not found.", type_name);
kfree(type_name_dup);
return type;
}
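The loop above implements the fallback naming described in the comment: request_module() is tried with the full "dm-exstore-<type_name>" first, and the name is then truncated at the last '-' until either the type registers or no '-' remains. A small user-space sketch of the same truncation order, using the hypothetical type name from the comment:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int main(void)
{
	/* Hypothetical type name, as in the comment above. */
	char *name = strdup("clustered-shared");
	char *p;

	do {
		/* The kernel would call request_module("dm-exstore-%s", name) here. */
		printf("try module: dm-exstore-%s\n", name);
		p = strrchr(name, '-');
		if (p)
			*p = '\0';
	} while (p);
	/* Output:
	 *   try module: dm-exstore-clustered-shared
	 *   try module: dm-exstore-clustered
	 */

	free(name);
	return 0;
}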
static void put_type(struct dm_exception_store_type *type)
{
spin_lock(&_lock);
module_put(type->module);
spin_unlock(&_lock);
}
int dm_exception_store_type_register(struct dm_exception_store_type *type)
{
int r = 0;
spin_lock(&_lock);
if (!__find_exception_store_type(type->name))
list_add(&type->list, &_exception_store_types);
else
r = -EEXIST;
spin_unlock(&_lock);
return r;
}
EXPORT_SYMBOL(dm_exception_store_type_register);
int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
{
spin_lock(&_lock);
if (!__find_exception_store_type(type->name)) {
spin_unlock(&_lock);
return -EINVAL;
}
list_del(&type->list);
spin_unlock(&_lock);
return 0;
}
EXPORT_SYMBOL(dm_exception_store_type_unregister);
/*
* Round a number up to the nearest 'size' boundary. size must
* be a power of 2.
*/
static ulong round_up(ulong n, ulong size)
{
size--;
return (n + size) & ~size;
}
static int set_chunk_size(struct dm_exception_store *store,
const char *chunk_size_arg, char **error)
{
unsigned long chunk_size_ulong;
char *value;
chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
if (*chunk_size_arg == '\0' || *value != '\0') {
*error = "Invalid chunk size";
return -EINVAL;
}
if (!chunk_size_ulong) {
store->chunk_size = store->chunk_mask = store->chunk_shift = 0;
return 0;
}
/*
* Chunk size must be multiple of page size. Silently
* round up if it's not.
*/
chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
/* Check chunk_size is a power of 2 */
if (!is_power_of_2(chunk_size_ulong)) {
*error = "Chunk size is not a power of 2";
return -EINVAL;
}
/* Validate the chunk size against the device block size */
if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
*error = "Chunk size is not a multiple of device blocksize";
return -EINVAL;
}
store->chunk_size = chunk_size_ulong;
store->chunk_mask = chunk_size_ulong - 1;
store->chunk_shift = ffs(chunk_size_ulong) - 1;
return 0;
}
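set_chunk_size() works in 512-byte sectors: the value is rounded up to a multiple of PAGE_SIZE >> 9, rejected unless it is a power of two and a multiple of the device block size, and then cached along with the derived mask and shift. As a worked example, with 4 KiB pages a requested chunk size of 13 sectors rounds up to 16, giving chunk_mask = 15 and chunk_shift = 4; a stand-alone check of that arithmetic (assuming PAGE_SIZE is 4096):

#include <stdio.h>

/* Same rounding helper as above: size must be a power of 2. */
static unsigned long round_up_to(unsigned long n, unsigned long size)
{
	size--;
	return (n + size) & ~size;
}

/* 1-based bit search, like ffs(). */
static int ffs_ul(unsigned long v)
{
	int i = 1;
	if (!v)
		return 0;
	while (!(v & 1)) {
		v >>= 1;
		i++;
	}
	return i;
}

int main(void)
{
	unsigned long page_sectors = 4096 >> 9;   /* assuming PAGE_SIZE == 4096 */
	unsigned long chunk = round_up_to(13, page_sectors);

	printf("chunk_size  = %lu sectors\n", chunk);      /* 16 */
	printf("chunk_mask  = %lu\n", chunk - 1);          /* 15 */
	printf("chunk_shift = %d\n", ffs_ul(chunk) - 1);   /* 4 */
	return 0;
}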
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
unsigned *args_used,
struct dm_exception_store **store)
{
int r = 0;
struct dm_exception_store_type *type;
struct dm_exception_store *tmp_store;
char persistent;
if (argc < 3) {
ti->error = "Insufficient exception store arguments";
return -EINVAL;
}
tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
if (!tmp_store) {
ti->error = "Exception store allocation failed";
return -ENOMEM;
}
persistent = toupper(*argv[1]);
if (persistent != 'P' && persistent != 'N') {
ti->error = "Persistent flag is not P or N";
return -EINVAL;
}
type = get_type(argv[1]);
if (!type) {
ti->error = "Exception store type not recognised";
r = -EINVAL;
goto bad_type;
}
tmp_store->type = type;
tmp_store->ti = ti;
r = dm_get_device(ti, argv[0], 0, 0,
FMODE_READ | FMODE_WRITE, &tmp_store->cow);
if (r) {
ti->error = "Cannot get COW device";
goto bad_cow;
}
r = set_chunk_size(tmp_store, argv[2], &ti->error);
if (r)
goto bad_cow;
r = type->ctr(tmp_store, 0, NULL);
if (r) {
ti->error = "Exception store type constructor failed";
goto bad_ctr;
}
*args_used = 3;
*store = tmp_store;
return 0;
bad_ctr:
dm_put_device(ti, tmp_store->cow);
bad_cow:
put_type(type);
bad_type:
kfree(tmp_store);
return r;
}
EXPORT_SYMBOL(dm_exception_store_create);
void dm_exception_store_destroy(struct dm_exception_store *store)
{
store->type->dtr(store);
dm_put_device(store->ti, store->cow);
put_type(store->type);
kfree(store);
}
EXPORT_SYMBOL(dm_exception_store_destroy);
int dm_exception_store_init(void)
{
int r;
......
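dm_exception_store_create() above consumes the first three snapshot target arguments: argv[0] names the COW device, argv[1] is the persistence flag (the compatibility single letters P or N, or the name of a registered store type), argv[2] is the chunk size in sectors, and *args_used is set to 3 for the caller. A stand-alone sketch of that argument layout and the P/N check, with the device lookup and store construction left out and purely illustrative values:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Illustrative exception store arguments, e.g. from a snapshot table line. */
	char *argv[] = { "/dev/vg/cow", "P", "16" };
	int argc = 3;

	if (argc < 3) {
		fprintf(stderr, "Insufficient exception store arguments\n");
		return 1;
	}

	char persistent = toupper(argv[1][0]);
	if (persistent != 'P' && persistent != 'N') {
		fprintf(stderr, "Persistent flag is not P or N\n");
		return 1;
	}

	unsigned long chunk_size = strtoul(argv[2], NULL, 10);
	printf("cow=%s type=%c chunk_size=%lu sectors, args_used=3\n",
	       argv[0], persistent, chunk_size);
	return 0;
}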
@@ -37,11 +37,18 @@ struct dm_snap_exception {
* Abstraction to handle the meta/layout of exception stores (the
* COW device).
*/
-struct dm_exception_store {
struct dm_exception_store;
struct dm_exception_store_type {
const char *name;
struct module *module;
int (*ctr) (struct dm_exception_store *store,
unsigned argc, char **argv);
/*
* Destroys this object when you've finished with it.
*/
-void (*destroy) (struct dm_exception_store *store);
void (*dtr) (struct dm_exception_store *store);
/*
* The target shouldn't read the COW device until this is
@@ -72,8 +79,9 @@ struct dm_exception_store {
*/
void (*drop_snapshot) (struct dm_exception_store *store);
-int (*status) (struct dm_exception_store *store, status_type_t status,
-char *result, unsigned int maxlen);
unsigned (*status) (struct dm_exception_store *store,
status_type_t status, char *result,
unsigned maxlen);
/*
* Return how full the snapshot is.
@@ -82,7 +90,21 @@
sector_t *numerator,
sector_t *denominator);
-struct dm_snapshot *snap;
/* For internal device-mapper use only. */
struct list_head list;
};
struct dm_exception_store {
struct dm_exception_store_type *type;
struct dm_target *ti;
struct dm_dev *cow;
/* Size of data blocks saved - must be a power of 2 */
chunk_t chunk_size;
chunk_t chunk_mask;
chunk_t chunk_shift;
void *context;
};
@@ -129,6 +151,28 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
# endif
/*
* Return the number of sectors in the device.
*/
static inline sector_t get_dev_size(struct block_device *bdev)
{
return bdev->bd_inode->i_size >> SECTOR_SHIFT;
}
static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
sector_t sector)
{
return (sector & ~store->chunk_mask) >> store->chunk_shift;
}
int dm_exception_store_type_register(struct dm_exception_store_type *type);
int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
unsigned *args_used,
struct dm_exception_store **store);
void dm_exception_store_destroy(struct dm_exception_store *store);
int dm_exception_store_init(void);
void dm_exception_store_exit(void);
@@ -141,8 +185,4 @@ void dm_persistent_snapshot_exit(void);
int dm_transient_snapshot_init(void);
void dm_transient_snapshot_exit(void);
-int dm_create_persistent(struct dm_exception_store *store);
-int dm_create_transient(struct dm_exception_store *store);
#endif /* _LINUX_DM_EXCEPTION_STORE */
@@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
-if (!atomic_read(&io.count) || signal_pending(current))
if (!atomic_read(&io.count))
break;
io_schedule();
}
set_current_state(TASK_RUNNING);
-if (atomic_read(&io.count))
-return -EINTR;
if (error_bits)
*error_bits = io.error_bits;
......
@@ -16,40 +16,29 @@
#define DM_MSG_PREFIX "dirty region log"
-struct dm_dirty_log_internal {
-struct dm_dirty_log_type *type;
-struct list_head list;
-long use;
-};
static LIST_HEAD(_log_types);
static DEFINE_SPINLOCK(_lock);
-static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name)
static struct dm_dirty_log_type *__find_dirty_log_type(const char *name)
{
-struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
list_for_each_entry(log_type, &_log_types, list)
-if (!strcmp(name, log_type->type->name))
if (!strcmp(name, log_type->name))
return log_type;
return NULL;
}
-static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
static struct dm_dirty_log_type *_get_dirty_log_type(const char *name)
{
-struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
spin_lock(&_lock);
log_type = __find_dirty_log_type(name);
-if (log_type) {
-if (!log_type->use && !try_module_get(log_type->type->module))
-log_type = NULL;
-else
-log_type->use++;
-}
if (log_type && !try_module_get(log_type->module))
log_type = NULL;
spin_unlock(&_lock);
@@ -76,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
static struct dm_dirty_log_type *get_type(const char *type_name)
{
char *p, *type_name_dup;
-struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
if (!type_name)
return NULL;
log_type = _get_dirty_log_type(type_name);
if (log_type)
-return log_type->type;
return log_type;
type_name_dup = kstrdup(type_name, GFP_KERNEL);
if (!type_name_dup) {
@@ -105,56 +94,33 @@ static struct dm_dirty_log_type *get_type(const char *type_name)
kfree(type_name_dup);
-return log_type ? log_type->type : NULL;
return log_type;
}
static void put_type(struct dm_dirty_log_type *type)
{
-struct dm_dirty_log_internal *log_type;
if (!type)
return;
spin_lock(&_lock);
-log_type = __find_dirty_log_type(type->name);
-if (!log_type)
if (!__find_dirty_log_type(type->name))
goto out;
-if (!--log_type->use)
-module_put(type->module);
-BUG_ON(log_type->use < 0);
module_put(type->module);
out:
spin_unlock(&_lock);
}
-static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
-{
-struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
-GFP_KERNEL);
-if (log_type)
-log_type->type = type;
-return log_type;
-}
int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
{
-struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
int r = 0;
-if (!log_type)
-return -ENOMEM;
spin_lock(&_lock);
if (!__find_dirty_log_type(type->name))
-list_add(&log_type->list, &_log_types);
-else {
-kfree(log_type);
list_add(&type->list, &_log_types);
else
r = -EEXIST;
-}
spin_unlock(&_lock);
return r;
@@ -163,25 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register);
int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
{
-struct dm_dirty_log_internal *log_type;
spin_lock(&_lock);
-log_type = __find_dirty_log_type(type->name);
-if (!log_type) {
if (!__find_dirty_log_type(type->name)) {
spin_unlock(&_lock);
return -EINVAL;
}
-if (log_type->use) {
-spin_unlock(&_lock);
-return -ETXTBSY;
-}
-list_del(&log_type->list);
list_del(&type->list);
spin_unlock(&_lock);
-kfree(log_type);
return 0;
}
......
@@ -17,9 +17,7 @@
struct ps_internal {
struct path_selector_type pst;
struct list_head list;
-long use;
};
#define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst)
@@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name)
down_read(&_ps_lock);
psi = __find_path_selector_type(name);
-if (psi) {
-if ((psi->use == 0) && !try_module_get(psi->pst.module))
-psi = NULL;
-else
-psi->use++;
-}
if (psi && !try_module_get(psi->pst.module))
psi = NULL;
up_read(&_ps_lock);
return psi;
@@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst)
if (!psi)
goto out;
-if (--psi->use == 0)
-module_put(psi->pst.module);
-BUG_ON(psi->use < 0);
module_put(psi->pst.module);
out:
up_read(&_ps_lock);
}
@@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
return -EINVAL;
}
-if (psi->use) {
-up_write(&_ps_lock);
-return -ETXTBSY;
-}
list_del(&psi->list);
up_write(&_ps_lock);
......
@@ -145,6 +145,8 @@ struct dm_raid1_read_record {
struct dm_bio_details details;
};
static struct kmem_cache *_dm_raid1_read_record_cache;
/*
* Every mirror should look like this one.
*/
@@ -586,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
int state;
struct bio *bio;
struct bio_list sync, nosync, recover, *this_list = NULL;
struct bio_list requeue;
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
region_t region;
if (!writes->head)
return;
@@ -596,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
bio_list_init(&sync);
bio_list_init(&nosync);
bio_list_init(&recover);
bio_list_init(&requeue);
while ((bio = bio_list_pop(writes))) {
-state = dm_rh_get_state(ms->rh,
-dm_rh_bio_to_region(ms->rh, bio), 1);
region = dm_rh_bio_to_region(ms->rh, bio);
if (log->type->is_remote_recovering &&
log->type->is_remote_recovering(log, region)) {
bio_list_add(&requeue, bio);
continue;
}
state = dm_rh_get_state(ms->rh, region, 1);
switch (state) {
case DM_RH_CLEAN:
case DM_RH_DIRTY:
@@ -618,6 +631,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
bio_list_add(this_list, bio);
}
/*
* Add bios that are delayed due to remote recovery
* back on to the write queue
*/
if (unlikely(requeue.head)) {
spin_lock_irq(&ms->lock);
bio_list_merge(&ms->writes, &requeue);
spin_unlock_irq(&ms->lock);
}
/*
* Increment the pending counts for any regions that will
* be written to (writes to recover regions are going to
@@ -764,9 +787,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
atomic_set(&ms->suspend, 0);
atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
-len = sizeof(struct dm_raid1_read_record);
-ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS,
-len);
ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
_dm_raid1_read_record_cache);
if (!ms->read_record_pool) {
ti->error = "Error creating mirror read_record_pool";
kfree(ms);
@@ -1279,16 +1302,31 @@ static int __init dm_mirror_init(void)
{
int r;
_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
if (!_dm_raid1_read_record_cache) {
DMERR("Can't allocate dm_raid1_read_record cache");
r = -ENOMEM;
goto bad_cache;
}
r = dm_register_target(&mirror_target);
-if (r < 0)
if (r < 0) {
DMERR("Failed to register mirror target");
goto bad_target;
}
return 0;
bad_target:
kmem_cache_destroy(_dm_raid1_read_record_cache);
bad_cache:
return r;
}
static void __exit dm_mirror_exit(void)
{
dm_unregister_target(&mirror_target);
kmem_cache_destroy(_dm_raid1_read_record_cache);
}
/* Module hooks */
......
@@ -6,7 +6,6 @@
*/
#include "dm-exception-store.h"
-#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
@@ -89,7 +88,7 @@ struct commit_callback {
* The top level structure for a persistent exception store.
*/
struct pstore {
-struct dm_snapshot *snap; /* up pointer to my snapshot */
struct dm_exception_store *store;
int version;
int valid;
uint32_t exceptions_per_area;
@@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps)
int r = -ENOMEM;
size_t len;
-len = ps->snap->chunk_size << SECTOR_SHIFT;
len = ps->store->chunk_size << SECTOR_SHIFT;
/*
* Allocate the chunk_size block of memory that will hold
@@ -163,9 +162,12 @@ static int alloc_area(struct pstore *ps)
static void free_area(struct pstore *ps)
{
-vfree(ps->area);
if (ps->area)
vfree(ps->area);
ps->area = NULL;
-vfree(ps->zero_area);
if (ps->zero_area)
vfree(ps->zero_area);
ps->zero_area = NULL;
}
@@ -189,9 +191,9 @@ static void do_metadata(struct work_struct *work)
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
{
struct dm_io_region where = {
-.bdev = ps->snap->cow->bdev,
-.sector = ps->snap->chunk_size * chunk,
-.count = ps->snap->chunk_size,
.bdev = ps->store->cow->bdev,
.sector = ps->store->chunk_size * chunk,
.count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = rw,
@@ -247,15 +249,15 @@ static int area_io(struct pstore *ps, int rw)
static void zero_memory_area(struct pstore *ps)
{
-memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
struct dm_io_region where = {
-.bdev = ps->snap->cow->bdev,
-.sector = ps->snap->chunk_size * area_location(ps, area),
-.count = ps->snap->chunk_size,
.bdev = ps->store->cow->bdev,
.sector = ps->store->chunk_size * area_location(ps, area),
.count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = WRITE,
@@ -278,15 +280,15 @@ static int read_header(struct pstore *ps, int *new_snapshot)
/*
* Use default chunk size (or hardsect_size, if larger) if none supplied
*/
-if (!ps->snap->chunk_size) {
-ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
-ps->snap->chunk_mask = ps->snap->chunk_size - 1;
-ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
if (!ps->store->chunk_size) {
ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
bdev_hardsect_size(ps->store->cow->bdev) >> 9);
ps->store->chunk_mask = ps->store->chunk_size - 1;
ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
chunk_size_supplied = 0;
}
-ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
chunk_size));
if (IS_ERR(ps->io_client))
return PTR_ERR(ps->io_client);
@@ -317,22 +319,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
ps->version = le32_to_cpu(dh->version);
chunk_size = le32_to_cpu(dh->chunk_size);
-if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
return 0;
DMWARN("chunk size %llu in device metadata overrides "
"table chunk size of %llu.",
(unsigned long long)chunk_size,
-(unsigned long long)ps->snap->chunk_size);
(unsigned long long)ps->store->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
free_area(ps);
-ps->snap->chunk_size = chunk_size;
-ps->snap->chunk_mask = chunk_size - 1;
-ps->snap->chunk_shift = ffs(chunk_size) - 1;
ps->store->chunk_size = chunk_size;
ps->store->chunk_mask = chunk_size - 1;
ps->store->chunk_shift = ffs(chunk_size) - 1;
-r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
ps->io_client);
if (r)
return r;
@@ -349,13 +351,13 @@ static int write_header(struct pstore *ps)
{
struct disk_header *dh;
-memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
dh = (struct disk_header *) ps->area;
dh->magic = cpu_to_le32(SNAP_MAGIC);
dh->valid = cpu_to_le32(ps->valid);
dh->version = cpu_to_le32(ps->version);
-dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
return chunk_io(ps, 0, WRITE, 1);
}
@@ -474,18 +476,25 @@ static struct pstore *get_info(struct dm_exception_store *store)
static void persistent_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
-*numerator = get_info(store)->next_free * store->snap->chunk_size;
-*denominator = get_dev_size(store->snap->cow->bdev);
*numerator = get_info(store)->next_free * store->chunk_size;
*denominator = get_dev_size(store->cow->bdev);
}
-static void persistent_destroy(struct dm_exception_store *store)
static void persistent_dtr(struct dm_exception_store *store)
{
struct pstore *ps = get_info(store);
destroy_workqueue(ps->metadata_wq);
-dm_io_client_destroy(ps->io_client);
-vfree(ps->callbacks);
/* Created in read_header */
if (ps->io_client)
dm_io_client_destroy(ps->io_client);
free_area(ps);
/* Allocated in persistent_read_metadata */
if (ps->callbacks)
vfree(ps->callbacks);
kfree(ps);
}
@@ -507,7 +516,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
/*
* Now we know correct chunk_size, complete the initialisation.
*/
-ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
sizeof(struct disk_exception);
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
sizeof(*ps->callbacks));
@@ -564,10 +573,10 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
struct pstore *ps = get_info(store);
uint32_t stride;
chunk_t next_free;
-sector_t size = get_dev_size(store->snap->cow->bdev);
sector_t size = get_dev_size(store->cow->bdev);
/* Is there enough room ? */
-if (size < ((ps->next_free + 1) * store->snap->chunk_size))
if (size < ((ps->next_free + 1) * store->chunk_size))
return -ENOSPC;
e->new_chunk = ps->next_free;
@@ -656,16 +665,17 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
DMWARN("write header failed");
}
-int dm_create_persistent(struct dm_exception_store *store)
static int persistent_ctr(struct dm_exception_store *store,
unsigned argc, char **argv)
{
struct pstore *ps;
/* allocate the pstore */
-ps = kmalloc(sizeof(*ps), GFP_KERNEL);
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
return -ENOMEM;
-ps->snap = store->snap;
ps->store = store;
ps->valid = 1;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
@@ -683,22 +693,77 @@ int dm_create_persistent(struct dm_exception_store *store)
return -ENOMEM;
}
-store->destroy = persistent_destroy;
-store->read_metadata = persistent_read_metadata;
-store->prepare_exception = persistent_prepare_exception;
-store->commit_exception = persistent_commit_exception;
-store->drop_snapshot = persistent_drop_snapshot;
-store->fraction_full = persistent_fraction_full;
store->context = ps;
return 0;
}
static unsigned persistent_status(struct dm_exception_store *store,
status_type_t status, char *result,
unsigned maxlen)
{
unsigned sz = 0;
switch (status) {
case STATUSTYPE_INFO:
break;
case STATUSTYPE_TABLE:
DMEMIT(" %s P %llu", store->cow->name,
(unsigned long long)store->chunk_size);
}
return sz;
}
static struct dm_exception_store_type _persistent_type = {
.name = "persistent",
.module = THIS_MODULE,
.ctr = persistent_ctr,
.dtr = persistent_dtr,
.read_metadata = persistent_read_metadata,
.prepare_exception = persistent_prepare_exception,
.commit_exception = persistent_commit_exception,
.drop_snapshot = persistent_drop_snapshot,
.fraction_full = persistent_fraction_full,
.status = persistent_status,
};
static struct dm_exception_store_type _persistent_compat_type = {
.name = "P",
.module = THIS_MODULE,
.ctr = persistent_ctr,
.dtr = persistent_dtr,
.read_metadata = persistent_read_metadata,
.prepare_exception = persistent_prepare_exception,
.commit_exception = persistent_commit_exception,
.drop_snapshot = persistent_drop_snapshot,
.fraction_full = persistent_fraction_full,
.status = persistent_status,
};
int dm_persistent_snapshot_init(void)
{
-return 0;
int r;
r = dm_exception_store_type_register(&_persistent_type);
if (r) {
DMERR("Unable to register persistent exception store type");
return r;
}
r = dm_exception_store_type_register(&_persistent_compat_type);
if (r) {
DMERR("Unable to register old-style persistent exception "
"store type");
dm_exception_store_type_unregister(&_persistent_type);
return r;
}
return r;
}
void dm_persistent_snapshot_exit(void)
{
dm_exception_store_type_unregister(&_persistent_type);
dm_exception_store_type_unregister(&_persistent_compat_type);
}
@@ -6,7 +6,6 @@
*/
#include "dm-exception-store.h"
-#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
@@ -23,7 +22,7 @@ struct transient_c {
sector_t next_free;
};
-static void transient_destroy(struct dm_exception_store *store)
static void transient_dtr(struct dm_exception_store *store)
{
kfree(store->context);
}
@@ -39,14 +38,14 @@ static int transient_read_metadata(struct dm_exception_store *store,
static int transient_prepare_exception(struct dm_exception_store *store,
struct dm_snap_exception *e)
{
-struct transient_c *tc = (struct transient_c *) store->context;
-sector_t size = get_dev_size(store->snap->cow->bdev);
struct transient_c *tc = store->context;
sector_t size = get_dev_size(store->cow->bdev);
-if (size < (tc->next_free + store->snap->chunk_size))
if (size < (tc->next_free + store->chunk_size))
return -1;
-e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
-tc->next_free += store->snap->chunk_size;
e->new_chunk = sector_to_chunk(store, tc->next_free);
tc->next_free += store->chunk_size;
return 0;
}
@@ -64,20 +63,14 @@ static void transient_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = ((struct transient_c *) store->context)->next_free;
-*denominator = get_dev_size(store->snap->cow->bdev);
*denominator = get_dev_size(store->cow->bdev);
}
-int dm_create_transient(struct dm_exception_store *store)
static int transient_ctr(struct dm_exception_store *store,
unsigned argc, char **argv)
{
struct transient_c *tc;
-store->destroy = transient_destroy;
-store->read_metadata = transient_read_metadata;
-store->prepare_exception = transient_prepare_exception;
-store->commit_exception = transient_commit_exception;
-store->drop_snapshot = NULL;
-store->fraction_full = transient_fraction_full;
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
if (!tc)
return -ENOMEM;
@@ -88,11 +81,70 @@ int dm_create_transient(struct dm_exception_store *store)
return 0;
}
static unsigned transient_status(struct dm_exception_store *store,
status_type_t status, char *result,
unsigned maxlen)
{
unsigned sz = 0;
switch (status) {
case STATUSTYPE_INFO:
break;
case STATUSTYPE_TABLE:
DMEMIT(" %s N %llu", store->cow->name,
(unsigned long long)store->chunk_size);
}
return sz;
}
static struct dm_exception_store_type _transient_type = {
.name = "transient",
.module = THIS_MODULE,
.ctr = transient_ctr,
.dtr = transient_dtr,
.read_metadata = transient_read_metadata,
.prepare_exception = transient_prepare_exception,
.commit_exception = transient_commit_exception,
.fraction_full = transient_fraction_full,
.status = transient_status,
};
static struct dm_exception_store_type _transient_compat_type = {
.name = "N",
.module = THIS_MODULE,
.ctr = transient_ctr,
.dtr = transient_dtr,
.read_metadata = transient_read_metadata,
.prepare_exception = transient_prepare_exception,
.commit_exception = transient_commit_exception,
.fraction_full = transient_fraction_full,
.status = transient_status,
};
int dm_transient_snapshot_init(void)
{
-return 0;
int r;
r = dm_exception_store_type_register(&_transient_type);
if (r) {
DMWARN("Unable to register transient exception store type");
return r;
}
r = dm_exception_store_type_register(&_transient_compat_type);
if (r) {
DMWARN("Unable to register old-style transient "
"exception store type");
dm_exception_store_type_unregister(&_transient_type);
return r;
}
return r;
}
void dm_transient_snapshot_exit(void)
{
dm_exception_store_type_unregister(&_transient_type);
dm_exception_store_type_unregister(&_transient_compat_type);
}
This diff is collapsed.
/*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
*
* This file is released under the GPL.
*/
#ifndef DM_SNAPSHOT_H
#define DM_SNAPSHOT_H
#include <linux/device-mapper.h>
#include "dm-exception-store.h"
#include "dm-bio-list.h"
#include <linux/blkdev.h>
#include <linux/workqueue.h>
struct exception_table {
uint32_t hash_mask;
unsigned hash_shift;
struct list_head *table;
};
#define DM_TRACKED_CHUNK_HASH_SIZE 16
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
(DM_TRACKED_CHUNK_HASH_SIZE - 1))
struct dm_snapshot {
struct rw_semaphore lock;
struct dm_target *ti;
struct dm_dev *origin;
struct dm_dev *cow;
/* List of snapshots per Origin */
struct list_head list;
/* Size of data blocks saved - must be a power of 2 */
chunk_t chunk_size;
chunk_t chunk_mask;
chunk_t chunk_shift;
/* You can't use a snapshot if this is 0 (e.g. if full) */
int valid;
/* Origin writes don't trigger exceptions until this is set */
int active;
/* Used for display of table */
char type;
mempool_t *pending_pool;
atomic_t pending_exceptions_count;
struct exception_table pending;
struct exception_table complete;
/*
* pe_lock protects all pending_exception operations and access
* as well as the snapshot_bios list.
*/
spinlock_t pe_lock;
/* The on disk metadata handler */
struct dm_exception_store store;
struct dm_kcopyd_client *kcopyd_client;
/* Queue of snapshot writes for ksnapd to flush */
struct bio_list queued_bios;
struct work_struct queued_bios_work;
/* Chunks with outstanding reads */
mempool_t *tracked_chunk_pool;
spinlock_t tracked_chunk_lock;
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
};
/*
* Return the number of sectors in the device.
*/
static inline sector_t get_dev_size(struct block_device *bdev)
{
return bdev->bd_inode->i_size >> SECTOR_SHIFT;
}
static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
{
return (sector & ~s->chunk_mask) >> s->chunk_shift;
}
static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
{
return chunk << s->chunk_shift;
}
static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
/*
* There is only ever one instance of a particular block
* device so we can compare pointers safely.
*/
return lhs == rhs;
}
#endif
@@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start,
}
/*
- * This upgrades the mode on an already open dm_dev. Being
* This upgrades the mode on an already open dm_dev, being
* careful to leave things as they were if we fail to reopen the
- * device.
* device and not to touch the existing bdev field in case
* it is accessed concurrently inside dm_table_any_congested().
*/
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
struct mapped_device *md)
{
int r;
-struct dm_dev_internal dd_copy;
-dev_t dev = dd->dm_dev.bdev->bd_dev;
struct dm_dev_internal dd_new, dd_old;
-dd_copy = *dd;
dd_new = dd_old = *dd;
dd_new.dm_dev.mode |= new_mode;
dd_new.dm_dev.bdev = NULL;
r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
if (r)
return r;
dd->dm_dev.mode |= new_mode;
-dd->dm_dev.bdev = NULL;
-r = open_dev(dd, dev, md);
-if (!r)
-close_dev(&dd_copy, md);
-else
-*dd = dd_copy;
close_dev(&dd_old, md);
-return r;
return 0;
}
/* /*
......
@@ -14,45 +14,34 @@
#define DM_MSG_PREFIX "target"
-struct tt_internal {
-struct target_type tt;
-struct list_head list;
-long use;
-};
static LIST_HEAD(_targets);
static DECLARE_RWSEM(_lock);
#define DM_MOD_NAME_SIZE 32
-static inline struct tt_internal *__find_target_type(const char *name)
static inline struct target_type *__find_target_type(const char *name)
{
-struct tt_internal *ti;
struct target_type *tt;
-list_for_each_entry (ti, &_targets, list)
-if (!strcmp(name, ti->tt.name))
-return ti;
list_for_each_entry(tt, &_targets, list)
if (!strcmp(name, tt->name))
return tt;
return NULL;
}
-static struct tt_internal *get_target_type(const char *name)
static struct target_type *get_target_type(const char *name)
{
-struct tt_internal *ti;
struct target_type *tt;
down_read(&_lock);
-ti = __find_target_type(name);
-if (ti) {
-if ((ti->use == 0) && !try_module_get(ti->tt.module))
-ti = NULL;
-else
-ti->use++;
-}
tt = __find_target_type(name);
if (tt && !try_module_get(tt->module))
tt = NULL;
up_read(&_lock);
-return ti;
return tt;
}
static void load_module(const char *name)
@@ -62,92 +51,59 @@ static void load_module(const char *name)
struct target_type *dm_get_target_type(const char *name)
{
-struct tt_internal *ti = get_target_type(name);
struct target_type *tt = get_target_type(name);
-if (!ti) {
if (!tt) {
load_module(name);
-ti = get_target_type(name);
tt = get_target_type(name);
}
-return ti ? &ti->tt : NULL;
return tt;
}
-void dm_put_target_type(struct target_type *t)
void dm_put_target_type(struct target_type *tt)
{
-struct tt_internal *ti = (struct tt_internal *) t;
down_read(&_lock);
-if (--ti->use == 0)
-module_put(ti->tt.module);
-BUG_ON(ti->use < 0);
module_put(tt->module);
up_read(&_lock);
-return;
-}
-static struct tt_internal *alloc_target(struct target_type *t)
-{
-struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
-if (ti)
-ti->tt = *t;
-return ti;
}
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
void *param), void *param)
{
-struct tt_internal *ti;
struct target_type *tt;
down_read(&_lock);
-list_for_each_entry (ti, &_targets, list)
-iter_func(&ti->tt, param);
list_for_each_entry(tt, &_targets, list)
iter_func(tt, param);
up_read(&_lock);
return 0;
}
-int dm_register_target(struct target_type *t)
int dm_register_target(struct target_type *tt)
{
int rv = 0;
-struct tt_internal *ti = alloc_target(t);
-if (!ti)
-return -ENOMEM;
down_write(&_lock);
-if (__find_target_type(t->name))
if (__find_target_type(tt->name))
rv = -EEXIST;
else
-list_add(&ti->list, &_targets);
list_add(&tt->list, &_targets);
up_write(&_lock);
-if (rv)
-kfree(ti);
return rv;
}
-void dm_unregister_target(struct target_type *t)
void dm_unregister_target(struct target_type *tt)
{
-struct tt_internal *ti;
down_write(&_lock);
-if (!(ti = __find_target_type(t->name))) {
-DMCRIT("Unregistering unrecognised target: %s", t->name);
-BUG();
-}
-if (ti->use) {
-DMCRIT("Attempt to unregister target still in use: %s",
-t->name);
if (!__find_target_type(tt->name)) {
DMCRIT("Unregistering unrecognised target: %s", tt->name);
BUG();
}
-list_del(&ti->list);
-kfree(ti);
list_del(&tt->list);
up_write(&_lock);
}
@@ -156,17 +112,17 @@ void dm_unregister_target(struct target_type *t)
* io-err: always fails an io, useful for bringing
* up LVs that have holes in them.
*/
-static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
{
return 0;
}
-static void io_err_dtr(struct dm_target *ti)
static void io_err_dtr(struct dm_target *tt)
{
/* empty */
}
-static int io_err_map(struct dm_target *ti, struct bio *bio,
static int io_err_map(struct dm_target *tt, struct bio *bio,
union map_info *map_context)
{
return -EIO;
......
@@ -99,19 +99,9 @@ union map_info *dm_get_mapinfo(struct bio *bio)
/*
* Work processed by per-device workqueue.
*/
-struct dm_wq_req {
-enum {
-DM_WQ_FLUSH_DEFERRED,
-} type;
-struct work_struct work;
-struct mapped_device *md;
-void *context;
-};
struct mapped_device {
struct rw_semaphore io_lock;
struct mutex suspend_lock;
-spinlock_t pushback_lock;
rwlock_t map_lock;
atomic_t holders;
atomic_t open_count;
@@ -129,8 +119,9 @@ struct mapped_device {
*/
atomic_t pending;
wait_queue_head_t wait;
struct work_struct work;
struct bio_list deferred;
-struct bio_list pushback;
spinlock_t deferred_lock;
/*
* Processing queue (flush/barriers)
@@ -453,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
return 1;
}
spin_lock_irq(&md->deferred_lock);
bio_list_add(&md->deferred, bio);
spin_unlock_irq(&md->deferred_lock);
up_write(&md->io_lock);
return 0; /* deferred successfully */
@@ -537,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error)
if (io->error == DM_ENDIO_REQUEUE) {
/*
* Target requested pushing back the I/O.
- * This must be handled before the sleeper on
- * suspend queue merges the pushback list.
*/
-spin_lock_irqsave(&md->pushback_lock, flags);
spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md))
-bio_list_add(&md->pushback, io->bio);
bio_list_add(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
io->error = -EIO;
-spin_unlock_irqrestore(&md->pushback_lock, flags);
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
end_io_acct(io);
@@ -834,20 +825,22 @@ static int __clone_and_map(struct clone_info *ci)
}
/*
- * Split the bio into several clones.
* Split the bio into several clones and submit it to targets.
*/
-static int __split_bio(struct mapped_device *md, struct bio *bio)
static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
struct clone_info ci;
int error = 0;
ci.map = dm_get_table(md);
-if (unlikely(!ci.map))
-return -EIO;
if (unlikely(!ci.map)) {
bio_io_error(bio);
return;
}
if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
dm_table_put(ci.map);
bio_endio(bio, -EOPNOTSUPP);
-return 0;
return;
}
ci.md = md;
ci.bio = bio;
@@ -867,8 +860,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
/* drop the extra reference count */
dec_pending(ci.io, error);
dm_table_put(ci.map);
-return 0;
}
/*-----------------------------------------------------------------
* CRUD END
@@ -959,8 +950,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
down_read(&md->io_lock);
}
-r = __split_bio(md, bio);
__split_and_process_bio(md, bio);
up_read(&md->io_lock);
return 0;
out_req:
if (r < 0)
@@ -1074,6 +1066,8 @@ static int next_free_minor(int *minor)
static struct block_device_operations dm_blk_dops;
static void dm_wq_work(struct work_struct *work);
/*
* Allocate and initialise a blank device with a given minor.
*/
@@ -1101,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor)
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
-spin_lock_init(&md->pushback_lock);
spin_lock_init(&md->deferred_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
@@ -1118,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor)
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
@@ -1140,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
atomic_set(&md->pending, 0);
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
md->disk->major = _major;
@@ -1379,18 +1375,24 @@ void dm_put(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_put);
-static int dm_wait_for_completion(struct mapped_device *md)
static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
int r = 0;
DECLARE_WAITQUEUE(wait, current);
dm_unplug_all(md->queue);
add_wait_queue(&md->wait, &wait);
while (1) {
-set_current_state(TASK_INTERRUPTIBLE);
set_current_state(interruptible);
smp_mb();
if (!atomic_read(&md->pending))
break;
-if (signal_pending(current)) {
if (interruptible == TASK_INTERRUPTIBLE &&
signal_pending(current)) {
r = -EINTR;
break;
}
@@ -1399,67 +1401,40 @@ static int dm_wait_for_completion(struct mapped_device *md)
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&md->wait, &wait);
return r;
}
/*
* Process the deferred bios
*/
-static void __flush_deferred_io(struct mapped_device *md)
-{
-struct bio *c;
-while ((c = bio_list_pop(&md->deferred))) {
-if (__split_bio(md, c))
-bio_io_error(c);
-}
-clear_bit(DMF_BLOCK_IO, &md->flags);
-}
-static void __merge_pushback_list(struct mapped_device *md)
-{
-unsigned long flags;
-spin_lock_irqsave(&md->pushback_lock, flags);
-clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-bio_list_merge_head(&md->deferred, &md->pushback);
-bio_list_init(&md->pushback);
-spin_unlock_irqrestore(&md->pushback_lock, flags);
-}
-static void dm_wq_work(struct work_struct *work)
-{
-struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
-struct mapped_device *md = req->md;
-down_write(&md->io_lock);
-switch (req->type) {
-case DM_WQ_FLUSH_DEFERRED:
-__flush_deferred_io(md);
-break;
-default:
-DMERR("dm_wq_work: unrecognised work type %d", req->type);
-BUG();
-}
-up_write(&md->io_lock);
-}
-static void dm_wq_queue(struct mapped_device *md, int type, void *context,
-struct dm_wq_req *req)
-{
-req->type = type;
-req->md = md;
-req->context = context;
-INIT_WORK(&req->work, dm_wq_work);
-queue_work(md->wq, &req->work);
-}
-static void dm_queue_flush(struct mapped_device *md, int type, void *context)
-{
-struct dm_wq_req req;
-dm_wq_queue(md, type, context, &req);
-flush_workqueue(md->wq);
-}
static void dm_wq_work(struct work_struct *work)
{
struct mapped_device *md = container_of(work, struct mapped_device,
work);
struct bio *c;
down_write(&md->io_lock);
next_bio:
spin_lock_irq(&md->deferred_lock);
c = bio_list_pop(&md->deferred);
spin_unlock_irq(&md->deferred_lock);
if (c) {
__split_and_process_bio(md, c);
goto next_bio;
}
clear_bit(DMF_BLOCK_IO, &md->flags);
up_write(&md->io_lock);
}
static void dm_queue_flush(struct mapped_device *md)
{
queue_work(md->wq, &md->work);
flush_workqueue(md->wq);
}
@@ -1534,7 +1509,6 @@ static void unlock_fs(struct mapped_device *md)
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
struct dm_table *map = NULL;
-DECLARE_WAITQUEUE(wait, current);
int r = 0;
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
@@ -1584,28 +1558,22 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
down_write(&md->io_lock);
set_bit(DMF_BLOCK_IO, &md->flags);
-add_wait_queue(&md->wait, &wait);
up_write(&md->io_lock);
-/* unplug */
-if (map)
-dm_table_unplug_all(map);
/*
* Wait for the already-mapped ios to complete.
*/
-r = dm_wait_for_completion(md);
r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
down_write(&md->io_lock);
-remove_wait_queue(&md->wait, &wait);
if (noflush)
-__merge_pushback_list(md);
clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
up_write(&md->io_lock);
/* were we interrupted ? */
if (r < 0) {
-dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
dm_queue_flush(md);
unlock_fs(md);
goto out; /* pushback list is already flushed, so skip flush */
@@ -1645,7 +1613,7 @@ int dm_resume(struct mapped_device *md)
if (r)
goto out;
-dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
dm_queue_flush(md);
unlock_fs(md);
......
@@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t);
int dm_target_init(void);
void dm_target_exit(void);
struct target_type *dm_get_target_type(const char *name);
-void dm_put_target_type(struct target_type *t);
void dm_put_target_type(struct target_type *tt);
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
void *param), void *param);
......
@@ -139,6 +139,9 @@ struct target_type {
dm_ioctl_fn ioctl;
dm_merge_fn merge;
dm_busy_fn busy;
/* For internal device-mapper use. */
struct list_head list;
};
struct io_restrictions {
......
@@ -28,6 +28,9 @@ struct dm_dirty_log_type {
const char *name;
struct module *module;
/* For internal device-mapper use */
struct list_head list;
int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
unsigned argc, char **argv);
void (*dtr)(struct dm_dirty_log *log);
@@ -113,6 +116,16 @@ struct dm_dirty_log_type {
*/
int (*status)(struct dm_dirty_log *log, status_type_t status_type,
char *result, unsigned maxlen);
/*
* is_remote_recovering is necessary for cluster mirroring. It provides
* a way to detect recovery on another node, so we aren't writing
* concurrently. This function is likely to block (when a cluster log
* is used).
*
* Returns: 0, 1
*/
int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
};
int dm_dirty_log_type_register(struct dm_dirty_log_type *type);
......