Commit 9c8794cd, authored by Stefan Bader, committed by Tim Gardner

UBUNTU: SAUCE: (no-up) ubuntu: dm-raid45

ExternalDriver: dm-raid45
Description: This software extends device-mapper by RAID4 and RAID5 mappings.
Url: http://people.redhat.com/~heinzm/sw/dm/dm-raid45/
Version: 2009.04.24 (2.6.30-rc3)
Signed-off-by: Manoj Iyer <manoj.iyer@canonical.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
parent 755486f6
......@@ -10,6 +10,7 @@ menu "Ubuntu Supplied Third-Party Device Drivers"
##
##
##
source "ubuntu/dm-raid4-5/Kconfig"
##
##
##
......
......@@ -12,6 +12,7 @@
##
##
##
obj-$(CONFIG_DM_RAID45) += dm-raid4-5/
##
##
##
......
Downloaded from: http://people.redhat.com/~heinzm/sw/dm/dm-raid45/
Current Version: 2009.04.24 (2.6.30-rc3)
Comments: All of the patches to dmraid1/dm-log, etc are upstream.
config DM_RAID45
tristate "RAID 4/5 target (EXPERIMENTAL)"
depends on BLK_DEV_DM && XOR_BLOCKS && EXPERIMENTAL
default m
---help---
A target that supports RAID4 and RAID5 mappings.
EXTRA_CFLAGS += -I$(srctree)/drivers/md
obj-$(CONFIG_DM_RAID45) := dm-raid45.o
dm-raid45-objs := dm-raid4-5.o dm-memcache.o dm-region-hash.o dm-message.o
/*
* Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
*
* Module Author: Heinz Mauelshagen <heinzm@redhat.com>
*
* Device-mapper memory object handling:
*
* o allocate/free total_pages in a per client page pool.
*
* o allocate/free memory objects with chunks (1..n) of
* pages_per_chunk pages hanging off.
*
* This file is released under the GPL.
*/
#define DM_MEM_CACHE_VERSION "0.2"
#include "dm.h"
#include "dm-memcache.h"
#include <linux/dm-io.h>
#include <linux/slab.h>
#include <linux/module.h>
/* Per-client state for the dm memory cache. */
struct dm_mem_cache_client {
	spinlock_t lock;		/* Protects free_list and page counters. */
	mempool_t *objs_pool;		/* Pool of dm_mem_cache_object arrays. */
	struct page_list *free_list;	/* Singly-linked list of free pages. */
	unsigned objects;		/* # of objects this client supports. */
	unsigned chunks;		/* Chunks per object. */
	unsigned pages_per_chunk;	/* Pages per chunk. */
	unsigned free_pages;		/* Pages currently on free_list. */
	unsigned total_pages;		/* Total pages owned by this client. */
};
/*
* Free pages and page_list elements of client.
*/
/*
 * Release every page on @list along with its page_list element.
 */
static void free_cache_pages(struct page_list *list)
{
	struct page_list *pl = list;

	while (pl) {
		struct page_list *next = pl->next;

		BUG_ON(!pl->page);
		__free_page(pl->page);
		kfree(pl);
		pl = next;
	}
}
/*
* Alloc number of pages and page_list elements as required by client.
*/
/*
 * Allocate @pages pages, each wrapped in a kmalloc'ed page_list
 * element, chained together; returns the list head or NULL on
 * failure (everything allocated so far is released).
 */
static struct page_list *alloc_cache_pages(unsigned pages)
{
	struct page_list *head = NULL;
	unsigned i;

	for (i = 0; i < pages; i++) {
		struct page_list *pl;
		struct page *page = alloc_page(GFP_NOIO);

		if (!page)
			goto err;

		pl = kmalloc(sizeof(*pl), GFP_NOIO);
		if (!pl) {
			__free_page(page);
			goto err;
		}

		/* Push onto the front of the list being built. */
		pl->page = page;
		pl->next = head;
		head = pl;
	}

	return head;

err:
	free_cache_pages(head);
	return NULL;
}
/*
* Allocate page_list elements from the pool to chunks of the memory object.
*/
/*
 * Move page_list elements from the client's free list onto the
 * chunks of memory object @obj.
 *
 * The caller must have already reserved enough free pages (see
 * dm_mem_cache_alloc()), hence the BUG_ON() on an empty free list.
 *
 * Fix: local_irq_save() already disables local interrupts, so the
 * original redundant local_irq_disable() call was removed.
 */
static void alloc_chunks(struct dm_mem_cache_client *cl,
			 struct dm_mem_cache_object *obj)
{
	unsigned chunks = cl->chunks;
	unsigned long flags;

	local_irq_save(flags);

	while (chunks--) {
		unsigned p = cl->pages_per_chunk;

		obj[chunks].pl = NULL;

		while (p--) {
			struct page_list *pl;

			/* Take next element from free list */
			spin_lock(&cl->lock);
			pl = cl->free_list;
			BUG_ON(!pl);
			cl->free_list = pl->next;
			spin_unlock(&cl->lock);

			pl->next = obj[chunks].pl;
			obj[chunks].pl = pl;
		}
	}

	local_irq_restore(flags);
}
/*
* Free page_list elements putting them back onto free list
*/
/*
 * Return the page_list elements of all chunks of @obj to the
 * client's free list, bumping the free page count.
 *
 * Fix: local_irq_save() already disables local interrupts, so the
 * original redundant local_irq_disable() call was removed.
 */
static void free_chunks(struct dm_mem_cache_client *cl,
			struct dm_mem_cache_object *obj)
{
	unsigned chunks = cl->chunks;
	unsigned long flags;
	struct page_list *next, *pl;

	local_irq_save(flags);

	while (chunks--) {
		for (pl = obj[chunks].pl; pl; pl = next) {
			next = pl->next;

			spin_lock(&cl->lock);
			pl->next = cl->free_list;
			cl->free_list = pl;
			cl->free_pages++;
			spin_unlock(&cl->lock);
		}
	}

	local_irq_restore(flags);
}
/*
* Create/destroy dm memory cache client resources.
*/
/*
 * Allocate a client supporting @objects objects, each with @chunks
 * chunks of @pages_per_chunk pages. All pages are preallocated.
 * Returns the client or ERR_PTR(-ENOMEM).
 */
struct dm_mem_cache_client *
dm_mem_cache_client_create(unsigned objects, unsigned chunks,
			   unsigned pages_per_chunk)
{
	/*
	 * NOTE(review): this product can overflow unsigned for large
	 * arguments -- callers are trusted to pass sane values.
	 */
	unsigned total_pages = objects * chunks * pages_per_chunk;
	struct dm_mem_cache_client *client;

	BUG_ON(!total_pages);

	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return ERR_PTR(-ENOMEM);

	/* One pool element = an array of 'chunks' objects. */
	client->objs_pool = mempool_create_kmalloc_pool(objects,
			chunks * sizeof(struct dm_mem_cache_object));
	if (!client->objs_pool)
		goto err;

	client->free_list = alloc_cache_pages(total_pages);
	if (!client->free_list)
		goto err1;

	spin_lock_init(&client->lock);
	client->objects = objects;
	client->chunks = chunks;
	client->pages_per_chunk = pages_per_chunk;
	client->free_pages = client->total_pages = total_pages;
	return client;

err1:
	mempool_destroy(client->objs_pool);
err:
	kfree(client);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(dm_mem_cache_client_create);
/*
 * Tear down a client. All objects must have been freed already:
 * the BUG_ON() enforces that every page is back on the free list.
 */
void dm_mem_cache_client_destroy(struct dm_mem_cache_client *cl)
{
	BUG_ON(cl->free_pages != cl->total_pages);
	free_cache_pages(cl->free_list);
	mempool_destroy(cl->objs_pool);
	kfree(cl);
}
EXPORT_SYMBOL(dm_mem_cache_client_destroy);
/*
* Grow a clients cache by an amount of pages.
*
* Don't call from interrupt context!
*/
int dm_mem_cache_grow(struct dm_mem_cache_client *cl, unsigned objects)
{
	unsigned pages = objects * cl->chunks * cl->pages_per_chunk;
	struct page_list *pl, *last;

	BUG_ON(!pages);

	pl = alloc_cache_pages(pages);
	if (!pl)
		return -ENOMEM;

	/* Find the tail so the new chain can be spliced onto free_list. */
	last = pl;
	while (last->next)
		last = last->next;

	spin_lock_irq(&cl->lock);
	last->next = cl->free_list;
	cl->free_list = pl;
	cl->free_pages += pages;
	cl->total_pages += pages;
	/*
	 * NOTE(review): pages were sized for @objects objects but the
	 * object count only grows by one -- confirm callers always pass
	 * objects == 1.
	 */
	cl->objects++;
	spin_unlock_irq(&cl->lock);

	/* NOTE(review): mempool_resize() return value is ignored here. */
	mempool_resize(cl->objs_pool, cl->objects, GFP_NOIO);
	return 0;
}
EXPORT_SYMBOL(dm_mem_cache_grow);
/* Shrink a clients cache by an amount of pages */
int dm_mem_cache_shrink(struct dm_mem_cache_client *cl, unsigned objects)
{
	int r;
	unsigned pages = objects * cl->chunks * cl->pages_per_chunk, p = pages;
	unsigned long flags;
	struct page_list *last = NULL, *pl, *pos;

	BUG_ON(!pages);

	spin_lock_irqsave(&cl->lock, flags);

	/* Walk 'pages' elements down the free list; 'last' trails 'pos'. */
	pl = pos = cl->free_list;
	while (p-- && pos->next) {
		last = pos;
		pos = pos->next;
	}

	/*
	 * ++p undoes the decrement from the final (failed) loop test;
	 * nonzero means the list ran out before 'pages' elements were
	 * walked, i.e. not enough free pages to give back.
	 */
	if (++p)
		r = -ENOMEM;
	else {
		r = 0;
		cl->free_list = pos;
		cl->free_pages -= pages;
		cl->total_pages -= pages;
		/*
		 * NOTE(review): like dm_mem_cache_grow(), only one object
		 * is dropped from the accounting regardless of @objects.
		 */
		cl->objects--;
		last->next = NULL;
	}

	spin_unlock_irqrestore(&cl->lock, flags);

	if (!r) {
		free_cache_pages(pl);
		mempool_resize(cl->objs_pool, cl->objects, GFP_NOIO);
	}

	return r;
}
EXPORT_SYMBOL(dm_mem_cache_shrink);
/*
* Allocate/free a memory object
*
* Can be called from interrupt context
*/
struct dm_mem_cache_object *dm_mem_cache_alloc(struct dm_mem_cache_client *cl)
{
	int r = 0;
	unsigned pages = cl->chunks * cl->pages_per_chunk;
	unsigned long flags;
	struct dm_mem_cache_object *obj;

	obj = mempool_alloc(cl->objs_pool, GFP_NOIO);
	if (!obj)
		return ERR_PTR(-ENOMEM);

	/* Reserve the pages under the lock; populate them outside it. */
	spin_lock_irqsave(&cl->lock, flags);
	if (pages > cl->free_pages)
		r = -ENOMEM;
	else
		cl->free_pages -= pages;
	spin_unlock_irqrestore(&cl->lock, flags);

	if (r) {
		mempool_free(obj, cl->objs_pool);
		return ERR_PTR(r);
	}

	alloc_chunks(cl, obj);
	return obj;
}
EXPORT_SYMBOL(dm_mem_cache_alloc);
void dm_mem_cache_free(struct dm_mem_cache_client *cl,
		       struct dm_mem_cache_object *obj)
{
	/* Return the object's pages to the free list, then the object. */
	free_chunks(cl, obj);
	mempool_free(obj, cl->objs_pool);
}
EXPORT_SYMBOL(dm_mem_cache_free);

MODULE_DESCRIPTION(DM_NAME " dm memory cache");
MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
MODULE_LICENSE("GPL");
/*
* Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
*
* Module Author: Heinz Mauelshagen <Mauelshagen@RedHat.com>
*
* Device-mapper memory object handling:
*
* o allocate/free total_pages in a per client page pool.
*
* o allocate/free memory objects with chunks (1..n) of
* pages_per_chunk pages hanging off.
*
* This file is released under the GPL.
*/
#ifndef _DM_MEM_CACHE_H
#define _DM_MEM_CACHE_H
#define DM_MEM_CACHE_H_VERSION "0.1"
#include "dm.h"
#include <linux/dm-io.h>
/* Return the @p-th element of page list @pl, or NULL if it is shorter. */
static inline struct page_list *pl_elem(struct page_list *pl, unsigned p)
{
	for (; pl && p; p--)
		pl = pl->next;

	return pl;
}
/* One memory object: an array of chunks, each a chain of pages. */
struct dm_mem_cache_object {
	struct page_list *pl;	/* Dynamically allocated array */
	void *private;		/* Caller context reference */
};

struct dm_mem_cache_client;

/*
 * Create/destroy dm memory cache client resources.
 *
 * On creation, a number of @objects with @chunks of
 * @pages_per_chunk pages will be allocated.
 */
struct dm_mem_cache_client *
dm_mem_cache_client_create(unsigned objects, unsigned chunks,
			   unsigned pages_per_chunk);
void dm_mem_cache_client_destroy(struct dm_mem_cache_client *client);

/*
 * Grow/shrink a dm memory cache client resources
 * by @objects amount of objects.
 */
int dm_mem_cache_grow(struct dm_mem_cache_client *client, unsigned objects);
int dm_mem_cache_shrink(struct dm_mem_cache_client *client, unsigned objects);

/*
 * Allocate/free a memory object
 *
 * On allocation one object with an amount of chunks and
 * an amount of pages per chunk will be returned on success.
 */
struct dm_mem_cache_object *
dm_mem_cache_alloc(struct dm_mem_cache_client *client);
void dm_mem_cache_free(struct dm_mem_cache_client *client,
		       struct dm_mem_cache_object *object);

#endif
/*
* Copyright (C) 2007,2008 Red Hat Inc. All rights reserved.
*
* Module Author: Heinz Mauelshagen <heinzm@redhat.com>
*
* General device-mapper message interface argument parser.
*
* This file is released under the GPL.
*
* device-mapper message parser.
*
*/
#include "dm.h"
#include "dm-message.h"
#include <linux/kernel.h>
#include <linux/module.h>
#define DM_MSG_PREFIX "dm_message"
/* Basename of a path. */
/* Return the basename component of path @s (text after the last '/'). */
static inline char *
basename(char *s)
{
	char *slash = strrchr(s, '/');

	if (slash)
		return slash + 1;

	return s;
}
/*
 * Convert each argv[] string into the caller-provided destination in
 * msg->spec->args->ptr[] according to its declared type. Numeric
 * conversion failures set dm_msg_ret_undef and dm_msg_ret_arg in
 * msg->ret.
 */
static void
message_arguments(struct dm_msg *msg, int argc, char **argv)
{

	if (argc) {
		int i;
		struct dm_message_argument *args = msg->spec->args;

		for (i = 0; i < args->num_args; i++) {
			int r;
			unsigned long **ptr = args->ptr;
			enum dm_message_argument_type type = args->types[i];

			switch (type) {
			case dm_msg_base_t:
				((char **) ptr)[i] = basename(argv[i]);
				break;

			case dm_msg_str_t:
				((char **) ptr)[i] = argv[i];
				break;

			case dm_msg_int_t:
				r = sscanf(argv[i], "%d", ((int **) ptr)[i]);
				goto check;

			case dm_msg_uint_t:
				r = sscanf(argv[i], "%u",
					   ((unsigned **) ptr)[i]);
				goto check;

			case dm_msg_uint64_t:
				r = sscanf(argv[i], "%llu",
					   ((unsigned long long **) ptr)[i]);

check:
				/* All numeric cases funnel through here. */
				if (r != 1) {
					set_bit(dm_msg_ret_undef, &msg->ret);
					set_bit(dm_msg_ret_arg, &msg->ret);
				}
			}
		}
	}
}
/*
 * Match argv[0] against the spec's option table by (case-insensitive)
 * prefix. Exactly one full-length match is required: no match or a
 * too-long input sets dm_msg_ret_undef, multiple matches set
 * dm_msg_ret_ambiguous; on success the option's action bit is set
 * and the remaining argv[] entries are converted as arguments.
 */
static void
message_options_parse(struct dm_msg *msg, int argc, char **argv)
{
	int hit = 0;
	unsigned long *action;
	size_t l1 = strlen(*argv), l_hit = 0;
	struct dm_message_option *o = msg->spec->options;
	char **option, **option_end = o->options + o->num_options;

	for (option = o->options, action = o->actions;
	     option < option_end; option++, action++) {
		size_t l2 = strlen(*option);

		if (!strnicmp(*argv, *option, min(l1, l2))) {
			hit++;
			l_hit = l2;
			set_bit(*action, &msg->action);
		}
	}

	/* Assume error. */
	msg->ret = 0;
	set_bit(dm_msg_ret_option, &msg->ret);

	if (!hit || l1 > l_hit)
		set_bit(dm_msg_ret_undef,	&msg->ret); /* Undefined option. */
	else if (hit > 1)
		set_bit(dm_msg_ret_ambiguous, &msg->ret); /* Ambiguous option.*/
	else {
		clear_bit(dm_msg_ret_option, &msg->ret); /* Option OK. */
		message_arguments(msg, --argc, ++argv);
	}
}
/*
 * Log one DMERR line for every error bit set in @ret, prefixed by
 * @caller. Iterates the table from the end towards the start.
 */
static inline void
print_ret(const char *caller, unsigned long ret)
{
	struct {
		unsigned long err;
		const char *err_str;
	} static err_msg[] = {
		{ dm_msg_ret_ambiguous, "message ambiguous" },
		{ dm_msg_ret_inval, "message invalid" },
		{ dm_msg_ret_undef, "message undefined" },
		{ dm_msg_ret_arg, "message argument" },
		{ dm_msg_ret_argcount, "message argument count" },
		{ dm_msg_ret_option, "option" },
	}, *e = ARRAY_END(err_msg);

	while (e-- > err_msg) {
		if (test_bit(e->err, &ret))
			DMERR("%s %s", caller, e->err_str);
	}
}
/*
 * Parse a message action: match argv[0] against the command specs by
 * case-insensitive prefix (unique, full-length match required), check
 * the argument count, then hand off to message_options_parse() and,
 * if everything checked out, invoke the spec's callback.
 *
 * Returns the callback's result on success, -EINVAL on any parse
 * error (after logging it via print_ret()).
 */
int
dm_message_parse(const char *caller, struct dm_msg *msg, void *context,
		 int argc, char **argv)
{
	int hit = 0;
	size_t l1, l_hit = 0;
	struct dm_msg_spec *s, *s_hit = NULL,
			   *s_end = msg->specs + msg->num_specs;

	if (argc < 2)
		return -EINVAL;

	l1 = strlen(*argv);
	for (s = msg->specs; s < s_end; s++) {
		size_t l2 = strlen(s->cmd);

		if (!strnicmp(*argv, s->cmd, min(l1, l2))) {
			hit++;
			l_hit = l2;
			s_hit = s;
		}
	}

	msg->ret = 0;
	if (!hit || l1 > l_hit)	/* No hit or message string too long. */
		set_bit(dm_msg_ret_undef, &msg->ret);
	else if (hit > 1)	/* Ambiguous message. */
		set_bit(dm_msg_ret_ambiguous, &msg->ret);
	else if (argc - 2 != s_hit->args->num_args) {
		set_bit(dm_msg_ret_undef, &msg->ret);
		set_bit(dm_msg_ret_argcount, &msg->ret);
	}

	if (msg->ret)
		goto bad;

	msg->action = 0;
	msg->spec = s_hit;
	set_bit(s_hit->action, &msg->action);
	message_options_parse(msg, --argc, ++argv);

	if (!msg->ret)
		return msg->spec->f(msg, context);

bad:
	print_ret(caller, msg->ret);
	return -EINVAL;
}
EXPORT_SYMBOL(dm_message_parse);

MODULE_DESCRIPTION(DM_NAME " device-mapper target message parser");
MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
MODULE_LICENSE("GPL");
/*
* Copyright (C) 2007,2008 Red Hat, Inc. All rights reserved.
*
* Module Author: Heinz Mauelshagen <Mauelshagen@RedHat.de>
*
* General device-mapper message interface argument parser.
*
* This file is released under the GPL.
*
*/
#ifndef DM_MESSAGE_H
#define DM_MESSAGE_H

/* Factor out to dm.h. */
/* Reference to array end. */
#define	ARRAY_END(a)	((a) + ARRAY_SIZE(a))

/* Message return bits. */
enum dm_message_return {
	dm_msg_ret_ambiguous,		/* Action ambiguous. */
	dm_msg_ret_inval,		/* Action invalid. */
	dm_msg_ret_undef,		/* Action undefined. */

	dm_msg_ret_option,		/* Option error. */
	dm_msg_ret_arg,			/* Argument error. */
	dm_msg_ret_argcount,		/* Argument count error. */
};

/* Message argument type conversions. */
enum dm_message_argument_type {
	dm_msg_base_t,		/* Basename string. */
	dm_msg_str_t,		/* String. */
	dm_msg_int_t,		/* Signed int. */
	dm_msg_uint_t,		/* Unsigned int. */
	dm_msg_uint64_t,	/* Unsigned int 64. */
};

/* A message option. */
struct dm_message_option {
	unsigned num_options;
	char **options;
	unsigned long *actions;
};

/* Message arguments and types. */
struct dm_message_argument {
	unsigned num_args;
	unsigned long **ptr;
	enum dm_message_argument_type types[];
};

/* Client message. */
struct dm_msg {
	unsigned long action;		/* Identified action. */
	unsigned long ret;		/* Return bits. */
	unsigned num_specs;		/* # of specifications listed. */
	struct dm_msg_spec *specs;	/* Specification list. */
	struct dm_msg_spec *spec;	/* Specification selected. */
};
/* Specification of the message. */
struct dm_msg_spec {
	const char *cmd;	/* Name of the command (i.e. 'bandwidth'). */
	unsigned long action;
	struct dm_message_option *options;
	struct dm_message_argument *args;
	unsigned long parm;	/* Parameter to pass through to callback. */
	/* Function to process for action. */
	int (*f) (struct dm_msg *msg, void *context);
};

/* Parameter access macros. */
#define	DM_MSG_PARM(msg) ((msg)->spec->parm)

#define	DM_MSG_STR_ARGS(msg, idx) ((char *) *(msg)->spec->args->ptr[idx])
#define	DM_MSG_INT_ARGS(msg, idx) ((int) *(msg)->spec->args->ptr[idx])
/*
 * Fix: the original expanded to DM_MSG_INT_ARG(msg, idx), a macro that
 * only takes one argument, which broke compilation at any use site.
 * It must reference the two-argument DM_MSG_INT_ARGS instead.
 */
#define	DM_MSG_UINT_ARGS(msg, idx) ((unsigned) DM_MSG_INT_ARGS(msg, idx))
#define	DM_MSG_UINT64_ARGS(msg, idx) ((uint64_t) *(msg)->spec->args->ptr[idx])

#define	DM_MSG_STR_ARG(msg) DM_MSG_STR_ARGS(msg, 0)
#define	DM_MSG_INT_ARG(msg) DM_MSG_INT_ARGS(msg, 0)
#define	DM_MSG_UINT_ARG(msg) DM_MSG_UINT_ARGS(msg, 0)
#define	DM_MSG_UINT64_ARG(msg) DM_MSG_UINT64_ARGS(msg, 0)
/* Parse a message and its options and optionally call a function back. */
int dm_message_parse(const char *caller, struct dm_msg *msg, void *context,
int argc, char **argv);
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
 * Copyright (C) 2006 Red Hat GmbH
 *
 * Module Author: Heinz Mauelshagen (Mauelshagen@RedHat.com)
 *
 * This file is released under the GPL.
 *
 */

#ifndef _DM_RAID45_H
#define _DM_RAID45_H

/* Factor out to dm.h! */
/* Expand to (pointer, string, length-of-pointer) argument triple. */
#define	STR_LEN(ptr, str)	ptr, str, strlen(ptr)

/* NOTE(review): this guard collides with the newer dm-raid45.h copy
 * shipped in the same commit -- only one of the two can be included. */
enum lock_type { RAID45_EX, RAID45_SHARED };

/* Pluggable stripe locking interface. */
struct dmraid45_locking_type {
	/* Request a lock on a stripe. */
	void* (*lock)(sector_t key, enum lock_type type);

	/* Release a lock on a stripe. */
	void (*unlock)(void *lock_handle);
};

#endif
/*
 * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
 *
 * Module Author: Heinz Mauelshagen (Mauelshagen@RedHat.com)
 *
 * Locking definitions for the device-mapper RAID45 target.
 *
 * This file is released under the GPL.
 *
 */

#ifndef _DM_RAID45_H
#define _DM_RAID45_H

/* Factor out to dm.h! */
/* Expand to (pointer, string, length-of-pointer) argument triple. */
#define	STR_LEN(ptr, str)	(ptr), (str), strlen((ptr))

enum dm_lock_type { DM_RAID45_EX, DM_RAID45_SHARED };

/* Pluggable stripe locking interface. */
struct dm_raid45_locking_type {
	/* Request a lock on a stripe. */
	void* (*lock)(sector_t key, enum dm_lock_type type);

	/* Release a lock on a stripe. */
	void (*unlock)(void *lock_handle);
};

#endif
/*
* Copyright (C) 2003 Sistina Software Limited.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
#include <linux/dm-dirty-log.h>
#include "dm-region-hash.h"
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "dm.h"
#define DM_MSG_PREFIX "region hash"
/*-----------------------------------------------------------------
* Region hash
*
* The mirror splits itself up into discrete regions. Each
* region can be in one of three states: clean, dirty,
* nosync. There is no need to put clean regions in the hash.
*
* In addition to being present in the hash table a region _may_
* be present on one of three lists.
*
* clean_regions: Regions on this list have no io pending to
* them, they are in sync, we are no longer interested in them,
* they are dull. dm_rh_update_states() will remove them from the
* hash table.
*
* quiesced_regions: These regions have been spun down, ready
* for recovery. rh_recovery_start() will remove regions from
* this list and hand them to kmirrord, which will schedule the
* recovery io with kcopyd.
*
* recovered_regions: Regions that kcopyd has successfully
* recovered. dm_rh_update_states() will now schedule any delayed
* io, up the recovery_count, and remove the region from the
* hash.
*
* There are 2 locks:
* A rw spin lock 'hash_lock' protects just the hash table,
* this is never held in write mode from interrupt context,
* which I believe means that we only have to disable irqs when
* doing a write lock.
*
* An ordinary spin lock 'region_lock' that protects the three
* lists in the region_hash, with the 'state', 'list' and
* 'delayed_bios' fields of the regions. This is used from irq
* context, so all other uses will have to suspend local irqs.
*---------------------------------------------------------------*/
/* Region hash instance; one per mirror/raid set. */
struct dm_region_hash {
	uint32_t region_size;	/* In sectors; assumed power of 2 (see ffs()
				 * use in dm_region_hash_create) -- confirm. */
	unsigned region_shift;	/* log2(region_size) for fast conversion. */

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	mempool_t *region_pool;
	unsigned mask;
	unsigned nr_buckets;
	unsigned prime;
	unsigned shift;
	struct list_head *buckets;

	unsigned max_recovery;	/* Max # of regions to recover in parallel */

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct semaphore recovery_count;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;

	void *context;
	sector_t target_begin;

	/* Callback function to schedule bios writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wakeup callers worker thread. */
	void (*wakeup_workers)(void *context);

	/* Callback function to wakeup callers recovery waiters. */
	void (*wakeup_all_recovery_waiters)(void *context);
};

/* One tracked region; lives in the hash and on at most one state list. */
struct dm_region {
	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
	region_t key;
	int state;

	struct list_head hash_list;
	struct list_head list;

	atomic_t pending;		/* Outstanding writes to this region. */
	struct bio_list delayed_bios;
};
/*
 * Conversion fns
 */
region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}
// EXPORT_SYMBOL_GPL(dm_rh_sector_to_region);

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
// EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	/* Sector is made target-relative before conversion. */
	return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
}
// EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
// EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
// EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
// EXPORT_SYMBOL_GPL(dm_rh_get_region_size);
/*
 * FIXME: shall we pass in a structure instead of all these args to
 * dm_region_hash_create()????
 */
#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned nr_buckets, max_buckets;
	size_t i;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	/* ffs() - 1 == log2 only if region_size is a power of 2. */
	rh->region_shift = ffs(region_size) - 1;
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);

	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
						      sizeof(struct dm_region));
	if (!rh->region_pool) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
// EXPORT_SYMBOL_GPL(dm_region_hash_create);

/* Free all regions still hashed, the dirty log, pool and buckets. */
void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	if (rh->region_pool)
		mempool_destroy(rh->region_pool);

	vfree(rh->buckets);
	kfree(rh);
}
// EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
// EXPORT_SYMBOL_GPL(dm_rh_dirty_log);

/* Multiplicative hash of a region key into a bucket index. */
static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
}
/* Find a region in its hash bucket; caller holds hash_lock. */
static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

/* Link a region into its hash bucket; caller holds hash_lock for write. */
static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}
/*
 * Allocate and insert a new region, racing against concurrent
 * inserters: if the region appeared meanwhile, the fresh allocation
 * is returned to the pool and the existing entry is used.
 */
static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		/* __GFP_NOFAIL: this allocation cannot return NULL. */
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	/* Initial state comes from the dirty log's sync status. */
	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the race. */
		mempool_free(nreg, rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}
/*
 * Lookup, creating the region on a miss. Called with hash_lock held
 * for read; the lock is dropped across __rh_alloc() (which takes it
 * for write) and reacquired before returning.
 */
static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}
/* Report a region's state, consulting the dirty log if not hashed. */
int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
// EXPORT_SYMBOL_GPL(dm_rh_get_state);
/*
 * Finish recovery of one region: record sync status in the log,
 * release its delayed bios, drop the in-flight count (waking waiters
 * on the last one) and release one recovery_count slot.
 */
static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the bios before we call 'wake_up_all'.
	 * This is important because if we are suspending,
	 * we want to know that recovery is complete and
	 * the work queue is flushed. If we wake_up_all
	 * before we dispatch_bios (queue bios and call wake()),
	 * then we risk suspending before the work queue
	 * has been properly flushed.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}
/* dm_rh_mark_nosync
 * @ms
 * @bio
 * @done
 * @error
 *
 * The bio was written on some mirror(s) but failed on other mirror(s).
 * We can successfully endio the bio but should avoid the region being
 * marked clean by setting the state DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh,
		       struct bio *bio, unsigned done, int error)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceeding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * Either case, the region should have not been connected to list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	bio_endio(bio, error);
	if (recovering)
		complete_resync_work(reg, 0);
}
// EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
/*
 * Drain the clean, recovered and failed-recovered lists: detach all
 * entries from the hash under the locks, then process them lock-free
 * (log updates, resync completion, pool frees), ending with a log
 * flush.
 */
void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the system, so no need to do
	 * any more locking.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
// EXPORT_SYMBOL_GPL(dm_rh_update_states);
/*
 * Account one pending write to @region, marking a clean region dirty
 * (and telling the log) on the transition.
 */
void dm_rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);


	read_unlock(&rh->hash_lock);
}
// EXPORT_SYMBOL_GPL(dm_rh_inc);

/* Account a pending write for every bio on @bios. */
void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next)
		dm_rh_inc(rh, dm_rh_bio_to_region(rh, bio));
}
// EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
/*
 * Drop one pending write from @region; when the last one completes,
 * move the region to the list matching its state and wake the worker.
 */
void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	/*
	 * NOTE(review): __rh_lookup() can return NULL and reg is then
	 * dereferenced below -- callers must guarantee a prior
	 * dm_rh_inc() for this region; confirm.
	 */
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region.
		 * We can move the region to corresponding list for next action.
		 * At this point, the region is not yet connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region should be kept off
		 * from clean list.
		 * The hash entry for DM_RH_NOSYNC will remain in memory
		 * until the region is recovered or the map is reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
// EXPORT_SYMBOL_GPL(dm_rh_dec);
/*
 * Starts quiescing a region in preparation for recovery.
 *
 * Returns the dirty log's verdict: <= 0 when there is no (more)
 * resync work, 1 when a region was marked DM_RH_RECOVERING.
 */
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}
/*
 * Prepare as many regions for recovery as recovery_count slots allow,
 * bumping recovery_in_flight per region. An extra reference is held
 * across the loop so waiters cannot be woken prematurely.
 */
void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			/* No work: undo the count and the semaphore slot. */
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
// EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
/*
 * Hand out one quiesced region for recovery, or NULL if none is ready.
 * The region is unlinked from the quiesced list before it is returned.
 */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg;

	spin_lock_irq(&rh->region_lock);
	if (list_empty(&rh->quiesced_regions)) {
		spin_unlock_irq(&rh->region_lock);
		return NULL;
	}
	reg = list_entry(rh->quiesced_regions.next, struct dm_region, list);
	list_del_init(&reg->list);
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
// EXPORT_SYMBOL_GPL(dm_rh_recovery_start);
/*
 * Report recovery completion on @reg.  Non-zero @success moves the region
 * onto the recovered list; failure marks it DM_RH_NOSYNC and queues it on
 * the failed-recovery list.  Workers are woken to process the lists.
 */
void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (!success) {
		reg->state = DM_RH_NOSYNC;
		list_add(&reg->list, &rh->failed_recovered_regions);
	} else
		list_add(&reg->list, &rh->recovered_regions);
	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
// EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
/* Number of outstanding recovery references (see dm_rh_recovery_prepare). */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	int in_flight = atomic_read(&rh->recovery_in_flight);

	return in_flight;
}
// EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);
int dm_rh_flush(struct dm_region_hash *rh)
{
return rh->log->type->flush(rh->log);
}
// EXPORT_SYMBOL_GPL(dm_rh_flush);
/*
 * Park @bio on the delayed_bios list of the region it maps to.
 * Convenience wrapper: derives the region from the bio's sector and
 * defers to dm_rh_delay_by_region().
 */
void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	dm_rh_delay_by_region(rh, bio, dm_rh_bio_to_region(rh, bio));
}
// EXPORT_SYMBOL_GPL(dm_rh_delay);
/* Park @bio on the delayed_bios list of the region keyed by @region. */
void dm_rh_delay_by_region(struct dm_region_hash *rh,
			   struct bio *bio, region_t region)
{
	struct dm_region *target;

	/* FIXME: locking. */
	read_lock(&rh->hash_lock);
	target = __rh_find(rh, region);
	bio_list_add(&target->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
// EXPORT_SYMBOL_GPL(dm_rh_delay_by_region);
/*
 * Block further recovery by claiming every recovery slot; returns only
 * once all in-progress recoveries have released theirs.
 */
void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int remaining = rh->max_recovery;

	while (remaining-- > 0)
		down(&rh->recovery_count);
}
// EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);
/* Re-enable recovery: release all recovery slots and wake the workers. */
void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int slot;

	for (slot = rh->max_recovery; slot > 0; slot--)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
// EXPORT_SYMBOL_GPL(dm_rh_start_recovery);
/* Module metadata. */
MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * Device-Mapper dirty region hash interface.
 *
 * This file is released under the GPL.
 */
#ifndef DM_REGION_HASH_H
#define DM_REGION_HASH_H

#include <linux/dm-dirty-log.h>

/*-----------------------------------------------------------------
 * Region hash
 *----------------------------------------------------------------*/
struct dm_region_hash;
struct dm_region;

/*
 * States a region can have.
 */
enum dm_rh_region_states {
	DM_RH_CLEAN      = 0x01, /* No writes in flight. */
	DM_RH_DIRTY      = 0x02, /* Writes in flight. */
	DM_RH_NOSYNC     = 0x04, /* Out of sync. */
	DM_RH_RECOVERING = 0x08, /* Under resynchronization. */
};

/*
 * Region hash create/destroy.
 */
struct bio_list;
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions);
void dm_region_hash_destroy(struct dm_region_hash *rh);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh);

/*
 * Conversion functions.
 */
region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio);
sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region);
region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector);

void *dm_rh_region_context(struct dm_region *reg);

/*
 * Get region size and key (ie. number of the region).
 */
sector_t dm_rh_get_region_size(struct dm_region_hash *rh);
region_t dm_rh_get_region_key(struct dm_region *reg);

/*
 * Get/set/update region state (and dirty log).
 */
int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block);
void dm_rh_set_state(struct dm_region_hash *rh, region_t region,
		     enum dm_rh_region_states state, int may_block);

/* Non-zero errors_handled leaves the state of the region NOSYNC */
void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled);

/* Flush the region hash and dirty log. */
int dm_rh_flush(struct dm_region_hash *rh);

/* Inc/dec pending count on regions. */
void dm_rh_inc(struct dm_region_hash *rh, region_t region);
void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios);
void dm_rh_dec(struct dm_region_hash *rh, region_t region);

/* Delay bios on regions. */
void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio);
void dm_rh_delay_by_region(struct dm_region_hash *rh, struct bio *bio,
			   region_t region);

void dm_rh_mark_nosync(struct dm_region_hash *rh,
		       struct bio *bio, unsigned done, int error);

/*
 * Region recovery control.
 */

/* Prepare some regions for recovery by starting to quiesce them. */
void dm_rh_recovery_prepare(struct dm_region_hash *rh);

/* Try fetching a quiesced region for recovery. */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh);

/*
 * Report recovery end on a region.  Non-zero success means the region
 * was recovered; zero marks it out-of-sync (NOSYNC).
 * (Parameter renamed from the misleading 'error' to match the
 * implementation in dm-region-hash.c.)
 */
void dm_rh_recovery_end(struct dm_region *reg, int success);

/* Returns number of regions with recovery work outstanding. */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh);

/* Start/stop recovery. */
void dm_rh_start_recovery(struct dm_region_hash *rh);
void dm_rh_stop_recovery(struct dm_region_hash *rh);

#endif /* DM_REGION_HASH_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment