Commit 1fe10e2f authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Implement queue congestion callout for device mapper

From: Miquel van Smoorenburg <miquels@cistron.nl>
      Joe Thornber <thornber@redhat.com>

This implements the queue congestion callout for DM stacks, so that
bdi_read/write_congested() return correct information.

- md->lock protects all fields in md _except_ md->map
- md->map_lock protects md->map
- Anyone who wants to read md->map should use dm_get_table(), which
  increments the table's reference count.

This means the spin lock is now only held for the duration of a
reference count increment.
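
As an illustration of the rules above, a minimal reader-side sketch (the
caller example_reader() is hypothetical; dm_get_table()/dm_table_put()
are the helpers introduced by this patch):

/* Hypothetical caller, shown only to illustrate the access rules above. */
static int example_reader(struct mapped_device *md)
{
	struct dm_table *t = dm_get_table(md);	/* takes md->map_lock briefly */

	if (!t)
		return -ENXIO;	/* no table bound */

	/* ... use the table here without holding md->map_lock ... */

	dm_table_put(t);	/* drop the reference taken above */
	return 0;
}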

Update:

dm.c: protect md->map with an rw spinlock rather than the md->lock
semaphore.  Also ensure that everyone accesses md->map through
dm_get_table() rather than directly.
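
For the writer side, a sketch (not part of the patch) of publishing a new
table under the rw spinlock, mirroring what __bind() does in the hunks
below:

/* Illustrative only: install a new table the way __bind() does. */
static void example_bind(struct mapped_device *md, struct dm_table *t)
{
	write_lock(&md->map_lock);	/* exclude concurrent dm_get_table() readers */
	md->map = t;
	write_unlock(&md->map_lock);

	dm_table_get(t);		/* the mapped device keeps its own reference */
}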
parent 6a435d69
@@ -279,6 +279,9 @@ void dm_table_get(struct dm_table *t)
void dm_table_put(struct dm_table *t)
{
	if (!t)
		return;

	if (atomic_dec_and_test(&t->holders))
		table_destroy(t);
}
@@ -867,6 +870,20 @@ void dm_table_resume_targets(struct dm_table *t)
	}
}

int dm_table_any_congested(struct dm_table *t, int bdi_bits)
{
	struct list_head *d, *devices;
	int r = 0;

	devices = dm_table_get_devices(t);
	for (d = devices->next; d != devices; d = d->next) {
		struct dm_dev *dd = list_entry(d, struct dm_dev, list);
		request_queue_t *q = bdev_get_queue(dd->bdev);

		r |= bdi_congested(&q->backing_dev_info, bdi_bits);
	}

	return r;
}
EXPORT_SYMBOL(dm_vcalloc);
EXPORT_SYMBOL(dm_get_device);
@@ -49,6 +49,7 @@ struct target_io {
struct mapped_device {
	struct rw_semaphore lock;
	rwlock_t map_lock;
	atomic_t holders;
	unsigned long flags;
@@ -237,6 +238,24 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
	return 0; /* deferred successfully */
}

/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_table_put() when finished.
 */
struct dm_table *dm_get_table(struct mapped_device *md)
{
	struct dm_table *t;

	read_lock(&md->map_lock);
	t = md->map;
	if (t)
		dm_table_get(t);
	read_unlock(&md->map_lock);

	return t;
}

/*-----------------------------------------------------------------
 * CRUD START:
 * A more elegant soln is in the works that uses the queue
@@ -345,6 +364,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
@@ -398,7 +418,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
static void __clone_and_map(struct clone_info *ci)
{
	struct bio *clone, *bio = ci->bio;
	struct dm_target *ti = dm_table_find_target(ci->md->map, ci->sector);
	struct dm_target *ti = dm_table_find_target(ci->map, ci->sector);
	sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti);
	struct target_io *tio;
@@ -459,7 +479,7 @@ static void __clone_and_map(struct clone_info *ci)
		ci->sector += max;
		ci->sector_count -= max;

		ti = dm_table_find_target(ci->md->map, ci->sector);
		ti = dm_table_find_target(ci->map, ci->sector);
		len = to_sector(bv->bv_len) - max;

		clone = split_bvec(bio, ci->sector, ci->idx,
@@ -484,6 +504,7 @@ static void __split_bio(struct mapped_device *md, struct bio *bio)
	struct clone_info ci;

	ci.md = md;
	ci.map = dm_get_table(md);
	ci.bio = bio;
	ci.io = alloc_io(md);
	ci.io->error = 0;
@@ -500,6 +521,7 @@ static void __split_bio(struct mapped_device *md, struct bio *bio)
	/* drop the extra reference count */
	dec_pending(ci.io, 0);
	dm_table_put(ci.map);
}

/*-----------------------------------------------------------------
 * CRUD END
@@ -559,6 +581,22 @@ static int dm_request(request_queue_t *q, struct bio *bio)
	return 0;
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r;
	struct mapped_device *md = (struct mapped_device *) congested_data;
	struct dm_table *map = dm_get_table(md);

	if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
		/* FIXME: shouldn't suspended count a congested ? */
		r = bdi_bits;
	else
		r = dm_table_any_congested(map, bdi_bits);

	dm_table_put(map);
	return r;
}

/*-----------------------------------------------------------------
 * A bitset is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
@@ -630,6 +668,7 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
	memset(md, 0, sizeof(*md));
	init_rwsem(&md->lock);
	rwlock_init(&md->map_lock);
	atomic_set(&md->holders, 1);

	md->queue = blk_alloc_queue(GFP_KERNEL);
@@ -637,6 +676,8 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
		goto bad1;

	md->queue->queuedata = md;
	md->queue->backing_dev_info.congested_fn = dm_any_congested;
	md->queue->backing_dev_info.congested_data = md;
	blk_queue_make_request(md->queue, dm_request);

	md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
@@ -727,22 +768,28 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
	if (size == 0)
		return 0;

	write_lock(&md->map_lock);
	md->map = t;
	dm_table_event_callback(md->map, event_callback, md);
	write_unlock(&md->map_lock);

	dm_table_get(t);
	dm_table_event_callback(md->map, event_callback, md);
	dm_table_set_restrictions(t, q);
	return 0;
}

static void __unbind(struct mapped_device *md)
{
	if (!md->map)
	struct dm_table *map = md->map;

	if (!map)
		return;

	dm_table_event_callback(md->map, NULL, NULL);
	dm_table_put(md->map);
	dm_table_event_callback(map, NULL, NULL);
	write_lock(&md->map_lock);
	md->map = NULL;
	write_unlock(&md->map_lock);

	dm_table_put(map);
}
/*
@@ -778,12 +825,16 @@ void dm_get(struct mapped_device *md)
void dm_put(struct mapped_device *md)
{
	struct dm_table *map = dm_get_table(md);

	if (atomic_dec_and_test(&md->holders)) {
		if (!test_bit(DMF_SUSPENDED, &md->flags) && md->map)
			dm_table_suspend_targets(md->map);
		if (!test_bit(DMF_SUSPENDED, &md->flags) && map)
			dm_table_suspend_targets(map);
		__unbind(md);
		free_dev(md);
	}

	dm_table_put(map);
}
/*
@@ -834,6 +885,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 */
int dm_suspend(struct mapped_device *md)
{
	struct dm_table *map;
	DECLARE_WAITQUEUE(wait, current);

	down_write(&md->lock);
@@ -869,8 +921,11 @@ int dm_suspend(struct mapped_device *md)
	down_write(&md->lock);
	remove_wait_queue(&md->wait, &wait);

	set_bit(DMF_SUSPENDED, &md->flags);

	if (md->map)
		dm_table_suspend_targets(md->map);
	map = dm_get_table(md);
	if (map)
		dm_table_suspend_targets(map);
	dm_table_put(map);

	up_write(&md->lock);

	return 0;
@@ -879,22 +934,25 @@ int dm_suspend(struct mapped_device *md)
int dm_resume(struct mapped_device *md)
{
	struct bio *def;
	struct dm_table *map = dm_get_table(md);

	down_write(&md->lock);
	if (!md->map ||
	if (!map ||
	    !test_bit(DMF_SUSPENDED, &md->flags) ||
	    !dm_table_get_size(md->map)) {
	    !dm_table_get_size(map)) {
		up_write(&md->lock);
		dm_table_put(map);
		return -EINVAL;
	}

	dm_table_resume_targets(md->map);
	dm_table_resume_targets(map);
	clear_bit(DMF_SUSPENDED, &md->flags);
	clear_bit(DMF_BLOCK_IO, &md->flags);

	def = bio_list_get(&md->deferred);
	__flush_deferred_io(md, def);
	up_write(&md->lock);
	dm_table_put(map);

	blk_run_queues();
@@ -946,19 +1004,6 @@ struct gendisk *dm_disk(struct mapped_device *md)
	return md->disk;
}

struct dm_table *dm_get_table(struct mapped_device *md)
{
	struct dm_table *t;

	down_read(&md->lock);
	t = md->map;
	if (t)
		dm_table_get(t);
	up_read(&md->lock);

	return t;
}

int dm_suspended(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
@@ -115,6 +115,7 @@ struct list_head *dm_table_get_devices(struct dm_table *t);
int dm_table_get_mode(struct dm_table *t);
void dm_table_suspend_targets(struct dm_table *t);
void dm_table_resume_targets(struct dm_table *t);
int dm_table_any_congested(struct dm_table *t, int bdi_bits);
/*-----------------------------------------------------------------
 * A registry of target types.