Commit 140dfc92 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'dm-3.19-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Significant DM thin-provisioning performance improvements to meet
   performance requirements that were requested by the Gluster
   distributed filesystem.

   Specifically, dm-thinp now takes care to aggregate IO that will be
   issued to the same thinp block before issuing IO to the underlying
   devices.  This really helps improve performance on HW RAID6 devices
   that have a writeback cache because it avoids RMW in the HW RAID
   controller.

 - Some stable fixes: fix leak in DM bufio if integrity profiles were
   enabled, use memzero_explicit in DM crypt to avoid any potential for
   information leak, and a DM cache fix to properly mark a cache block
   dirty if it was promoted to the cache via the overwrite optimization.

 - A few simple DM persistent data library fixes

 - DM cache multiqueue policy block promotion improvements.

 - DM cache discard improvements that take advantage of range
   (multiblock) discard support in the DM bio-prison.  This allows for
   much more efficient bulk discard processing (e.g.  when mkfs.xfs
   discards the entire device).

 - Some small optimizations in DM core and RCU deference cleanups

 - DM core changes to suspend/resume code to introduce the new internal
   suspend/resume interface that the DM thin-pool target now uses to
   suspend/resume active thin devices when the thin-pool must
   suspend/resume.

   This avoids forcing userspace to track all active thin volumes in a
   thin-pool when the thin-pool is suspended for the purposes of
   metadata or data space resize.

* tag 'dm-3.19-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (49 commits)
  dm crypt: use memzero_explicit for on-stack buffer
  dm space map metadata: fix sm_bootstrap_get_count()
  dm space map metadata: fix sm_bootstrap_get_nr_blocks()
  dm bufio: fix memleak when using a dm_buffer's inline bio
  dm cache: fix spurious cell_defer when dealing with partial block at end of device
  dm cache: dirty flag was mistakenly being cleared when promoting via overwrite
  dm cache: only use overwrite optimisation for promotion when in writeback mode
  dm cache: discard block size must be a multiple of cache block size
  dm cache: fix a harmless race when working out if a block is discarded
  dm cache: when reloading a discard bitset allow for a different discard block size
  dm cache: fix some issues with the new discard range support
  dm array: if resizing the array is a noop set the new root to the old one
  dm: use rcu_dereference_protected instead of rcu_dereference
  dm thin: fix pool_io_hints to avoid looking at max_hw_sectors
  dm thin: suspend/resume active thin devices when reloading thin-pool
  dm: enhance internal suspend and resume interface
  dm thin: do not allow thin device activation while pool is suspended
  dm: add presuspend_undo hook to target_type
  dm: return earlier from dm_blk_ioctl if target doesn't implement .ioctl
  dm thin: remove stale 'trim' message in block comment above pool_message
  ...
parents f94784bd 1a71d6ff
......@@ -47,20 +47,26 @@ Message and constructor argument pairs are:
'discard_promote_adjustment <value>'
The sequential threshold indicates the number of contiguous I/Os
required before a stream is treated as sequential. The random threshold
required before a stream is treated as sequential. Once a stream is
considered sequential it will bypass the cache. The random threshold
is the number of intervening non-contiguous I/Os that must be seen
before the stream is treated as random again.
The sequential and random thresholds default to 512 and 4 respectively.
Large, sequential ios are probably better left on the origin device
since spindles tend to have good bandwidth. The io_tracker counts
contiguous I/Os to try to spot when the io is in one of these sequential
modes.
Internally the mq policy maintains a promotion threshold variable. If
the hit count of a block not in the cache goes above this threshold it
gets promoted to the cache. The read, write and discard promote adjustment
Large, sequential I/Os are probably better left on the origin device
since spindles tend to have good sequential I/O bandwidth. The
io_tracker counts contiguous I/Os to try to spot when the I/O is in one
of these sequential modes. But there are use-cases for wanting to
promote sequential blocks to the cache (e.g. fast application startup).
If sequential threshold is set to 0 the sequential I/O detection is
disabled and sequential I/O will no longer implicitly bypass the cache.
Setting the random threshold to 0 does _not_ disable the random I/O
stream detection.
Internally the mq policy determines a promotion threshold. If the hit
count of a block not in the cache goes above this threshold it gets
promoted to the cache. The read, write and discard promote adjustment
tunables allow you to tweak the promotion threshold by adding a small
value based on the io type. They default to 4, 8 and 1 respectively.
If you're trying to quickly warm a new cache device you may wish to
......
......@@ -14,68 +14,38 @@
/*----------------------------------------------------------------*/
struct bucket {
spinlock_t lock;
struct hlist_head cells;
};
#define MIN_CELLS 1024
struct dm_bio_prison {
spinlock_t lock;
mempool_t *cell_pool;
unsigned nr_buckets;
unsigned hash_mask;
struct bucket *buckets;
struct rb_root cells;
};
/*----------------------------------------------------------------*/
static uint32_t calc_nr_buckets(unsigned nr_cells)
{
uint32_t n = 128;
nr_cells /= 4;
nr_cells = min(nr_cells, 8192u);
while (n < nr_cells)
n <<= 1;
return n;
}
static struct kmem_cache *_cell_cache;
static void init_bucket(struct bucket *b)
{
spin_lock_init(&b->lock);
INIT_HLIST_HEAD(&b->cells);
}
/*----------------------------------------------------------------*/
/*
* @nr_cells should be the number of cells you want in use _concurrently_.
* Don't confuse it with the number of distinct keys.
*/
struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
struct dm_bio_prison *dm_bio_prison_create(void)
{
unsigned i;
uint32_t nr_buckets = calc_nr_buckets(nr_cells);
size_t len = sizeof(struct dm_bio_prison) +
(sizeof(struct bucket) * nr_buckets);
struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
if (!prison)
return NULL;
prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
spin_lock_init(&prison->lock);
prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
if (!prison->cell_pool) {
kfree(prison);
return NULL;
}
prison->nr_buckets = nr_buckets;
prison->hash_mask = nr_buckets - 1;
prison->buckets = (struct bucket *) (prison + 1);
for (i = 0; i < nr_buckets; i++)
init_bucket(prison->buckets + i);
prison->cells = RB_ROOT;
return prison;
}
......@@ -101,68 +71,73 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
static void __setup_new_cell(struct dm_cell_key *key,
struct bio *holder,
struct dm_bio_prison_cell *cell)
{
const unsigned long BIG_PRIME = 4294967291UL;
uint64_t hash = key->block * BIG_PRIME;
return (uint32_t) (hash & prison->hash_mask);
memcpy(&cell->key, key, sizeof(cell->key));
cell->holder = holder;
bio_list_init(&cell->bios);
}
static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
static int cmp_keys(struct dm_cell_key *lhs,
struct dm_cell_key *rhs)
{
return (lhs->virtual == rhs->virtual) &&
(lhs->dev == rhs->dev) &&
(lhs->block == rhs->block);
}
if (lhs->virtual < rhs->virtual)
return -1;
static struct bucket *get_bucket(struct dm_bio_prison *prison,
struct dm_cell_key *key)
{
return prison->buckets + hash_key(prison, key);
}
if (lhs->virtual > rhs->virtual)
return 1;
static struct dm_bio_prison_cell *__search_bucket(struct bucket *b,
struct dm_cell_key *key)
{
struct dm_bio_prison_cell *cell;
if (lhs->dev < rhs->dev)
return -1;
hlist_for_each_entry(cell, &b->cells, list)
if (keys_equal(&cell->key, key))
return cell;
if (lhs->dev > rhs->dev)
return 1;
return NULL;
}
if (lhs->block_end <= rhs->block_begin)
return -1;
static void __setup_new_cell(struct bucket *b,
struct dm_cell_key *key,
struct bio *holder,
struct dm_bio_prison_cell *cell)
{
memcpy(&cell->key, key, sizeof(cell->key));
cell->holder = holder;
bio_list_init(&cell->bios);
hlist_add_head(&cell->list, &b->cells);
if (lhs->block_begin >= rhs->block_end)
return 1;
return 0;
}
static int __bio_detain(struct bucket *b,
static int __bio_detain(struct dm_bio_prison *prison,
struct dm_cell_key *key,
struct bio *inmate,
struct dm_bio_prison_cell *cell_prealloc,
struct dm_bio_prison_cell **cell_result)
{
struct dm_bio_prison_cell *cell;
cell = __search_bucket(b, key);
if (cell) {
if (inmate)
bio_list_add(&cell->bios, inmate);
*cell_result = cell;
return 1;
int r;
struct rb_node **new = &prison->cells.rb_node, *parent = NULL;
while (*new) {
struct dm_bio_prison_cell *cell =
container_of(*new, struct dm_bio_prison_cell, node);
r = cmp_keys(key, &cell->key);
parent = *new;
if (r < 0)
new = &((*new)->rb_left);
else if (r > 0)
new = &((*new)->rb_right);
else {
if (inmate)
bio_list_add(&cell->bios, inmate);
*cell_result = cell;
return 1;
}
}
__setup_new_cell(b, key, inmate, cell_prealloc);
__setup_new_cell(key, inmate, cell_prealloc);
*cell_result = cell_prealloc;
rb_link_node(&cell_prealloc->node, parent, new);
rb_insert_color(&cell_prealloc->node, &prison->cells);
return 0;
}
......@@ -174,11 +149,10 @@ static int bio_detain(struct dm_bio_prison *prison,
{
int r;
unsigned long flags;
struct bucket *b = get_bucket(prison, key);
spin_lock_irqsave(&b->lock, flags);
r = __bio_detain(b, key, inmate, cell_prealloc, cell_result);
spin_unlock_irqrestore(&b->lock, flags);
spin_lock_irqsave(&prison->lock, flags);
r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
spin_unlock_irqrestore(&prison->lock, flags);
return r;
}
......@@ -205,10 +179,11 @@ EXPORT_SYMBOL_GPL(dm_get_cell);
/*
* @inmates must have been initialised prior to this call
*/
static void __cell_release(struct dm_bio_prison_cell *cell,
static void __cell_release(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *inmates)
{
hlist_del(&cell->list);
rb_erase(&cell->node, &prison->cells);
if (inmates) {
if (cell->holder)
......@@ -222,21 +197,21 @@ void dm_cell_release(struct dm_bio_prison *prison,
struct bio_list *bios)
{
unsigned long flags;
struct bucket *b = get_bucket(prison, &cell->key);
spin_lock_irqsave(&b->lock, flags);
__cell_release(cell, bios);
spin_unlock_irqrestore(&b->lock, flags);
spin_lock_irqsave(&prison->lock, flags);
__cell_release(prison, cell, bios);
spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
/*
* Sometimes we don't want the holder, just the additional bios.
*/
static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
static void __cell_release_no_holder(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *inmates)
{
hlist_del(&cell->list);
rb_erase(&cell->node, &prison->cells);
bio_list_merge(inmates, &cell->bios);
}
......@@ -245,11 +220,10 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
struct bio_list *inmates)
{
unsigned long flags;
struct bucket *b = get_bucket(prison, &cell->key);
spin_lock_irqsave(&b->lock, flags);
__cell_release_no_holder(cell, inmates);
spin_unlock_irqrestore(&b->lock, flags);
spin_lock_irqsave(&prison->lock, flags);
__cell_release_no_holder(prison, cell, inmates);
spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
......@@ -267,6 +241,20 @@ void dm_cell_error(struct dm_bio_prison *prison,
}
EXPORT_SYMBOL_GPL(dm_cell_error);
void dm_cell_visit_release(struct dm_bio_prison *prison,
void (*visit_fn)(void *, struct dm_bio_prison_cell *),
void *context,
struct dm_bio_prison_cell *cell)
{
unsigned long flags;
spin_lock_irqsave(&prison->lock, flags);
visit_fn(context, cell);
rb_erase(&cell->node, &prison->cells);
spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_visit_release);
/*----------------------------------------------------------------*/
#define DEFERRED_SET_SIZE 64
......
......@@ -10,8 +10,8 @@
#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */
#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */
#include <linux/list.h>
#include <linux/bio.h>
#include <linux/rbtree.h>
/*----------------------------------------------------------------*/
......@@ -23,11 +23,14 @@
*/
struct dm_bio_prison;
/* FIXME: this needs to be more abstract */
/*
* Keys define a range of blocks within either a virtual or physical
* device.
*/
struct dm_cell_key {
int virtual;
dm_thin_id dev;
dm_block_t block;
dm_block_t block_begin, block_end;
};
/*
......@@ -35,13 +38,15 @@ struct dm_cell_key {
* themselves.
*/
struct dm_bio_prison_cell {
struct hlist_node list;
struct list_head user_list; /* for client use */
struct rb_node node;
struct dm_cell_key key;
struct bio *holder;
struct bio_list bios;
};
struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells);
struct dm_bio_prison *dm_bio_prison_create(void);
void dm_bio_prison_destroy(struct dm_bio_prison *prison);
/*
......@@ -57,7 +62,7 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell);
/*
* Creates, or retrieves a cell for the given key.
* Creates, or retrieves a cell that overlaps the given key.
*
* Returns 1 if pre-existing cell returned, zero if new cell created using
* @cell_prealloc.
......@@ -68,7 +73,8 @@ int dm_get_cell(struct dm_bio_prison *prison,
struct dm_bio_prison_cell **cell_result);
/*
* An atomic op that combines retrieving a cell, and adding a bio to it.
* An atomic op that combines retrieving or creating a cell, and adding a
* bio to it.
*
* Returns 1 if the cell was already held, 0 if @inmate is the new holder.
*/
......@@ -87,6 +93,14 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
void dm_cell_error(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell, int error);
/*
* Visits the cell and then releases. Guarantees no new inmates are
* inserted between the visit and release.
*/
void dm_cell_visit_release(struct dm_bio_prison *prison,
void (*visit_fn)(void *, struct dm_bio_prison_cell *),
void *context, struct dm_bio_prison_cell *cell);
/*----------------------------------------------------------------*/
/*
......
This diff is collapsed.
......@@ -19,6 +19,7 @@
typedef dm_block_t __bitwise__ dm_oblock_t;
typedef uint32_t __bitwise__ dm_cblock_t;
typedef dm_block_t __bitwise__ dm_dblock_t;
static inline dm_oblock_t to_oblock(dm_block_t b)
{
......@@ -40,4 +41,14 @@ static inline uint32_t from_cblock(dm_cblock_t b)
return (__force uint32_t) b;
}
static inline dm_dblock_t to_dblock(dm_block_t b)
{
return (__force dm_dblock_t) b;
}
static inline dm_block_t from_dblock(dm_dblock_t b)
{
return (__force dm_block_t) b;
}
#endif /* DM_CACHE_BLOCK_TYPES_H */
......@@ -109,7 +109,7 @@ struct dm_cache_metadata {
dm_block_t discard_root;
sector_t discard_block_size;
dm_oblock_t discard_nr_blocks;
dm_dblock_t discard_nr_blocks;
sector_t data_block_size;
dm_cblock_t cache_blocks;
......@@ -329,7 +329,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
disk_super->cache_blocks = cpu_to_le32(0);
......@@ -528,7 +528,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
cmd->hint_root = le64_to_cpu(disk_super->hint_root);
cmd->discard_root = le64_to_cpu(disk_super->discard_root);
cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
cmd->discard_nr_blocks = to_oblock(le64_to_cpu(disk_super->discard_nr_blocks));
cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
......@@ -626,7 +626,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
......@@ -797,15 +797,15 @@ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
sector_t discard_block_size,
dm_oblock_t new_nr_entries)
dm_dblock_t new_nr_entries)
{
int r;
down_write(&cmd->root_lock);
r = dm_bitset_resize(&cmd->discard_info,
cmd->discard_root,
from_oblock(cmd->discard_nr_blocks),
from_oblock(new_nr_entries),
from_dblock(cmd->discard_nr_blocks),
from_dblock(new_nr_entries),
false, &cmd->discard_root);
if (!r) {
cmd->discard_block_size = discard_block_size;
......@@ -818,28 +818,28 @@ int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
return r;
}
static int __set_discard(struct dm_cache_metadata *cmd, dm_oblock_t b)
static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
{
return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
from_oblock(b), &cmd->discard_root);
from_dblock(b), &cmd->discard_root);
}
static int __clear_discard(struct dm_cache_metadata *cmd, dm_oblock_t b)
static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
{
return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
from_oblock(b), &cmd->discard_root);
from_dblock(b), &cmd->discard_root);
}
static int __is_discarded(struct dm_cache_metadata *cmd, dm_oblock_t b,
static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
bool *is_discarded)
{
return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
from_oblock(b), &cmd->discard_root,
from_dblock(b), &cmd->discard_root,
is_discarded);
}
static int __discard(struct dm_cache_metadata *cmd,
dm_oblock_t dblock, bool discard)
dm_dblock_t dblock, bool discard)
{
int r;
......@@ -852,7 +852,7 @@ static int __discard(struct dm_cache_metadata *cmd,
}
int dm_cache_set_discard(struct dm_cache_metadata *cmd,
dm_oblock_t dblock, bool discard)
dm_dblock_t dblock, bool discard)
{
int r;
......@@ -870,8 +870,8 @@ static int __load_discards(struct dm_cache_metadata *cmd,
dm_block_t b;
bool discard;
for (b = 0; b < from_oblock(cmd->discard_nr_blocks); b++) {
dm_oblock_t dblock = to_oblock(b);
for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
dm_dblock_t dblock = to_dblock(b);
if (cmd->clean_when_opened) {
r = __is_discarded(cmd, dblock, &discard);
......
......@@ -70,14 +70,14 @@ dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd);
int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
sector_t discard_block_size,
dm_oblock_t new_nr_entries);
dm_dblock_t new_nr_entries);
typedef int (*load_discard_fn)(void *context, sector_t discard_block_size,
dm_oblock_t dblock, bool discarded);
dm_dblock_t dblock, bool discarded);
int dm_cache_load_discards(struct dm_cache_metadata *cmd,
load_discard_fn fn, void *context);
int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_oblock_t dblock, bool discard);
int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_dblock_t dblock, bool discard);
int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock);
int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock);
......
......@@ -181,24 +181,30 @@ static void queue_shift_down(struct queue *q)
* Gives us the oldest entry of the lowest popoulated level. If the first
* level is emptied then we shift down one level.
*/
static struct list_head *queue_pop(struct queue *q)
static struct list_head *queue_peek(struct queue *q)
{
unsigned level;
struct list_head *r;
for (level = 0; level < NR_QUEUE_LEVELS; level++)
if (!list_empty(q->qs + level)) {
r = q->qs[level].next;
list_del(r);
if (!list_empty(q->qs + level))
return q->qs[level].next;
/* have we just emptied the bottom level? */
if (level == 0 && list_empty(q->qs))
queue_shift_down(q);
return NULL;
}
return r;
}
static struct list_head *queue_pop(struct queue *q)
{
struct list_head *r = queue_peek(q);
return NULL;
if (r) {
list_del(r);
/* have we just emptied the bottom level? */
if (list_empty(q->qs))
queue_shift_down(q);
}
return r;
}
static struct list_head *list_pop(struct list_head *lh)
......@@ -383,13 +389,6 @@ struct mq_policy {
unsigned generation;
unsigned generation_period; /* in lookups (will probably change) */
/*
* Entries in the pre_cache whose hit count passes the promotion
* threshold move to the cache proper. Working out the correct
* value for the promotion_threshold is crucial to this policy.
*/
unsigned promote_threshold;
unsigned discard_promote_adjustment;
unsigned read_promote_adjustment;
unsigned write_promote_adjustment;
......@@ -406,6 +405,7 @@ struct mq_policy {
#define DEFAULT_DISCARD_PROMOTE_ADJUSTMENT 1
#define DEFAULT_READ_PROMOTE_ADJUSTMENT 4
#define DEFAULT_WRITE_PROMOTE_ADJUSTMENT 8
#define DISCOURAGE_DEMOTING_DIRTY_THRESHOLD 128
/*----------------------------------------------------------------*/
......@@ -518,6 +518,12 @@ static struct entry *pop(struct mq_policy *mq, struct queue *q)
return e;
}
static struct entry *peek(struct queue *q)
{
struct list_head *h = queue_peek(q);
return h ? container_of(h, struct entry, list) : NULL;
}
/*
* Has this entry already been updated?
*/
......@@ -570,10 +576,6 @@ static void check_generation(struct mq_policy *mq)
break;
}
}
mq->promote_threshold = nr ? total / nr : 1;
if (mq->promote_threshold * nr < total)
mq->promote_threshold++;
}
}
......@@ -640,6 +642,30 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
return 0;
}
/*
* Entries in the pre_cache whose hit count passes the promotion
* threshold move to the cache proper. Working out the correct
* value for the promotion_threshold is crucial to this policy.
*/
static unsigned promote_threshold(struct mq_policy *mq)
{
struct entry *e;
if (any_free_cblocks(mq))
return 0;
e = peek(&mq->cache_clean);
if (e)
return e->hit_count;
e = peek(&mq->cache_dirty);
if (e)
return e->hit_count + DISCOURAGE_DEMOTING_DIRTY_THRESHOLD;
/* This should never happen */
return 0;
}
/*
* We modify the basic promotion_threshold depending on the specific io.
*
......@@ -653,7 +679,7 @@ static unsigned adjusted_promote_threshold(struct mq_policy *mq,
bool discarded_oblock, int data_dir)
{
if (data_dir == READ)
return mq->promote_threshold + mq->read_promote_adjustment;
return promote_threshold(mq) + mq->read_promote_adjustment;
if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) {
/*
......@@ -663,7 +689,7 @@ static unsigned adjusted_promote_threshold(struct mq_policy *mq,
return mq->discard_promote_adjustment;
}
return mq->promote_threshold + mq->write_promote_adjustment;
return promote_threshold(mq) + mq->write_promote_adjustment;
}
static bool should_promote(struct mq_policy *mq, struct entry *e,
......@@ -839,7 +865,8 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock,
if (e && in_cache(mq, e))
r = cache_entry_found(mq, e, result);
else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
else if (mq->tracker.thresholds[PATTERN_SEQUENTIAL] &&
iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
result->op = POLICY_MISS;
else if (e)
......@@ -1230,7 +1257,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
mq->tick = 0;
mq->hit_count = 0;
mq->generation = 0;
mq->promote_threshold = 0;
mq->discard_promote_adjustment = DEFAULT_DISCARD_PROMOTE_ADJUSTMENT;
mq->read_promote_adjustment = DEFAULT_READ_PROMOTE_ADJUSTMENT;
mq->write_promote_adjustment = DEFAULT_WRITE_PROMOTE_ADJUSTMENT;
......@@ -1265,7 +1291,7 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
static struct dm_cache_policy_type mq_policy_type = {
.name = "mq",
.version = {1, 2, 0},
.version = {1, 3, 0},
.hint_size = 4,
.owner = THIS_MODULE,
.create = mq_create
......@@ -1273,7 +1299,7 @@ static struct dm_cache_policy_type mq_policy_type = {
static struct dm_cache_policy_type default_policy_type = {
.name = "default",
.version = {1, 2, 0},
.version = {1, 3, 0},
.hint_size = 4,
.owner = THIS_MODULE,
.create = mq_create,
......
This diff is collapsed.
......@@ -705,7 +705,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++)
crypto_xor(data + i * 8, buf, 8);
out:
memset(buf, 0, sizeof(buf));
memzero_explicit(buf, sizeof(buf));
return r;
}
......
......@@ -684,11 +684,14 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
int srcu_idx;
param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
DM_ACTIVE_PRESENT_FLAG);
DM_ACTIVE_PRESENT_FLAG | DM_INTERNAL_SUSPEND_FLAG);
if (dm_suspended_md(md))
param->flags |= DM_SUSPEND_FLAG;
if (dm_suspended_internally_md(md))
param->flags |= DM_INTERNAL_SUSPEND_FLAG;
if (dm_test_deferred_remove_flag(md))
param->flags |= DM_DEFERRED_REMOVE;
......
......@@ -824,7 +824,7 @@ static int message_stats_create(struct mapped_device *md,
return 1;
id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
dm_internal_suspend, dm_internal_resume, md);
dm_internal_suspend_fast, dm_internal_resume_fast, md);
if (id < 0)
return id;
......
......@@ -1521,18 +1521,32 @@ fmode_t dm_table_get_mode(struct dm_table *t)
}
EXPORT_SYMBOL(dm_table_get_mode);
static void suspend_targets(struct dm_table *t, unsigned postsuspend)
enum suspend_mode {
PRESUSPEND,
PRESUSPEND_UNDO,
POSTSUSPEND,
};
static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
{
int i = t->num_targets;
struct dm_target *ti = t->targets;
while (i--) {
if (postsuspend) {
switch (mode) {
case PRESUSPEND:
if (ti->type->presuspend)
ti->type->presuspend(ti);
break;
case PRESUSPEND_UNDO:
if (ti->type->presuspend_undo)
ti->type->presuspend_undo(ti);
break;
case POSTSUSPEND:
if (ti->type->postsuspend)
ti->type->postsuspend(ti);
} else if (ti->type->presuspend)
ti->type->presuspend(ti);
break;
}
ti++;
}
}
......@@ -1542,7 +1556,15 @@ void dm_table_presuspend_targets(struct dm_table *t)
if (!t)
return;
suspend_targets(t, 0);
suspend_targets(t, PRESUSPEND);
}
void dm_table_presuspend_undo_targets(struct dm_table *t)
{
if (!t)
return;
suspend_targets(t, PRESUSPEND_UNDO);
}
void dm_table_postsuspend_targets(struct dm_table *t)
......@@ -1550,7 +1572,7 @@ void dm_table_postsuspend_targets(struct dm_table *t)
if (!t)
return;
suspend_targets(t, 1);
suspend_targets(t, POSTSUSPEND);
}
int dm_table_resume_targets(struct dm_table *t)
......
......@@ -1384,42 +1384,38 @@ static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time)
}
int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
int can_block, struct dm_thin_lookup_result *result)
int can_issue_io, struct dm_thin_lookup_result *result)
{
int r = -EINVAL;
uint64_t block_time = 0;
int r;
__le64 value;
struct dm_pool_metadata *pmd = td->pmd;
dm_block_t keys[2] = { td->id, block };
struct dm_btree_info *info;
if (can_block) {
down_read(&pmd->root_lock);
info = &pmd->info;
} else if (down_read_trylock(&pmd->root_lock))
info = &pmd->nb_info;
else
return -EWOULDBLOCK;
if (pmd->fail_io)
goto out;
return -EINVAL;
r = dm_btree_lookup(info, pmd->root, keys, &value);
if (!r)
block_time = le64_to_cpu(value);
down_read(&pmd->root_lock);
out:
up_read(&pmd->root_lock);
if (can_issue_io) {
info = &pmd->info;
} else
info = &pmd->nb_info;
r = dm_btree_lookup(info, pmd->root, keys, &value);
if (!r) {
uint64_t block_time = 0;
dm_block_t exception_block;
uint32_t exception_time;
block_time = le64_to_cpu(value);
unpack_block_time(block_time, &exception_block,
&exception_time);
result->block = exception_block;
result->shared = __snapshotted_since(td, exception_time);
}
up_read(&pmd->root_lock);
return r;
}
......@@ -1813,3 +1809,8 @@ bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd)
return needs_check;
}
void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd)
{
dm_tm_issue_prefetches(pmd->tm);
}
......@@ -139,12 +139,12 @@ struct dm_thin_lookup_result {
/*
* Returns:
* -EWOULDBLOCK iff @can_block is set and would block.
* -EWOULDBLOCK iff @can_issue_io is set and would issue IO
* -ENODATA iff that mapping is not present.
* 0 success
*/
int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
int can_block, struct dm_thin_lookup_result *result);
int can_issue_io, struct dm_thin_lookup_result *result);
/*
* Obtain an unused block.
......@@ -213,6 +213,11 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd);
bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd);
/*
* Issue any prefetches that may be useful.
*/
void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd);
/*----------------------------------------------------------------*/
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -65,6 +65,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits);
struct list_head *dm_table_get_devices(struct dm_table *t);
void dm_table_presuspend_targets(struct dm_table *t);
void dm_table_presuspend_undo_targets(struct dm_table *t);
void dm_table_postsuspend_targets(struct dm_table *t);
int dm_table_resume_targets(struct dm_table *t);
int dm_table_any_congested(struct dm_table *t, int bdi_bits);
......@@ -128,6 +129,15 @@ int dm_deleting_md(struct mapped_device *md);
*/
int dm_suspended_md(struct mapped_device *md);
/*
* Internal suspend and resume methods.
*/
int dm_suspended_internally_md(struct mapped_device *md);
void dm_internal_suspend_fast(struct mapped_device *md);
void dm_internal_resume_fast(struct mapped_device *md);
void dm_internal_suspend_noflush(struct mapped_device *md);
void dm_internal_resume(struct mapped_device *md);
/*
* Test if the device is scheduled for deferred remove.
*/
......
......@@ -645,8 +645,10 @@ static int array_resize(struct dm_array_info *info, dm_block_t root,
int r;
struct resize resize;
if (old_size == new_size)
if (old_size == new_size) {
*new_root = root;
return 0;
}
resize.info = info;
resize.root = root;
......
......@@ -564,7 +564,9 @@ static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count
{
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
return smm->ll.nr_blocks;
*count = smm->ll.nr_blocks;
return 0;
}
static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
......@@ -581,7 +583,9 @@ static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
{
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
return b < smm->begin ? 1 : 0;
*result = (b < smm->begin) ? 1 : 0;
return 0;
}
static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
......
......@@ -10,6 +10,8 @@
#include "dm-persistent-data-internal.h"
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>
......@@ -17,6 +19,61 @@
/*----------------------------------------------------------------*/
#define PREFETCH_SIZE 128
#define PREFETCH_BITS 7
#define PREFETCH_SENTINEL ((dm_block_t) -1ULL)
struct prefetch_set {
struct mutex lock;
dm_block_t blocks[PREFETCH_SIZE];
};
static unsigned prefetch_hash(dm_block_t b)
{
return hash_64(b, PREFETCH_BITS);
}
static void prefetch_wipe(struct prefetch_set *p)
{
unsigned i;
for (i = 0; i < PREFETCH_SIZE; i++)
p->blocks[i] = PREFETCH_SENTINEL;
}
static void prefetch_init(struct prefetch_set *p)
{
mutex_init(&p->lock);
prefetch_wipe(p);
}
static void prefetch_add(struct prefetch_set *p, dm_block_t b)
{
unsigned h = prefetch_hash(b);
mutex_lock(&p->lock);
if (p->blocks[h] == PREFETCH_SENTINEL)
p->blocks[h] = b;
mutex_unlock(&p->lock);
}
static void prefetch_issue(struct prefetch_set *p, struct dm_block_manager *bm)
{
unsigned i;
mutex_lock(&p->lock);
for (i = 0; i < PREFETCH_SIZE; i++)
if (p->blocks[i] != PREFETCH_SENTINEL) {
dm_bm_prefetch(bm, p->blocks[i]);
p->blocks[i] = PREFETCH_SENTINEL;
}
mutex_unlock(&p->lock);
}
/*----------------------------------------------------------------*/
struct shadow_info {
struct hlist_node hlist;
dm_block_t where;
......@@ -37,6 +94,8 @@ struct dm_transaction_manager {
spinlock_t lock;
struct hlist_head buckets[DM_HASH_SIZE];
struct prefetch_set prefetches;
};
/*----------------------------------------------------------------*/
......@@ -117,6 +176,8 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
for (i = 0; i < DM_HASH_SIZE; i++)
INIT_HLIST_HEAD(tm->buckets + i);
prefetch_init(&tm->prefetches);
return tm;
}
......@@ -268,8 +329,14 @@ int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
struct dm_block_validator *v,
struct dm_block **blk)
{
if (tm->is_clone)
return dm_bm_read_try_lock(tm->real->bm, b, v, blk);
if (tm->is_clone) {
int r = dm_bm_read_try_lock(tm->real->bm, b, v, blk);
if (r == -EWOULDBLOCK)
prefetch_add(&tm->real->prefetches, b);
return r;
}
return dm_bm_read_lock(tm->bm, b, v, blk);
}
......@@ -317,6 +384,12 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm)
return tm->bm;
}
void dm_tm_issue_prefetches(struct dm_transaction_manager *tm)
{
prefetch_issue(&tm->prefetches, tm->bm);
}
EXPORT_SYMBOL_GPL(dm_tm_issue_prefetches);
/*----------------------------------------------------------------*/
static int dm_tm_create_internal(struct dm_block_manager *bm,
......
......@@ -108,6 +108,13 @@ int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm);
/*
* If you're using a non-blocking clone the tm will build up a list of
* requested blocks that weren't in core. This call will request those
* blocks to be prefetched.
*/
void dm_tm_issue_prefetches(struct dm_transaction_manager *tm);
/*
* A little utility that ties the knot by producing a transaction manager
* that has a space map managed by the transaction manager...
......
......@@ -64,6 +64,7 @@ typedef int (*dm_request_endio_fn) (struct dm_target *ti,
union map_info *map_context);
typedef void (*dm_presuspend_fn) (struct dm_target *ti);
typedef void (*dm_presuspend_undo_fn) (struct dm_target *ti);
typedef void (*dm_postsuspend_fn) (struct dm_target *ti);
typedef int (*dm_preresume_fn) (struct dm_target *ti);
typedef void (*dm_resume_fn) (struct dm_target *ti);
......@@ -145,6 +146,7 @@ struct target_type {
dm_endio_fn end_io;
dm_request_endio_fn rq_end_io;
dm_presuspend_fn presuspend;
dm_presuspend_undo_fn presuspend_undo;
dm_postsuspend_fn postsuspend;
dm_preresume_fn preresume;
dm_resume_fn resume;
......
......@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
#define DM_VERSION_MINOR 28
#define DM_VERSION_MINOR 29
#define DM_VERSION_PATCHLEVEL 0
#define DM_VERSION_EXTRA "-ioctl (2014-09-17)"
#define DM_VERSION_EXTRA "-ioctl (2014-10-28)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
......@@ -352,4 +352,9 @@ enum {
*/
#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
/*
* If set, the device is suspended internally.
*/
#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */
#endif /* _LINUX_DM_IOCTL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment