Commit 22a8f39c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.6/drivers-2020-01-27' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "Like the core side, not a lot of changes here, just two main items:

   - Series of patches (via Coly) with fixes for bcache (Coly,
     Christoph)

   - MD pull request from Song"

* tag 'for-5.6/drivers-2020-01-27' of git://git.kernel.dk/linux-block: (31 commits)
  bcache: reap from tail of c->btree_cache in bch_mca_scan()
  bcache: reap c->btree_cache_freeable from the tail in bch_mca_scan()
  bcache: remove member accessed from struct btree
  bcache: print written and keys in trace_bcache_btree_write
  bcache: avoid unnecessary btree nodes flushing in btree_flush_write()
  bcache: add code comments for state->pool in __btree_sort()
  lib: crc64: include <linux/crc64.h> for 'crc64_be'
  bcache: use read_cache_page_gfp to read the superblock
  bcache: store a pointer to the on-disk sb in the cache and cached_dev structures
  bcache: return a pointer to the on-disk sb from read_super
  bcache: transfer the sb_page reference to register_{bdev,cache}
  bcache: fix use-after-free in register_bcache()
  bcache: properly initialize 'path' and 'err' in register_bcache()
  bcache: rework error unwinding in register_bcache
  bcache: use a separate data structure for the on-disk super block
  bcache: cached_dev_free needs to put the sb page
  md/raid1: introduce wait_for_serialization
  md/raid1: use bucket based mechanism for IO serialization
  md: introduce a new struct for IO serialization
  md: don't destroy serial_info_pool if serialize_policy is true
  ...
parents 48b4b4ff e3de0446
...@@ -301,6 +301,7 @@ struct cached_dev { ...@@ -301,6 +301,7 @@ struct cached_dev {
struct block_device *bdev; struct block_device *bdev;
struct cache_sb sb; struct cache_sb sb;
struct cache_sb_disk *sb_disk;
struct bio sb_bio; struct bio sb_bio;
struct bio_vec sb_bv[1]; struct bio_vec sb_bv[1];
struct closure sb_write; struct closure sb_write;
...@@ -403,6 +404,7 @@ enum alloc_reserve { ...@@ -403,6 +404,7 @@ enum alloc_reserve {
struct cache { struct cache {
struct cache_set *set; struct cache_set *set;
struct cache_sb sb; struct cache_sb sb;
struct cache_sb_disk *sb_disk;
struct bio sb_bio; struct bio sb_bio;
struct bio_vec sb_bv[1]; struct bio_vec sb_bv[1];
......
...@@ -1257,6 +1257,11 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, ...@@ -1257,6 +1257,11 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
* Our temporary buffer is the same size as the btree node's * Our temporary buffer is the same size as the btree node's
* buffer, we can just swap buffers instead of doing a big * buffer, we can just swap buffers instead of doing a big
* memcpy() * memcpy()
*
* Don't worry even if 'out' is allocated from mempool, it can
* still be swapped here, because state->pool is a page mempool
* created by mempool_init_page_pool(), which allocates
* pages by alloc_pages() indeed.
*/ */
out->magic = b->set->data->magic; out->magic = b->set->data->magic;
......
...@@ -734,34 +734,32 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, ...@@ -734,34 +734,32 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
i = 0; i = 0;
btree_cache_used = c->btree_cache_used; btree_cache_used = c->btree_cache_used;
list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { list_for_each_entry_safe_reverse(b, t, &c->btree_cache_freeable, list) {
if (nr <= 0) if (nr <= 0)
goto out; goto out;
if (++i > 3 && if (!mca_reap(b, 0, false)) {
!mca_reap(b, 0, false)) {
mca_data_free(b); mca_data_free(b);
rw_unlock(true, b); rw_unlock(true, b);
freed++; freed++;
} }
nr--; nr--;
i++;
} }
for (; (nr--) && i < btree_cache_used; i++) { list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
if (list_empty(&c->btree_cache)) if (nr <= 0 || i >= btree_cache_used)
goto out; goto out;
b = list_first_entry(&c->btree_cache, struct btree, list); if (!mca_reap(b, 0, false)) {
list_rotate_left(&c->btree_cache);
if (!b->accessed &&
!mca_reap(b, 0, false)) {
mca_bucket_free(b); mca_bucket_free(b);
mca_data_free(b); mca_data_free(b);
rw_unlock(true, b); rw_unlock(true, b);
freed++; freed++;
} else }
b->accessed = 0;
nr--;
i++;
} }
out: out:
mutex_unlock(&c->bucket_lock); mutex_unlock(&c->bucket_lock);
...@@ -1069,7 +1067,6 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, ...@@ -1069,7 +1067,6 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
BUG_ON(!b->written); BUG_ON(!b->written);
b->parent = parent; b->parent = parent;
b->accessed = 1;
for (; i <= b->keys.nsets && b->keys.set[i].size; i++) { for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
prefetch(b->keys.set[i].tree); prefetch(b->keys.set[i].tree);
...@@ -1160,7 +1157,6 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, ...@@ -1160,7 +1157,6 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
goto retry; goto retry;
} }
b->accessed = 1;
b->parent = parent; b->parent = parent;
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb)); bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
......
...@@ -121,8 +121,6 @@ struct btree { ...@@ -121,8 +121,6 @@ struct btree {
/* Key/pointer for this btree node */ /* Key/pointer for this btree node */
BKEY_PADDED(key); BKEY_PADDED(key);
/* Single bit - set when accessed, cleared by shrinker */
unsigned long accessed;
unsigned long seq; unsigned long seq;
struct rw_semaphore lock; struct rw_semaphore lock;
struct cache_set *c; struct cache_set *c;
......
...@@ -417,10 +417,14 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) ...@@ -417,10 +417,14 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
/* Journalling */ /* Journalling */
/*
 * Distance of entry p from front_p, wrapped with mask; evaluates to 0 when
 * p and front_p refer to the same entry.
 */
#define nr_to_fifo_front(p, front_p, mask) (((p) - (front_p)) & (mask))
static void btree_flush_write(struct cache_set *c) static void btree_flush_write(struct cache_set *c)
{ {
struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR]; struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
unsigned int i, n; unsigned int i, nr, ref_nr;
atomic_t *fifo_front_p, *now_fifo_front_p;
size_t mask;
if (c->journal.btree_flushing) if (c->journal.btree_flushing)
return; return;
...@@ -433,12 +437,50 @@ static void btree_flush_write(struct cache_set *c) ...@@ -433,12 +437,50 @@ static void btree_flush_write(struct cache_set *c)
c->journal.btree_flushing = true; c->journal.btree_flushing = true;
spin_unlock(&c->journal.flush_write_lock); spin_unlock(&c->journal.flush_write_lock);
/* get the oldest journal entry and check its refcount */
spin_lock(&c->journal.lock);
fifo_front_p = &fifo_front(&c->journal.pin);
ref_nr = atomic_read(fifo_front_p);
if (ref_nr <= 0) {
/*
* do nothing if no btree node references
* the oldest journal entry
*/
spin_unlock(&c->journal.lock);
goto out;
}
spin_unlock(&c->journal.lock);
mask = c->journal.pin.mask;
nr = 0;
atomic_long_inc(&c->flush_write); atomic_long_inc(&c->flush_write);
memset(btree_nodes, 0, sizeof(btree_nodes)); memset(btree_nodes, 0, sizeof(btree_nodes));
n = 0;
mutex_lock(&c->bucket_lock); mutex_lock(&c->bucket_lock);
list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) { list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
/*
* It is safe to get now_fifo_front_p without holding
* c->journal.lock here, because we don't need to know
* the exact value, we just check whether the
* front pointer of c->journal.pin has changed.
*/
now_fifo_front_p = &fifo_front(&c->journal.pin);
/*
* If the oldest journal entry is reclaimed and front
* pointer of c->journal.pin changes, it is unnecessary
* to scan c->btree_cache anymore, just quit the loop and
* flush out what we have already.
*/
if (now_fifo_front_p != fifo_front_p)
break;
/*
* Quit this loop if all matching btree nodes have already
* been scanned and recorded in btree_nodes[].
*/
ref_nr = atomic_read(fifo_front_p);
if (nr >= ref_nr)
break;
if (btree_node_journal_flush(b)) if (btree_node_journal_flush(b))
pr_err("BUG: flush_write bit should not be set here!"); pr_err("BUG: flush_write bit should not be set here!");
...@@ -454,17 +496,44 @@ static void btree_flush_write(struct cache_set *c) ...@@ -454,17 +496,44 @@ static void btree_flush_write(struct cache_set *c)
continue; continue;
} }
/*
* Only select the btree node which exactly references
* the oldest journal entry.
*
* If the journal entry pointed to by fifo_front_p is
* reclaimed in parallel, don't worry:
* - the list_for_each_xxx loop will quit when checking
* next now_fifo_front_p.
* - If there are matched nodes recorded in btree_nodes[],
* they are clean now (this is why and how the oldest
* journal entry can be reclaimed). These selected nodes
* will be ignored and skipped in the following for-loop.
*/
if (nr_to_fifo_front(btree_current_write(b)->journal,
fifo_front_p,
mask) != 0) {
mutex_unlock(&b->write_lock);
continue;
}
set_btree_node_journal_flush(b); set_btree_node_journal_flush(b);
mutex_unlock(&b->write_lock); mutex_unlock(&b->write_lock);
btree_nodes[n++] = b; btree_nodes[nr++] = b;
if (n == BTREE_FLUSH_NR) /*
* To avoid holding c->bucket_lock for too long,
* only scan for BTREE_FLUSH_NR matched btree nodes
* at most. If there are more btree nodes referencing
* the oldest journal entry, try to flush them the next
* time btree_flush_write() is called.
*/
if (nr == BTREE_FLUSH_NR)
break; break;
} }
mutex_unlock(&c->bucket_lock); mutex_unlock(&c->bucket_lock);
for (i = 0; i < n; i++) { for (i = 0; i < nr; i++) {
b = btree_nodes[i]; b = btree_nodes[i];
if (!b) { if (!b) {
pr_err("BUG: btree_nodes[%d] is NULL", i); pr_err("BUG: btree_nodes[%d] is NULL", i);
...@@ -497,6 +566,7 @@ static void btree_flush_write(struct cache_set *c) ...@@ -497,6 +566,7 @@ static void btree_flush_write(struct cache_set *c)
mutex_unlock(&b->write_lock); mutex_unlock(&b->write_lock);
} }
out:
spin_lock(&c->journal.flush_write_lock); spin_lock(&c->journal.flush_write_lock);
c->journal.btree_flushing = false; c->journal.btree_flushing = false;
spin_unlock(&c->journal.flush_write_lock); spin_unlock(&c->journal.flush_write_lock);
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include "writeback.h" #include "writeback.h"
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/genhd.h> #include <linux/genhd.h>
#include <linux/idr.h> #include <linux/idr.h>
...@@ -60,17 +59,18 @@ struct workqueue_struct *bch_journal_wq; ...@@ -60,17 +59,18 @@ struct workqueue_struct *bch_journal_wq;
/* Superblock */ /* Superblock */
static const char *read_super(struct cache_sb *sb, struct block_device *bdev, static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
struct page **res) struct cache_sb_disk **res)
{ {
const char *err; const char *err;
struct cache_sb *s; struct cache_sb_disk *s;
struct buffer_head *bh = __bread(bdev, 1, SB_SIZE); struct page *page;
unsigned int i; unsigned int i;
if (!bh) page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
if (IS_ERR(page))
return "IO error"; return "IO error";
s = page_address(page) + offset_in_page(SB_OFFSET);
s = (struct cache_sb *) bh->b_data;
sb->offset = le64_to_cpu(s->offset); sb->offset = le64_to_cpu(s->offset);
sb->version = le64_to_cpu(s->version); sb->version = le64_to_cpu(s->version);
...@@ -188,12 +188,10 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, ...@@ -188,12 +188,10 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
} }
sb->last_mount = (u32)ktime_get_real_seconds(); sb->last_mount = (u32)ktime_get_real_seconds();
err = NULL; *res = s;
return NULL;
get_page(bh->b_page);
*res = bh->b_page;
err: err:
put_bh(bh); put_page(page);
return err; return err;
} }
...@@ -207,15 +205,15 @@ static void write_bdev_super_endio(struct bio *bio) ...@@ -207,15 +205,15 @@ static void write_bdev_super_endio(struct bio *bio)
closure_put(&dc->sb_write); closure_put(&dc->sb_write);
} }
static void __write_super(struct cache_sb *sb, struct bio *bio) static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
struct bio *bio)
{ {
struct cache_sb *out = page_address(bio_first_page_all(bio));
unsigned int i; unsigned int i;
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
bio->bi_iter.bi_sector = SB_SECTOR; bio->bi_iter.bi_sector = SB_SECTOR;
bio->bi_iter.bi_size = SB_SIZE; __bio_add_page(bio, virt_to_page(out), SB_SIZE,
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); offset_in_page(out));
bch_bio_map(bio, NULL);
out->offset = cpu_to_le64(sb->offset); out->offset = cpu_to_le64(sb->offset);
out->version = cpu_to_le64(sb->version); out->version = cpu_to_le64(sb->version);
...@@ -257,14 +255,14 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) ...@@ -257,14 +255,14 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
down(&dc->sb_write_mutex); down(&dc->sb_write_mutex);
closure_init(cl, parent); closure_init(cl, parent);
bio_reset(bio); bio_init(bio, dc->sb_bv, 1);
bio_set_dev(bio, dc->bdev); bio_set_dev(bio, dc->bdev);
bio->bi_end_io = write_bdev_super_endio; bio->bi_end_io = write_bdev_super_endio;
bio->bi_private = dc; bio->bi_private = dc;
closure_get(cl); closure_get(cl);
/* I/O request sent to backing device */ /* I/O request sent to backing device */
__write_super(&dc->sb, bio); __write_super(&dc->sb, dc->sb_disk, bio);
closure_return_with_destructor(cl, bch_write_bdev_super_unlock); closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
} }
...@@ -306,13 +304,13 @@ void bcache_write_super(struct cache_set *c) ...@@ -306,13 +304,13 @@ void bcache_write_super(struct cache_set *c)
SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb));
bio_reset(bio); bio_init(bio, ca->sb_bv, 1);
bio_set_dev(bio, ca->bdev); bio_set_dev(bio, ca->bdev);
bio->bi_end_io = write_super_endio; bio->bi_end_io = write_super_endio;
bio->bi_private = ca; bio->bi_private = ca;
closure_get(cl); closure_get(cl);
__write_super(&ca->sb, bio); __write_super(&ca->sb, ca->sb_disk, bio);
} }
closure_return_with_destructor(cl, bcache_write_super_unlock); closure_return_with_destructor(cl, bcache_write_super_unlock);
...@@ -1275,6 +1273,9 @@ static void cached_dev_free(struct closure *cl) ...@@ -1275,6 +1273,9 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock); mutex_unlock(&bch_register_lock);
if (dc->sb_disk)
put_page(virt_to_page(dc->sb_disk));
if (!IS_ERR_OR_NULL(dc->bdev)) if (!IS_ERR_OR_NULL(dc->bdev))
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
...@@ -1350,7 +1351,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) ...@@ -1350,7 +1351,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
/* Cached device - bcache superblock */ /* Cached device - bcache superblock */
static int register_bdev(struct cache_sb *sb, struct page *sb_page, static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
struct block_device *bdev, struct block_device *bdev,
struct cached_dev *dc) struct cached_dev *dc)
{ {
...@@ -1362,11 +1363,7 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page, ...@@ -1362,11 +1363,7 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page,
memcpy(&dc->sb, sb, sizeof(struct cache_sb)); memcpy(&dc->sb, sb, sizeof(struct cache_sb));
dc->bdev = bdev; dc->bdev = bdev;
dc->bdev->bd_holder = dc; dc->bdev->bd_holder = dc;
dc->sb_disk = sb_disk;
bio_init(&dc->sb_bio, dc->sb_bio.bi_inline_vecs, 1);
bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page;
get_page(sb_page);
if (cached_dev_init(dc, sb->block_size << 9)) if (cached_dev_init(dc, sb->block_size << 9))
goto err; goto err;
...@@ -2136,8 +2133,8 @@ void bch_cache_release(struct kobject *kobj) ...@@ -2136,8 +2133,8 @@ void bch_cache_release(struct kobject *kobj)
for (i = 0; i < RESERVE_NR; i++) for (i = 0; i < RESERVE_NR; i++)
free_fifo(&ca->free[i]); free_fifo(&ca->free[i]);
if (ca->sb_bio.bi_inline_vecs[0].bv_page) if (ca->sb_disk)
put_page(bio_first_page_all(&ca->sb_bio)); put_page(virt_to_page(ca->sb_disk));
if (!IS_ERR_OR_NULL(ca->bdev)) if (!IS_ERR_OR_NULL(ca->bdev))
blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
...@@ -2259,7 +2256,7 @@ static int cache_alloc(struct cache *ca) ...@@ -2259,7 +2256,7 @@ static int cache_alloc(struct cache *ca)
return ret; return ret;
} }
static int register_cache(struct cache_sb *sb, struct page *sb_page, static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
struct block_device *bdev, struct cache *ca) struct block_device *bdev, struct cache *ca)
{ {
const char *err = NULL; /* must be set for any error case */ const char *err = NULL; /* must be set for any error case */
...@@ -2269,10 +2266,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, ...@@ -2269,10 +2266,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
memcpy(&ca->sb, sb, sizeof(struct cache_sb)); memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev; ca->bdev = bdev;
ca->bdev->bd_holder = ca; ca->bdev->bd_holder = ca;
ca->sb_disk = sb_disk;
bio_init(&ca->sb_bio, ca->sb_bio.bi_inline_vecs, 1);
bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
get_page(sb_page);
if (blk_queue_discard(bdev_get_queue(bdev))) if (blk_queue_discard(bdev_get_queue(bdev)))
ca->discard = CACHE_DISCARD(&ca->sb); ca->discard = CACHE_DISCARD(&ca->sb);
...@@ -2372,29 +2366,35 @@ static bool bch_is_open(struct block_device *bdev) ...@@ -2372,29 +2366,35 @@ static bool bch_is_open(struct block_device *bdev)
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size) const char *buffer, size_t size)
{ {
ssize_t ret = -EINVAL; const char *err;
const char *err = "cannot allocate memory";
char *path = NULL; char *path = NULL;
struct cache_sb *sb = NULL; struct cache_sb *sb;
struct block_device *bdev = NULL; struct cache_sb_disk *sb_disk;
struct page *sb_page = NULL; struct block_device *bdev;
ssize_t ret;
ret = -EBUSY;
err = "failed to reference bcache module";
if (!try_module_get(THIS_MODULE)) if (!try_module_get(THIS_MODULE))
return -EBUSY; goto out;
/* For latest state of bcache_is_reboot */ /* For latest state of bcache_is_reboot */
smp_mb(); smp_mb();
err = "bcache is in reboot";
if (bcache_is_reboot) if (bcache_is_reboot)
return -EBUSY; goto out_module_put;
ret = -ENOMEM;
err = "cannot allocate memory";
path = kstrndup(buffer, size, GFP_KERNEL); path = kstrndup(buffer, size, GFP_KERNEL);
if (!path) if (!path)
goto err; goto out_module_put;
sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL); sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL);
if (!sb) if (!sb)
goto err; goto out_free_path;
ret = -EINVAL;
err = "failed to open device"; err = "failed to open device";
bdev = blkdev_get_by_path(strim(path), bdev = blkdev_get_by_path(strim(path),
FMODE_READ|FMODE_WRITE|FMODE_EXCL, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
...@@ -2411,57 +2411,63 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ...@@ -2411,57 +2411,63 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!IS_ERR(bdev)) if (!IS_ERR(bdev))
bdput(bdev); bdput(bdev);
if (attr == &ksysfs_register_quiet) if (attr == &ksysfs_register_quiet)
goto quiet_out; goto done;
} }
goto err; goto out_free_sb;
} }
err = "failed to set blocksize"; err = "failed to set blocksize";
if (set_blocksize(bdev, 4096)) if (set_blocksize(bdev, 4096))
goto err_close; goto out_blkdev_put;
err = read_super(sb, bdev, &sb_page); err = read_super(sb, bdev, &sb_disk);
if (err) if (err)
goto err_close; goto out_blkdev_put;
err = "failed to register device"; err = "failed to register device";
if (SB_IS_BDEV(sb)) { if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc) if (!dc)
goto err_close; goto out_put_sb_page;
mutex_lock(&bch_register_lock); mutex_lock(&bch_register_lock);
ret = register_bdev(sb, sb_page, bdev, dc); ret = register_bdev(sb, sb_disk, bdev, dc);
mutex_unlock(&bch_register_lock); mutex_unlock(&bch_register_lock);
/* blkdev_put() will be called in cached_dev_free() */ /* blkdev_put() will be called in cached_dev_free() */
if (ret < 0) if (ret < 0)
goto err; goto out_free_sb;
} else { } else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca) if (!ca)
goto err_close; goto out_put_sb_page;
/* blkdev_put() will be called in bch_cache_release() */ /* blkdev_put() will be called in bch_cache_release() */
if (register_cache(sb, sb_page, bdev, ca) != 0) if (register_cache(sb, sb_disk, bdev, ca) != 0)
goto err; goto out_free_sb;
} }
quiet_out:
ret = size; done:
out:
if (sb_page)
put_page(sb_page);
kfree(sb); kfree(sb);
kfree(path); kfree(path);
module_put(THIS_MODULE); module_put(THIS_MODULE);
return ret; return size;
err_close: out_put_sb_page:
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); put_page(virt_to_page(sb_disk));
err: out_blkdev_put:
pr_info("error %s: %s", path, err); blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
goto out; out_free_sb:
kfree(sb);
out_free_path:
kfree(path);
path = NULL;
out_module_put:
module_put(THIS_MODULE);
out:
pr_info("error %s: %s", path?path:"", err);
return ret;
} }
......
...@@ -1019,8 +1019,6 @@ void md_bitmap_unplug(struct bitmap *bitmap) ...@@ -1019,8 +1019,6 @@ void md_bitmap_unplug(struct bitmap *bitmap)
/* look at each page to see if there are any set bits that need to be /* look at each page to see if there are any set bits that need to be
* flushed out to disk */ * flushed out to disk */
for (i = 0; i < bitmap->storage.file_pages; i++) { for (i = 0; i < bitmap->storage.file_pages; i++) {
if (!bitmap->storage.filemap)
return;
dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
need_write = test_and_clear_page_attr(bitmap, i, need_write = test_and_clear_page_attr(bitmap, i,
BITMAP_PAGE_NEEDWRITE); BITMAP_PAGE_NEEDWRITE);
...@@ -1338,7 +1336,8 @@ void md_bitmap_daemon_work(struct mddev *mddev) ...@@ -1338,7 +1336,8 @@ void md_bitmap_daemon_work(struct mddev *mddev)
BITMAP_PAGE_DIRTY)) BITMAP_PAGE_DIRTY))
/* bitmap_unplug will handle the rest */ /* bitmap_unplug will handle the rest */
break; break;
if (test_and_clear_page_attr(bitmap, j, if (bitmap->storage.filemap &&
test_and_clear_page_attr(bitmap, j,
BITMAP_PAGE_NEEDWRITE)) { BITMAP_PAGE_NEEDWRITE)) {
write_page(bitmap, bitmap->storage.filemap[j], 0); write_page(bitmap, bitmap->storage.filemap[j], 0);
} }
...@@ -1790,8 +1789,8 @@ void md_bitmap_destroy(struct mddev *mddev) ...@@ -1790,8 +1789,8 @@ void md_bitmap_destroy(struct mddev *mddev)
return; return;
md_bitmap_wait_behind_writes(mddev); md_bitmap_wait_behind_writes(mddev);
mempool_destroy(mddev->wb_info_pool); if (!mddev->serialize_policy)
mddev->wb_info_pool = NULL; mddev_destroy_serial_pool(mddev, NULL, true);
mutex_lock(&mddev->bitmap_info.mutex); mutex_lock(&mddev->bitmap_info.mutex);
spin_lock(&mddev->lock); spin_lock(&mddev->lock);
...@@ -1908,7 +1907,7 @@ int md_bitmap_load(struct mddev *mddev) ...@@ -1908,7 +1907,7 @@ int md_bitmap_load(struct mddev *mddev)
goto out; goto out;
rdev_for_each(rdev, mddev) rdev_for_each(rdev, mddev)
mddev_create_wb_pool(mddev, rdev, true); mddev_create_serial_pool(mddev, rdev, true);
if (mddev_is_clustered(mddev)) if (mddev_is_clustered(mddev))
md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
...@@ -2475,16 +2474,16 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -2475,16 +2474,16 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
if (backlog > COUNTER_MAX) if (backlog > COUNTER_MAX)
return -EINVAL; return -EINVAL;
mddev->bitmap_info.max_write_behind = backlog; mddev->bitmap_info.max_write_behind = backlog;
if (!backlog && mddev->wb_info_pool) { if (!backlog && mddev->serial_info_pool) {
/* wb_info_pool is not needed if backlog is zero */ /* serial_info_pool is not needed if backlog is zero */
mempool_destroy(mddev->wb_info_pool); if (!mddev->serialize_policy)
mddev->wb_info_pool = NULL; mddev_destroy_serial_pool(mddev, NULL, false);
} else if (backlog && !mddev->wb_info_pool) { } else if (backlog && !mddev->serial_info_pool) {
/* wb_info_pool is needed since backlog is not zero */ /* serial_info_pool is needed since backlog is not zero */
struct md_rdev *rdev; struct md_rdev *rdev;
rdev_for_each(rdev, mddev) rdev_for_each(rdev, mddev)
mddev_create_wb_pool(mddev, rdev, false); mddev_create_serial_pool(mddev, rdev, false);
} }
if (old_mwb != backlog) if (old_mwb != backlog)
md_bitmap_update_sb(mddev->bitmap); md_bitmap_update_sb(mddev->bitmap);
......
...@@ -125,74 +125,165 @@ static inline int speed_max(struct mddev *mddev) ...@@ -125,74 +125,165 @@ static inline int speed_max(struct mddev *mddev)
mddev->sync_speed_max : sysctl_speed_limit_max; mddev->sync_speed_max : sysctl_speed_limit_max;
} }
static int rdev_init_wb(struct md_rdev *rdev) static void rdev_uninit_serial(struct md_rdev *rdev)
{ {
if (rdev->bdev->bd_queue->nr_hw_queues == 1) if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
return;
kvfree(rdev->serial);
rdev->serial = NULL;
}
static void rdevs_uninit_serial(struct mddev *mddev)
{
struct md_rdev *rdev;
rdev_for_each(rdev, mddev)
rdev_uninit_serial(rdev);
}
static int rdev_init_serial(struct md_rdev *rdev)
{
/* serial_nums is equal to BARRIER_BUCKETS_NR */
int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
struct serial_in_rdev *serial = NULL;
if (test_bit(CollisionCheck, &rdev->flags))
return 0; return 0;
spin_lock_init(&rdev->wb_list_lock); serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
INIT_LIST_HEAD(&rdev->wb_list); GFP_KERNEL);
init_waitqueue_head(&rdev->wb_io_wait); if (!serial)
set_bit(WBCollisionCheck, &rdev->flags); return -ENOMEM;
return 1; for (i = 0; i < serial_nums; i++) {
struct serial_in_rdev *serial_tmp = &serial[i];
spin_lock_init(&serial_tmp->serial_lock);
serial_tmp->serial_rb = RB_ROOT_CACHED;
init_waitqueue_head(&serial_tmp->serial_io_wait);
}
rdev->serial = serial;
set_bit(CollisionCheck, &rdev->flags);
return 0;
}
static int rdevs_init_serial(struct mddev *mddev)
{
struct md_rdev *rdev;
int ret = 0;
rdev_for_each(rdev, mddev) {
ret = rdev_init_serial(rdev);
if (ret)
break;
}
/* Free all resources if the pool does not exist */
if (ret && !mddev->serial_info_pool)
rdevs_uninit_serial(mddev);
return ret;
} }
/* /*
* Create wb_info_pool if rdev is the first multi-queue device flaged * rdev needs to enable serial stuffs if it meets the conditions:
* with writemostly, also write-behind mode is enabled. * 1. it is multi-queue device flaged with writemostly.
* 2. the write-behind mode is enabled.
*/ */
void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev, static int rdev_need_serial(struct md_rdev *rdev)
bool is_suspend)
{ {
if (mddev->bitmap_info.max_write_behind == 0) return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
return; rdev->bdev->bd_queue->nr_hw_queues != 1 &&
test_bit(WriteMostly, &rdev->flags));
}
/*
* Init resources for rdev(s), then create serial_info_pool if:
* 1. rdev is the first device for which rdev_need_serial() returns true.
* 2. rdev is NULL, which means we want to enable serialization for all rdevs.
*/
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
bool is_suspend)
{
int ret = 0;
if (!test_bit(WriteMostly, &rdev->flags) || !rdev_init_wb(rdev)) if (rdev && !rdev_need_serial(rdev) &&
!test_bit(CollisionCheck, &rdev->flags))
return; return;
if (mddev->wb_info_pool == NULL) { if (!is_suspend)
mddev_suspend(mddev);
if (!rdev)
ret = rdevs_init_serial(mddev);
else
ret = rdev_init_serial(rdev);
if (ret)
goto abort;
if (mddev->serial_info_pool == NULL) {
unsigned int noio_flag; unsigned int noio_flag;
if (!is_suspend)
mddev_suspend(mddev);
noio_flag = memalloc_noio_save(); noio_flag = memalloc_noio_save();
mddev->wb_info_pool = mempool_create_kmalloc_pool(NR_WB_INFOS, mddev->serial_info_pool =
sizeof(struct wb_info)); mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
sizeof(struct serial_info));
memalloc_noio_restore(noio_flag); memalloc_noio_restore(noio_flag);
if (!mddev->wb_info_pool) if (!mddev->serial_info_pool) {
pr_err("can't alloc memory pool for writemostly\n"); rdevs_uninit_serial(mddev);
if (!is_suspend) pr_err("can't alloc memory pool for serialization\n");
mddev_resume(mddev); }
} }
abort:
if (!is_suspend)
mddev_resume(mddev);
} }
EXPORT_SYMBOL_GPL(mddev_create_wb_pool);
/* /*
* destroy wb_info_pool if rdev is the last device flaged with WBCollisionCheck. * Free resource from rdev(s), and destroy serial_info_pool under conditions:
* 1. rdev is the last device flagged with CollisionCheck.
* 2. when bitmap is destroyed while policy is not enabled.
* 3. when disabling the policy, the pool is destroyed only when no rdev needs it.
*/ */
static void mddev_destroy_wb_pool(struct mddev *mddev, struct md_rdev *rdev) void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
bool is_suspend)
{ {
if (!test_and_clear_bit(WBCollisionCheck, &rdev->flags)) if (rdev && !test_bit(CollisionCheck, &rdev->flags))
return; return;
if (mddev->wb_info_pool) { if (mddev->serial_info_pool) {
struct md_rdev *temp; struct md_rdev *temp;
int num = 0; int num = 0; /* used to track if other rdevs need the pool */
/* if (!is_suspend)
* Check if other rdevs need wb_info_pool. mddev_suspend(mddev);
*/ rdev_for_each(temp, mddev) {
rdev_for_each(temp, mddev) if (!rdev) {
if (temp != rdev && if (!mddev->serialize_policy ||
test_bit(WBCollisionCheck, &temp->flags)) !rdev_need_serial(temp))
rdev_uninit_serial(temp);
else
num++;
} else if (temp != rdev &&
test_bit(CollisionCheck, &temp->flags))
num++; num++;
if (!num) {
mddev_suspend(rdev->mddev);
mempool_destroy(mddev->wb_info_pool);
mddev->wb_info_pool = NULL;
mddev_resume(rdev->mddev);
} }
if (rdev)
rdev_uninit_serial(rdev);
if (num)
pr_info("The mempool could be used by other devices\n");
else {
mempool_destroy(mddev->serial_info_pool);
mddev->serial_info_pool = NULL;
}
if (!is_suspend)
mddev_resume(mddev);
} }
} }
...@@ -2337,7 +2428,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) ...@@ -2337,7 +2428,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
pr_debug("md: bind<%s>\n", b); pr_debug("md: bind<%s>\n", b);
if (mddev->raid_disks) if (mddev->raid_disks)
mddev_create_wb_pool(mddev, rdev, false); mddev_create_serial_pool(mddev, rdev, false);
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
goto fail; goto fail;
...@@ -2375,7 +2466,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) ...@@ -2375,7 +2466,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set); list_del_rcu(&rdev->same_set);
pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b)); pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
mddev_destroy_wb_pool(rdev->mddev, rdev); mddev_destroy_serial_pool(rdev->mddev, rdev, false);
rdev->mddev = NULL; rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block"); sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state); sysfs_put(rdev->sysfs_state);
...@@ -2888,10 +2979,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) ...@@ -2888,10 +2979,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
} }
} else if (cmd_match(buf, "writemostly")) { } else if (cmd_match(buf, "writemostly")) {
set_bit(WriteMostly, &rdev->flags); set_bit(WriteMostly, &rdev->flags);
mddev_create_wb_pool(rdev->mddev, rdev, false); mddev_create_serial_pool(rdev->mddev, rdev, false);
err = 0; err = 0;
} else if (cmd_match(buf, "-writemostly")) { } else if (cmd_match(buf, "-writemostly")) {
mddev_destroy_wb_pool(rdev->mddev, rdev); mddev_destroy_serial_pool(rdev->mddev, rdev, false);
clear_bit(WriteMostly, &rdev->flags); clear_bit(WriteMostly, &rdev->flags);
err = 0; err = 0;
} else if (cmd_match(buf, "blocked")) { } else if (cmd_match(buf, "blocked")) {
...@@ -5277,6 +5368,57 @@ static struct md_sysfs_entry md_fail_last_dev = ...@@ -5277,6 +5368,57 @@ static struct md_sysfs_entry md_fail_last_dev =
__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show, __ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
fail_last_dev_store); fail_last_dev_store);
/*
 * sysfs show handler for the "serialize_policy" attribute.
 *
 * Writes the current mddev->serialize_policy value as a decimal number when
 * the array has a personality whose level is 1, and "n/a" otherwise.
 * Returns whatever sprintf() reports as the number of characters written.
 */
static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
{
	/* Only a level-1 (raid1) personality has a meaningful value. */
	if (mddev->pers && mddev->pers->level == 1)
		return sprintf(page, "%d\n", mddev->serialize_policy);

	return sprintf(page, "n/a\n");
}
/*
 * sysfs store handler for the "serialize_policy" attribute.
 *
 * Setting serialize_policy to true enforces that write IO is not reordered
 * for raid1.
 *
 * @buf is parsed with kstrtobool(). Writing the value that is already set
 * succeeds without doing anything. Otherwise the mddev lock is taken and the
 * array must have a personality whose level is 1 (-EINVAL if not); the array
 * is then suspended while the serial pool is created or destroyed and the
 * new value is stored, and resumed afterwards.
 *
 * Returns @len on success, or the error reported by kstrtobool(),
 * mddev_lock() or the level check.
 */
static ssize_t
serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
{
	bool enable;
	int ret;

	ret = kstrtobool(buf, &enable);
	if (ret)
		return ret;

	/* Requested value is already in effect: nothing to do. */
	if (mddev->serialize_policy == enable)
		return len;

	ret = mddev_lock(mddev);
	if (ret)
		return ret;

	if (!mddev->pers || mddev->pers->level != 1) {
		pr_err("md: serialize_policy is only effective for raid1\n");
		ret = -EINVAL;
	} else {
		/* The pool is changed only while the array is suspended. */
		mddev_suspend(mddev);
		if (enable)
			mddev_create_serial_pool(mddev, NULL, true);
		else
			mddev_destroy_serial_pool(mddev, NULL, true);
		mddev->serialize_policy = enable;
		mddev_resume(mddev);
	}

	mddev_unlock(mddev);
	return ret ? ret : len;
}
/*
 * sysfs entry "serialize_policy": readable by user/group/other, writable by
 * the owner; wired to serialize_policy_show() and serialize_policy_store().
 */
static struct md_sysfs_entry md_serialize_policy =
__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
serialize_policy_store);
static struct attribute *md_default_attrs[] = { static struct attribute *md_default_attrs[] = {
&md_level.attr, &md_level.attr,
&md_layout.attr, &md_layout.attr,
...@@ -5294,6 +5436,7 @@ static struct attribute *md_default_attrs[] = { ...@@ -5294,6 +5436,7 @@ static struct attribute *md_default_attrs[] = {
&max_corr_read_errors.attr, &max_corr_read_errors.attr,
&md_consistency_policy.attr, &md_consistency_policy.attr,
&md_fail_last_dev.attr, &md_fail_last_dev.attr,
&md_serialize_policy.attr,
NULL, NULL,
}; };
...@@ -5769,18 +5912,18 @@ int md_run(struct mddev *mddev) ...@@ -5769,18 +5912,18 @@ int md_run(struct mddev *mddev)
goto bitmap_abort; goto bitmap_abort;
if (mddev->bitmap_info.max_write_behind > 0) { if (mddev->bitmap_info.max_write_behind > 0) {
bool creat_pool = false; bool create_pool = false;
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
if (test_bit(WriteMostly, &rdev->flags) && if (test_bit(WriteMostly, &rdev->flags) &&
rdev_init_wb(rdev)) rdev_init_serial(rdev))
creat_pool = true; create_pool = true;
} }
if (creat_pool && mddev->wb_info_pool == NULL) { if (create_pool && mddev->serial_info_pool == NULL) {
mddev->wb_info_pool = mddev->serial_info_pool =
mempool_create_kmalloc_pool(NR_WB_INFOS, mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
sizeof(struct wb_info)); sizeof(struct serial_info));
if (!mddev->wb_info_pool) { if (!mddev->serial_info_pool) {
err = -ENOMEM; err = -ENOMEM;
goto bitmap_abort; goto bitmap_abort;
} }
...@@ -6025,8 +6168,9 @@ static void __md_stop_writes(struct mddev *mddev) ...@@ -6025,8 +6168,9 @@ static void __md_stop_writes(struct mddev *mddev)
mddev->in_sync = 1; mddev->in_sync = 1;
md_update_sb(mddev, 1); md_update_sb(mddev, 1);
} }
mempool_destroy(mddev->wb_info_pool); /* disable policy to guarantee rdevs free resources for serialization */
mddev->wb_info_pool = NULL; mddev->serialize_policy = 0;
mddev_destroy_serial_pool(mddev, NULL, true);
} }
void md_stop_writes(struct mddev *mddev) void md_stop_writes(struct mddev *mddev)
......
...@@ -32,6 +32,16 @@ ...@@ -32,6 +32,16 @@
* be retried. * be retried.
*/ */
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT) #define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
/*
 * Per-rdev state used to serialize IO.
 *
 * The rdev reaches this through its 'serial' pointer, which the raid1 code
 * indexes as an array using sector_to_idx() of the IO's start sector.
 */
struct serial_in_rdev {
/* interval tree of struct serial_info ranges recorded for serialization */
struct rb_root_cached serial_rb;
/* held (irqsave) around every lookup, insert and remove on serial_rb */
spinlock_t serial_lock;
/* writers wait here until their range no longer collides; woken on removal */
wait_queue_head_t serial_io_wait;
};
/* /*
* MD's 'extended' device * MD's 'extended' device
*/ */
...@@ -110,12 +120,7 @@ struct md_rdev { ...@@ -110,12 +120,7 @@ struct md_rdev {
* in superblock. * in superblock.
*/ */
/* struct serial_in_rdev *serial; /* used for raid1 io serialization */
* The members for check collision of write behind IOs.
*/
struct list_head wb_list;
spinlock_t wb_list_lock;
wait_queue_head_t wb_io_wait;
struct work_struct del_work; /* used for delayed sysfs removal */ struct work_struct del_work; /* used for delayed sysfs removal */
...@@ -201,9 +206,9 @@ enum flag_bits { ...@@ -201,9 +206,9 @@ enum flag_bits {
* it didn't fail, so don't use FailFast * it didn't fail, so don't use FailFast
* any more for metadata * any more for metadata
*/ */
WBCollisionCheck, /* CollisionCheck, /*
* multiqueue device should check if there * check if there is collision between raid1
* is collision between write behind bios. * serial bios.
*/ */
}; };
...@@ -263,12 +268,13 @@ enum mddev_sb_flags { ...@@ -263,12 +268,13 @@ enum mddev_sb_flags {
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */ MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
}; };
#define NR_WB_INFOS 8 #define NR_SERIAL_INFOS 8
/* record current range of write behind IOs */ /* record current range of serialize IOs */
struct wb_info { struct serial_info {
sector_t lo; struct rb_node node;
sector_t hi; sector_t start; /* start sector of rb node */
struct list_head list; sector_t last; /* end sector of rb node */
sector_t _subtree_last; /* highest sector in subtree of rb node */
}; };
struct mddev { struct mddev {
...@@ -487,13 +493,14 @@ struct mddev { ...@@ -487,13 +493,14 @@ struct mddev {
*/ */
struct work_struct flush_work; struct work_struct flush_work;
struct work_struct event_work; /* used by dm to report failure event */ struct work_struct event_work; /* used by dm to report failure event */
mempool_t *wb_info_pool; mempool_t *serial_info_pool;
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info; struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */ unsigned int good_device_nr; /* good device num within cluster raid */
bool has_superblocks:1; bool has_superblocks:1;
bool fail_last_dev:1; bool fail_last_dev:1;
bool serialize_policy:1;
}; };
enum recovery_flags { enum recovery_flags {
...@@ -737,8 +744,10 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, ...@@ -737,8 +744,10 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force); extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev); extern void md_kick_rdev_from_array(struct md_rdev * rdev);
extern void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev, extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
bool is_suspend); bool is_suspend);
extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
bool is_suspend);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev); struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/interval_tree_generic.h>
#include <trace/events/block.h> #include <trace/events/block.h>
...@@ -50,55 +51,71 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); ...@@ -50,55 +51,71 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
#include "raid1-10.c" #include "raid1-10.c"
static int check_and_add_wb(struct md_rdev *rdev, sector_t lo, sector_t hi) #define START(node) ((node)->start)
#define LAST(node) ((node)->last)
INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t, _subtree_last,
START, LAST, static inline, raid1_rb);
static int check_and_add_serial(struct md_rdev *rdev, struct r1bio *r1_bio,
struct serial_info *si, int idx)
{ {
struct wb_info *wi, *temp_wi;
unsigned long flags; unsigned long flags;
int ret = 0; int ret = 0;
struct mddev *mddev = rdev->mddev; sector_t lo = r1_bio->sector;
sector_t hi = lo + r1_bio->sectors;
wi = mempool_alloc(mddev->wb_info_pool, GFP_NOIO); struct serial_in_rdev *serial = &rdev->serial[idx];
spin_lock_irqsave(&rdev->wb_list_lock, flags); spin_lock_irqsave(&serial->serial_lock, flags);
list_for_each_entry(temp_wi, &rdev->wb_list, list) { /* collision happened */
/* collision happened */ if (raid1_rb_iter_first(&serial->serial_rb, lo, hi))
if (hi > temp_wi->lo && lo < temp_wi->hi) { ret = -EBUSY;
ret = -EBUSY; else {
break; si->start = lo;
} si->last = hi;
raid1_rb_insert(si, &serial->serial_rb);
} }
spin_unlock_irqrestore(&serial->serial_lock, flags);
if (!ret) {
wi->lo = lo;
wi->hi = hi;
list_add(&wi->list, &rdev->wb_list);
} else
mempool_free(wi, mddev->wb_info_pool);
spin_unlock_irqrestore(&rdev->wb_list_lock, flags);
return ret; return ret;
} }
static void remove_wb(struct md_rdev *rdev, sector_t lo, sector_t hi) static void wait_for_serialization(struct md_rdev *rdev, struct r1bio *r1_bio)
{
struct mddev *mddev = rdev->mddev;
struct serial_info *si;
int idx = sector_to_idx(r1_bio->sector);
struct serial_in_rdev *serial = &rdev->serial[idx];
if (WARN_ON(!mddev->serial_info_pool))
return;
si = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);
wait_event(serial->serial_io_wait,
check_and_add_serial(rdev, r1_bio, si, idx) == 0);
}
static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
{ {
struct wb_info *wi; struct serial_info *si;
unsigned long flags; unsigned long flags;
int found = 0; int found = 0;
struct mddev *mddev = rdev->mddev; struct mddev *mddev = rdev->mddev;
int idx = sector_to_idx(lo);
spin_lock_irqsave(&rdev->wb_list_lock, flags); struct serial_in_rdev *serial = &rdev->serial[idx];
list_for_each_entry(wi, &rdev->wb_list, list)
if (hi == wi->hi && lo == wi->lo) { spin_lock_irqsave(&serial->serial_lock, flags);
list_del(&wi->list); for (si = raid1_rb_iter_first(&serial->serial_rb, lo, hi);
mempool_free(wi, mddev->wb_info_pool); si; si = raid1_rb_iter_next(si, lo, hi)) {
if (si->start == lo && si->last == hi) {
raid1_rb_remove(si, &serial->serial_rb);
mempool_free(si, mddev->serial_info_pool);
found = 1; found = 1;
break; break;
} }
}
if (!found) if (!found)
WARN(1, "The write behind IO is not recorded\n"); WARN(1, "The write IO is not recorded for serialization\n");
spin_unlock_irqrestore(&rdev->wb_list_lock, flags); spin_unlock_irqrestore(&serial->serial_lock, flags);
wake_up(&rdev->wb_io_wait); wake_up(&serial->serial_io_wait);
} }
/* /*
...@@ -430,6 +447,8 @@ static void raid1_end_write_request(struct bio *bio) ...@@ -430,6 +447,8 @@ static void raid1_end_write_request(struct bio *bio)
int mirror = find_bio_disk(r1_bio, bio); int mirror = find_bio_disk(r1_bio, bio);
struct md_rdev *rdev = conf->mirrors[mirror].rdev; struct md_rdev *rdev = conf->mirrors[mirror].rdev;
bool discard_error; bool discard_error;
sector_t lo = r1_bio->sector;
sector_t hi = r1_bio->sector + r1_bio->sectors;
discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
...@@ -499,12 +518,8 @@ static void raid1_end_write_request(struct bio *bio) ...@@ -499,12 +518,8 @@ static void raid1_end_write_request(struct bio *bio)
} }
if (behind) { if (behind) {
if (test_bit(WBCollisionCheck, &rdev->flags)) { if (test_bit(CollisionCheck, &rdev->flags))
sector_t lo = r1_bio->sector; remove_serial(rdev, lo, hi);
sector_t hi = r1_bio->sector + r1_bio->sectors;
remove_wb(rdev, lo, hi);
}
if (test_bit(WriteMostly, &rdev->flags)) if (test_bit(WriteMostly, &rdev->flags))
atomic_dec(&r1_bio->behind_remaining); atomic_dec(&r1_bio->behind_remaining);
...@@ -527,7 +542,8 @@ static void raid1_end_write_request(struct bio *bio) ...@@ -527,7 +542,8 @@ static void raid1_end_write_request(struct bio *bio)
call_bio_endio(r1_bio); call_bio_endio(r1_bio);
} }
} }
} } else if (rdev->mddev->serialize_policy)
remove_serial(rdev, lo, hi);
if (r1_bio->bios[mirror] == NULL) if (r1_bio->bios[mirror] == NULL)
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
...@@ -1479,6 +1495,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1479,6 +1495,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
struct bio *mbio = NULL; struct bio *mbio = NULL;
struct md_rdev *rdev = conf->mirrors[i].rdev;
if (!r1_bio->bios[i]) if (!r1_bio->bios[i])
continue; continue;
...@@ -1506,18 +1523,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1506,18 +1523,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
if (r1_bio->behind_master_bio) { if (r1_bio->behind_master_bio) {
struct md_rdev *rdev = conf->mirrors[i].rdev; if (test_bit(CollisionCheck, &rdev->flags))
wait_for_serialization(rdev, r1_bio);
if (test_bit(WBCollisionCheck, &rdev->flags)) {
sector_t lo = r1_bio->sector;
sector_t hi = r1_bio->sector + r1_bio->sectors;
wait_event(rdev->wb_io_wait,
check_and_add_wb(rdev, lo, hi) == 0);
}
if (test_bit(WriteMostly, &rdev->flags)) if (test_bit(WriteMostly, &rdev->flags))
atomic_inc(&r1_bio->behind_remaining); atomic_inc(&r1_bio->behind_remaining);
} } else if (mddev->serialize_policy)
wait_for_serialization(rdev, r1_bio);
r1_bio->bios[i] = mbio; r1_bio->bios[i] = mbio;
......
...@@ -6598,7 +6598,6 @@ raid5_show_group_thread_cnt(struct mddev *mddev, char *page) ...@@ -6598,7 +6598,6 @@ raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
static int alloc_thread_groups(struct r5conf *conf, int cnt, static int alloc_thread_groups(struct r5conf *conf, int cnt,
int *group_cnt, int *group_cnt,
int *worker_cnt_per_group,
struct r5worker_group **worker_groups); struct r5worker_group **worker_groups);
static ssize_t static ssize_t
raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
...@@ -6607,7 +6606,7 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) ...@@ -6607,7 +6606,7 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
unsigned int new; unsigned int new;
int err; int err;
struct r5worker_group *new_groups, *old_groups; struct r5worker_group *new_groups, *old_groups;
int group_cnt, worker_cnt_per_group; int group_cnt;
if (len >= PAGE_SIZE) if (len >= PAGE_SIZE)
return -EINVAL; return -EINVAL;
...@@ -6630,13 +6629,11 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) ...@@ -6630,13 +6629,11 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
if (old_groups) if (old_groups)
flush_workqueue(raid5_wq); flush_workqueue(raid5_wq);
err = alloc_thread_groups(conf, new, err = alloc_thread_groups(conf, new, &group_cnt, &new_groups);
&group_cnt, &worker_cnt_per_group,
&new_groups);
if (!err) { if (!err) {
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
conf->group_cnt = group_cnt; conf->group_cnt = group_cnt;
conf->worker_cnt_per_group = worker_cnt_per_group; conf->worker_cnt_per_group = new;
conf->worker_groups = new_groups; conf->worker_groups = new_groups;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
...@@ -6672,16 +6669,13 @@ static struct attribute_group raid5_attrs_group = { ...@@ -6672,16 +6669,13 @@ static struct attribute_group raid5_attrs_group = {
.attrs = raid5_attrs, .attrs = raid5_attrs,
}; };
static int alloc_thread_groups(struct r5conf *conf, int cnt, static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
int *group_cnt,
int *worker_cnt_per_group,
struct r5worker_group **worker_groups) struct r5worker_group **worker_groups)
{ {
int i, j, k; int i, j, k;
ssize_t size; ssize_t size;
struct r5worker *workers; struct r5worker *workers;
*worker_cnt_per_group = cnt;
if (cnt == 0) { if (cnt == 0) {
*group_cnt = 0; *group_cnt = 0;
*worker_groups = NULL; *worker_groups = NULL;
...@@ -6882,7 +6876,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) ...@@ -6882,7 +6876,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
struct disk_info *disk; struct disk_info *disk;
char pers_name[6]; char pers_name[6];
int i; int i;
int group_cnt, worker_cnt_per_group; int group_cnt;
struct r5worker_group *new_group; struct r5worker_group *new_group;
int ret; int ret;
...@@ -6928,10 +6922,9 @@ static struct r5conf *setup_conf(struct mddev *mddev) ...@@ -6928,10 +6922,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
for (i = 0; i < PENDING_IO_MAX; i++) for (i = 0; i < PENDING_IO_MAX; i++)
list_add(&conf->pending_data[i].sibling, &conf->free_list); list_add(&conf->pending_data[i].sibling, &conf->free_list);
/* Don't enable multi-threading by default*/ /* Don't enable multi-threading by default*/
if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group, if (!alloc_thread_groups(conf, 0, &group_cnt, &new_group)) {
&new_group)) {
conf->group_cnt = group_cnt; conf->group_cnt = group_cnt;
conf->worker_cnt_per_group = worker_cnt_per_group; conf->worker_cnt_per_group = 0;
conf->worker_groups = new_group; conf->worker_groups = new_group;
} else } else
goto abort; goto abort;
......
...@@ -27,8 +27,8 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; ...@@ -27,8 +27,8 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
#include <errno.h> #include <errno.h>
#include <inttypes.h> #include <inttypes.h>
#include <limits.h>
#include <stddef.h> #include <stddef.h>
#include <string.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/types.h> #include <sys/types.h>
...@@ -44,6 +44,9 @@ typedef uint64_t u64; ...@@ -44,6 +44,9 @@ typedef uint64_t u64;
#ifndef PAGE_SIZE #ifndef PAGE_SIZE
# define PAGE_SIZE 4096 # define PAGE_SIZE 4096
#endif #endif
#ifndef PAGE_SHIFT
# define PAGE_SHIFT 12
#endif
extern const char raid6_empty_zero_page[PAGE_SIZE]; extern const char raid6_empty_zero_page[PAGE_SIZE];
#define __init #define __init
...@@ -59,7 +62,9 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; ...@@ -59,7 +62,9 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
#define enable_kernel_altivec() #define enable_kernel_altivec()
#define disable_kernel_altivec() #define disable_kernel_altivec()
#undef EXPORT_SYMBOL
#define EXPORT_SYMBOL(sym) #define EXPORT_SYMBOL(sym)
#undef EXPORT_SYMBOL_GPL
#define EXPORT_SYMBOL_GPL(sym) #define EXPORT_SYMBOL_GPL(sym)
#define MODULE_LICENSE(licence) #define MODULE_LICENSE(licence)
#define MODULE_DESCRIPTION(desc) #define MODULE_DESCRIPTION(desc)
......
...@@ -275,7 +275,8 @@ TRACE_EVENT(bcache_btree_write, ...@@ -275,7 +275,8 @@ TRACE_EVENT(bcache_btree_write,
__entry->keys = b->keys.set[b->keys.nsets].data->keys; __entry->keys = b->keys.set[b->keys.nsets].data->keys;
), ),
TP_printk("bucket %zu", __entry->bucket) TP_printk("bucket %zu written block %u + %u",
__entry->bucket, __entry->block, __entry->keys)
); );
DEFINE_EVENT(btree_node, bcache_btree_node_alloc, DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
......
...@@ -148,6 +148,7 @@ static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys) ...@@ -148,6 +148,7 @@ static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
#define BCACHE_SB_MAX_VERSION 4 #define BCACHE_SB_MAX_VERSION 4
#define SB_SECTOR 8 #define SB_SECTOR 8
#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT)
#define SB_SIZE 4096 #define SB_SIZE 4096
#define SB_LABEL_SIZE 32 #define SB_LABEL_SIZE 32
#define SB_JOURNAL_BUCKETS 256U #define SB_JOURNAL_BUCKETS 256U
...@@ -156,6 +157,57 @@ static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys) ...@@ -156,6 +157,57 @@ static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
#define BDEV_DATA_START_DEFAULT 16 /* sectors */ #define BDEV_DATA_START_DEFAULT 16 /* sectors */
/*
 * Superblock as it is laid out on disk.
 *
 * Every multi-byte integer is declared with an explicit little-endian type
 * (__le16/__le32/__le64), in contrast to struct cache_sb below, whose visible
 * fields are plain __u64.
 * NOTE(review): field order and sizes presumably define the on-disk format
 * and must not be changed - confirm before editing.
 */
struct cache_sb_disk {
__le64 csum;
__le64 offset; /* sector where this sb was written */
__le64 version;
__u8 magic[16];
__u8 uuid[16];
union {
__u8 set_uuid[16];
__le64 set_magic;
};
__u8 label[SB_LABEL_SIZE];
__le64 flags;
__le64 seq;
__le64 pad[8];
union {
struct {
/* Cache devices */
__le64 nbuckets; /* device size */
__le16 block_size; /* sectors */
__le16 bucket_size; /* sectors */
__le16 nr_in_set;
__le16 nr_this_dev;
};
struct {
/* Backing devices */
__le64 data_offset;
/*
* block_size from the cache device section is still used by
* backing devices, so don't add anything here until we fix
* things to not need it for backing devices anymore
*/
};
};
__le32 last_mount; /* time overflow in y2106 */
__le16 first_bucket;
union {
__le16 njournal_buckets;
__le16 keys;
};
__le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
};
struct cache_sb { struct cache_sb {
__u64 csum; __u64 csum;
__u64 offset; /* sector where this sb was written */ __u64 offset; /* sector where this sb was written */
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/crc64.h>
#include "crc64table.h" #include "crc64table.h"
MODULE_DESCRIPTION("CRC64 calculations"); MODULE_DESCRIPTION("CRC64 calculations");
......
...@@ -124,6 +124,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { ...@@ -124,6 +124,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
#define time_before(x, y) ((x) < (y)) #define time_before(x, y) ((x) < (y))
#endif #endif
/*
 * Benchmark geometry: RAID6_TEST_DISKS page-sized buffers (the last two hold
 * the syndromes) are carved out of a single allocation of order
 * RAID6_TEST_DISKS_ORDER, one page per disk, so the two values must be kept
 * in step (1 << ORDER pages must cover DISKS pages).
 */
#define RAID6_TEST_DISKS 8
#define RAID6_TEST_DISKS_ORDER 3
static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_recov_calls *raid6_choose_recov(void)
{ {
const struct raid6_recov_calls *const *algo; const struct raid6_recov_calls *const *algo;
...@@ -146,7 +149,7 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) ...@@ -146,7 +149,7 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
} }
static inline const struct raid6_calls *raid6_choose_gen( static inline const struct raid6_calls *raid6_choose_gen(
void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
{ {
unsigned long perf, bestgenperf, bestxorperf, j0, j1; unsigned long perf, bestgenperf, bestxorperf, j0, j1;
int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
...@@ -181,7 +184,8 @@ static inline const struct raid6_calls *raid6_choose_gen( ...@@ -181,7 +184,8 @@ static inline const struct raid6_calls *raid6_choose_gen(
best = *algo; best = *algo;
} }
pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); (perf * HZ * (disks-2)) >>
(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
if (!(*algo)->xor_syndrome) if (!(*algo)->xor_syndrome)
continue; continue;
...@@ -204,17 +208,24 @@ static inline const struct raid6_calls *raid6_choose_gen( ...@@ -204,17 +208,24 @@ static inline const struct raid6_calls *raid6_choose_gen(
bestxorperf = perf; bestxorperf = perf;
pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); (perf * HZ * (disks-2)) >>
(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
} }
} }
if (best) { if (best) {
pr_info("raid6: using algorithm %s gen() %ld MB/s\n", if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) {
best->name, pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
(bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); best->name,
if (best->xor_syndrome) (bestgenperf * HZ * (disks-2)) >>
pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", (20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2));
(bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); if (best->xor_syndrome)
pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
(bestxorperf * HZ * (disks-2)) >>
(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
} else
pr_info("raid6: skip pq benchmark and using algorithm %s\n",
best->name);
raid6_call = *best; raid6_call = *best;
} else } else
pr_err("raid6: Yikes! No algorithm found!\n"); pr_err("raid6: Yikes! No algorithm found!\n");
...@@ -228,27 +239,33 @@ static inline const struct raid6_calls *raid6_choose_gen( ...@@ -228,27 +239,33 @@ static inline const struct raid6_calls *raid6_choose_gen(
int __init raid6_select_algo(void) int __init raid6_select_algo(void)
{ {
const int disks = (65536/PAGE_SIZE)+2; const int disks = RAID6_TEST_DISKS;
const struct raid6_calls *gen_best; const struct raid6_calls *gen_best;
const struct raid6_recov_calls *rec_best; const struct raid6_recov_calls *rec_best;
char *syndromes; char *disk_ptr, *p;
void *dptrs[(65536/PAGE_SIZE)+2]; void *dptrs[RAID6_TEST_DISKS];
int i; int i, cycle;
for (i = 0; i < disks-2; i++)
dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
/* Normal code - use a 2-page allocation to avoid D$ conflict */
syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
if (!syndromes) { /* prepare the buffer and fill it circularly with gfmul table */
disk_ptr = (char *)__get_free_pages(GFP_KERNEL, RAID6_TEST_DISKS_ORDER);
if (!disk_ptr) {
pr_err("raid6: Yikes! No memory available.\n"); pr_err("raid6: Yikes! No memory available.\n");
return -ENOMEM; return -ENOMEM;
} }
dptrs[disks-2] = syndromes; p = disk_ptr;
dptrs[disks-1] = syndromes + PAGE_SIZE; for (i = 0; i < disks; i++)
dptrs[i] = p + PAGE_SIZE * i;
cycle = ((disks - 2) * PAGE_SIZE) / 65536;
for (i = 0; i < cycle; i++) {
memcpy(p, raid6_gfmul, 65536);
p += 65536;
}
if ((disks - 2) * PAGE_SIZE % 65536)
memcpy(p, raid6_gfmul, (disks - 2) * PAGE_SIZE % 65536);
/* select raid gen_syndrome function */ /* select raid gen_syndrome function */
gen_best = raid6_choose_gen(&dptrs, disks); gen_best = raid6_choose_gen(&dptrs, disks);
...@@ -256,7 +273,7 @@ int __init raid6_select_algo(void) ...@@ -256,7 +273,7 @@ int __init raid6_select_algo(void)
/* select raid recover functions */ /* select raid recover functions */
rec_best = raid6_choose_recov(); rec_best = raid6_choose_recov();
free_pages((unsigned long)syndromes, 1); free_pages((unsigned long)disk_ptr, RAID6_TEST_DISKS_ORDER);
return gen_best && rec_best ? 0 : -EINVAL; return gen_best && rec_best ? 0 : -EINVAL;
} }
......
...@@ -56,8 +56,8 @@ int main(int argc, char *argv[]) ...@@ -56,8 +56,8 @@ int main(int argc, char *argv[])
uint8_t v; uint8_t v;
uint8_t exptbl[256], invtbl[256]; uint8_t exptbl[256], invtbl[256];
printf("#include <linux/raid/pq.h>\n");
printf("#include <linux/export.h>\n"); printf("#include <linux/export.h>\n");
printf("#include <linux/raid/pq.h>\n");
/* Compute multiplication table */ /* Compute multiplication table */
printf("\nconst u8 __attribute__((aligned(256)))\n" printf("\nconst u8 __attribute__((aligned(256)))\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment