Commit feaa7cb5 authored by Linus Torvalds

Merge tag 'md/4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD updates from Shaohua Li:
 "Several patches from Guoqing fixing md-cluster bugs and several
  patches from Heinz fixing dm-raid bugs"

* tag 'md/4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md-cluster: check the return value of process_recvd_msg
  md-cluster: gather resync infos and enable recv_thread after bitmap is ready
  md: set MD_CHANGE_PENDING in a atomic region
  md: raid5: add prerequisite to run underneath dm-raid
  md: raid10: add prerequisite to run underneath dm-raid
  md: md.c: fix oops in mddev_suspend for raid0
  md-cluster: fix ifnullfree.cocci warnings
  md-cluster/bitmap: unplug bitmap to sync dirty pages to disk
  md-cluster/bitmap: fix wrong page num in bitmap_file_clear_bit and bitmap_file_set_bit
  md-cluster/bitmap: fix wrong calcuation of offset
  md-cluster: sync bitmap when node received RESYNCING msg
  md-cluster: always setup in-memory bitmap
  md-cluster: wakeup thread if activated a spare disk
  md-cluster: change array_sectors and update size are not supported
  md-cluster: fix locking when node joins cluster during message broadcast
  md-cluster: unregister thread if err happened
  md-cluster: wake up thread to continue recovery
  md-cluser: make resync_finish only called after pers->sync_request
  md-cluster: change resync lock from asynchronous to synchronous
parents e0fb1b36 1fa9a1ad
......@@ -316,3 +316,9 @@ The algorithm is:
nodes are using the raid which is achieved by lock all bitmap
locks within the cluster, and also those locks are unlocked
accordingly.
7. Unsupported features
There are some things which are not supported by cluster MD yet.
- update size and change array_sectors.
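The enforcement shows up later in this diff: both update_size() and array_size_store() in md.c gain an early guard of the following form (reproduced here only for orientation):

	/* cluster raid doesn't support update size / change array_sectors */
	if (mddev_is_clustered(mddev))
		return -EINVAL;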
......@@ -46,7 +46,7 @@ static inline char *bmname(struct bitmap *bitmap)
* allocated while we're using it
*/
static int bitmap_checkpage(struct bitmap_counts *bitmap,
unsigned long page, int create)
unsigned long page, int create, int no_hijack)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
......@@ -90,6 +90,9 @@ __acquires(bitmap->lock)
if (mappage == NULL) {
pr_debug("md/bitmap: map page allocation failed, hijacking\n");
/* We don't support hijack for cluster raid */
if (no_hijack)
return -ENOMEM;
/* failed - set the hijacked flag so that we can use the
* pointer as a counter */
if (!bitmap->bp[page].map)
......@@ -756,7 +759,7 @@ static int bitmap_storage_alloc(struct bitmap_storage *store,
bytes += sizeof(bitmap_super_t);
num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
offset = slot_number * (num_pages - 1);
offset = slot_number * num_pages;
store->filemap = kmalloc(sizeof(struct page *)
* num_pages, GFP_KERNEL);
......@@ -900,6 +903,11 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
struct page *page;
void *kaddr;
unsigned long chunk = block >> bitmap->counts.chunkshift;
struct bitmap_storage *store = &bitmap->storage;
unsigned long node_offset = 0;
if (mddev_is_clustered(bitmap->mddev))
node_offset = bitmap->cluster_slot * store->file_pages;
page = filemap_get_page(&bitmap->storage, chunk);
if (!page)
......@@ -915,7 +923,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
kunmap_atomic(kaddr);
pr_debug("set file bit %lu page %lu\n", bit, page->index);
/* record page number so it gets flushed to disk when unplug occurs */
set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
}
static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
......@@ -924,6 +932,11 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
struct page *page;
void *paddr;
unsigned long chunk = block >> bitmap->counts.chunkshift;
struct bitmap_storage *store = &bitmap->storage;
unsigned long node_offset = 0;
if (mddev_is_clustered(bitmap->mddev))
node_offset = bitmap->cluster_slot * store->file_pages;
page = filemap_get_page(&bitmap->storage, chunk);
if (!page)
......@@ -935,8 +948,8 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
else
clear_bit_le(bit, paddr);
kunmap_atomic(paddr);
if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
bitmap->allclean = 0;
}
}
......@@ -1321,7 +1334,7 @@ __acquires(bitmap->lock)
sector_t csize;
int err;
err = bitmap_checkpage(bitmap, page, create);
err = bitmap_checkpage(bitmap, page, create, 0);
if (bitmap->bp[page].hijacked ||
bitmap->bp[page].map == NULL)
......@@ -1594,6 +1607,27 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
}
EXPORT_SYMBOL(bitmap_cond_end_sync);
void bitmap_sync_with_cluster(struct mddev *mddev,
sector_t old_lo, sector_t old_hi,
sector_t new_lo, sector_t new_hi)
{
struct bitmap *bitmap = mddev->bitmap;
sector_t sector, blocks = 0;
for (sector = old_lo; sector < new_lo; ) {
bitmap_end_sync(bitmap, sector, &blocks, 0);
sector += blocks;
}
WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");
for (sector = old_hi; sector < new_hi; ) {
bitmap_start_sync(bitmap, sector, &blocks, 0);
sector += blocks;
}
WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
}
EXPORT_SYMBOL(bitmap_sync_with_cluster);
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
/* For each chunk covered by any of these sectors, set the
......@@ -1814,6 +1848,9 @@ int bitmap_load(struct mddev *mddev)
if (!bitmap)
goto out;
if (mddev_is_clustered(mddev))
md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
/* Clear out old bitmap info first: Either there is none, or we
* are resuming after someone else has possibly changed things,
* so we should forget old cached info.
......@@ -1890,14 +1927,14 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,
if (clear_bits) {
bitmap_update_sb(bitmap);
/* Setting this for the ev_page should be enough.
* And we do not require both write_all and PAGE_DIRT either
*/
/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
* BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
for (i = 0; i < bitmap->storage.file_pages; i++)
set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
bitmap_write_all(bitmap);
if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
bitmap_unplug(bitmap);
}
bitmap_unplug(mddev->bitmap);
*low = lo;
*high = hi;
err:
......@@ -2032,6 +2069,35 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
chunks << chunkshift);
spin_lock_irq(&bitmap->counts.lock);
/* For cluster raid, need to pre-allocate bitmap */
if (mddev_is_clustered(bitmap->mddev)) {
unsigned long page;
for (page = 0; page < pages; page++) {
ret = bitmap_checkpage(&bitmap->counts, page, 1, 1);
if (ret) {
unsigned long k;
/* deallocate the page memory */
for (k = 0; k < page; k++) {
kfree(new_bp[k].map);
}
/* restore some fields from old_counts */
bitmap->counts.bp = old_counts.bp;
bitmap->counts.pages = old_counts.pages;
bitmap->counts.missing_pages = old_counts.pages;
bitmap->counts.chunkshift = old_counts.chunkshift;
bitmap->counts.chunks = old_counts.chunks;
bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
BITMAP_BLOCK_SHIFT);
blocks = old_counts.chunks << old_counts.chunkshift;
pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
break;
} else
bitmap->counts.bp[page].count += 1;
}
}
for (block = 0; block < blocks; ) {
bitmap_counter_t *bmc_old, *bmc_new;
int set;
......
......@@ -258,6 +258,9 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void bitmap_close_sync(struct bitmap *bitmap);
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
void bitmap_sync_with_cluster(struct mddev *mddev,
sector_t old_lo, sector_t old_hi,
sector_t new_lo, sector_t new_hi);
void bitmap_unplug(struct bitmap *bitmap);
void bitmap_daemon_work(struct mddev *mddev);
......
......@@ -61,6 +61,10 @@ struct resync_info {
* the lock.
*/
#define MD_CLUSTER_SEND_LOCKED_ALREADY 5
/* We should receive message after node joined cluster and
* set up all the related infos such as bitmap and personality */
#define MD_CLUSTER_ALREADY_IN_CLUSTER 6
#define MD_CLUSTER_PENDING_RECV_EVENT 7
struct md_cluster_info {
......@@ -85,6 +89,9 @@ struct md_cluster_info {
struct completion newdisk_completion;
wait_queue_head_t wait;
unsigned long state;
/* record the region in RESYNCING message */
sector_t sync_low;
sector_t sync_hi;
};
enum msg_type {
......@@ -284,11 +291,14 @@ static void recover_bitmaps(struct md_thread *thread)
goto dlm_unlock;
}
if (hi > 0) {
/* TODO:Wait for current resync to get over */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (lo < mddev->recovery_cp)
mddev->recovery_cp = lo;
md_check_recovery(mddev);
/* wake up thread to continue resync in case resync
* is not finished */
if (mddev->recovery_cp != MaxSector) {
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
}
dlm_unlock:
dlm_unlock_sync(bm_lockres);
......@@ -370,8 +380,12 @@ static void ack_bast(void *arg, int mode)
struct dlm_lock_resource *res = arg;
struct md_cluster_info *cinfo = res->mddev->cluster_info;
if (mode == DLM_LOCK_EX)
if (mode == DLM_LOCK_EX) {
if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
md_wakeup_thread(cinfo->recv_thread);
else
set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state);
}
}
static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
......@@ -408,6 +422,30 @@ static void process_suspend_info(struct mddev *mddev,
md_wakeup_thread(mddev->thread);
return;
}
/*
* The bitmaps are not same for different nodes
* if RESYNCING is happening in one node, then
* the node which received the RESYNCING message
* probably will perform resync with the region
* [lo, hi] again, so we could reduce resync time
* a lot if we can ensure that the bitmaps among
* different nodes are match up well.
*
* sync_low/hi is used to record the region which
* arrived in the previous RESYNCING message,
*
* Call bitmap_sync_with_cluster to clear
* NEEDED_MASK and set RESYNC_MASK since
* resync thread is running in another node,
* so we don't need to do the resync again
* with the same section */
bitmap_sync_with_cluster(mddev, cinfo->sync_low,
cinfo->sync_hi,
lo, hi);
cinfo->sync_low = lo;
cinfo->sync_hi = hi;
s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
if (!s)
return;
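To make the comment above concrete, here is a purely illustrative userspace sketch of the window arithmetic; the sector values and the sync_window() helper are hypothetical, and the real code walks the regions chunk by chunk via bitmap_end_sync()/bitmap_start_sync() as shown in the bitmap.c hunk earlier in this diff:

	#include <stdio.h>

	/* Hypothetical stand-in for bitmap_sync_with_cluster(): report which
	 * regions would be marked clean vs. in-resync when a new RESYNCING
	 * window [new_lo, new_hi) replaces the recorded [old_lo, old_hi). */
	static void sync_window(unsigned long old_lo, unsigned long old_hi,
				unsigned long new_lo, unsigned long new_hi)
	{
		if (old_lo < new_lo)	/* sender finished [old_lo, new_lo) */
			printf("mark clean    : [%lu, %lu)\n", old_lo, new_lo);
		if (old_hi < new_hi)	/* sender now resyncing [old_hi, new_hi) */
			printf("mark in-resync: [%lu, %lu)\n", old_hi, new_hi);
	}

	int main(void)
	{
		/* first RESYNCING message: sync_low/sync_hi still start at 0 */
		sync_window(0, 0, 0, 8192);
		/* a later message moves the window forward */
		sync_window(0, 8192, 8192, 16384);
		return 0;
	}

The point is only that both the previously recorded window (sync_low/sync_hi) and the new one (lo/hi) are needed: the receiver clears NEEDED for what the sending node has already finished and sets RESYNC for what that node is currently working on, so it does not repeat the same resync locally.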
......@@ -482,11 +520,13 @@ static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
__func__, __LINE__, le32_to_cpu(msg->raid_slot));
}
static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
{
int ret = 0;
if (WARN(mddev->cluster_info->slot_number - 1 == le32_to_cpu(msg->slot),
"node %d received it's own msg\n", le32_to_cpu(msg->slot)))
return;
return -1;
switch (le32_to_cpu(msg->type)) {
case METADATA_UPDATED:
process_metadata_update(mddev, msg);
......@@ -509,9 +549,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
__recover_slot(mddev, le32_to_cpu(msg->slot));
break;
default:
ret = -1;
pr_warn("%s:%d Received unknown message from %d\n",
__func__, __LINE__, msg->slot);
}
return ret;
}
/*
......@@ -535,7 +577,9 @@ static void recv_daemon(struct md_thread *thread)
/* read lvb and wake up thread to process this message_lockres */
memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
process_recvd_msg(thread->mddev, &msg);
ret = process_recvd_msg(thread->mddev, &msg);
if (ret)
goto out;
/*release CR on ack_lockres*/
ret = dlm_unlock_sync(ack_lockres);
......@@ -549,6 +593,7 @@ static void recv_daemon(struct md_thread *thread)
ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
if (unlikely(ret != 0))
pr_info("lock CR on ack failed return %d\n", ret);
out:
/*release CR on message_lockres*/
ret = dlm_unlock_sync(message_lockres);
if (unlikely(ret != 0))
......@@ -778,17 +823,24 @@ static int join(struct mddev *mddev, int nodes)
cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
if (!cinfo->token_lockres)
goto err;
cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
if (!cinfo->ack_lockres)
goto err;
cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
if (!cinfo->no_new_dev_lockres)
goto err;
ret = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
if (ret) {
ret = -EAGAIN;
pr_err("md-cluster: can't join cluster to avoid lock issue\n");
goto err;
}
cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
if (!cinfo->ack_lockres)
goto err;
/* get sync CR lock on ACK. */
if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
ret);
dlm_unlock_sync(cinfo->token_lockres);
/* get sync CR lock on no-new-dev. */
if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
......@@ -809,12 +861,10 @@ static int join(struct mddev *mddev, int nodes)
if (!cinfo->resync_lockres)
goto err;
ret = gather_all_resync_info(mddev, nodes);
if (ret)
goto err;
return 0;
err:
md_unregister_thread(&cinfo->recovery_thread);
md_unregister_thread(&cinfo->recv_thread);
lockres_free(cinfo->message_lockres);
lockres_free(cinfo->token_lockres);
lockres_free(cinfo->ack_lockres);
......@@ -828,6 +878,19 @@ static int join(struct mddev *mddev, int nodes)
return ret;
}
static void load_bitmaps(struct mddev *mddev, int total_slots)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
/* load all the node's bitmap info for resync */
if (gather_all_resync_info(mddev, total_slots))
pr_err("md-cluster: failed to gather all resyn infos\n");
set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
/* wake up recv thread in case something need to be handled */
if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state))
md_wakeup_thread(cinfo->recv_thread);
}
static void resync_bitmap(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
......@@ -937,7 +1000,6 @@ static void metadata_update_cancel(struct mddev *mddev)
static int resync_start(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
}
......@@ -967,7 +1029,6 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
static int resync_finish(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
dlm_unlock_sync(cinfo->resync_lockres);
return resync_info_update(mddev, 0, 0);
}
......@@ -1171,6 +1232,7 @@ static struct md_cluster_operations cluster_ops = {
.add_new_disk_cancel = add_new_disk_cancel,
.new_disk_ack = new_disk_ack,
.remove_disk = remove_disk,
.load_bitmaps = load_bitmaps,
.gather_bitmaps = gather_bitmaps,
.lock_all_bitmaps = lock_all_bitmaps,
.unlock_all_bitmaps = unlock_all_bitmaps,
......
......@@ -23,6 +23,7 @@ struct md_cluster_operations {
void (*add_new_disk_cancel)(struct mddev *mddev);
int (*new_disk_ack)(struct mddev *mddev, bool ack);
int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
void (*load_bitmaps)(struct mddev *mddev, int total_slots);
int (*gather_bitmaps)(struct md_rdev *rdev);
int (*lock_all_bitmaps)(struct mddev *mddev);
void (*unlock_all_bitmaps)(struct mddev *mddev);
......
......@@ -307,7 +307,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
*/
void mddev_suspend(struct mddev *mddev)
{
WARN_ON_ONCE(current == mddev->thread->tsk);
WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
if (mddev->suspended++)
return;
synchronize_rcu();
......@@ -2291,19 +2291,24 @@ void md_update_sb(struct mddev *mddev, int force_change)
return;
}
repeat:
if (mddev_is_clustered(mddev)) {
if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
force_change = 1;
if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
nospares = 1;
ret = md_cluster_ops->metadata_update_start(mddev);
/* Has someone else updated the sb */
if (!does_sb_need_changing(mddev)) {
if (ret == 0)
md_cluster_ops->metadata_update_cancel(mddev);
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
BIT(MD_CHANGE_DEVS) |
BIT(MD_CHANGE_CLEAN));
return;
}
}
repeat:
/* First make sure individual recovery_offsets are correct */
rdev_for_each(rdev, mddev) {
if (rdev->raid_disk >= 0 &&
......@@ -2430,15 +2435,14 @@ void md_update_sb(struct mddev *mddev, int force_change)
md_super_wait(mddev);
/* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
spin_lock(&mddev->lock);
if (mddev_is_clustered(mddev) && ret == 0)
md_cluster_ops->metadata_update_finish(mddev);
if (mddev->in_sync != sync_req ||
test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
!bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
/* have to write it out again */
spin_unlock(&mddev->lock);
goto repeat;
}
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
spin_unlock(&mddev->lock);
wake_up(&mddev->sb_wait);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
......@@ -2452,9 +2456,6 @@ void md_update_sb(struct mddev *mddev, int force_change)
clear_bit(BlockedBadBlocks, &rdev->flags);
wake_up(&rdev->blocked_wait);
}
if (mddev_is_clustered(mddev) && ret == 0)
md_cluster_ops->metadata_update_finish(mddev);
}
EXPORT_SYMBOL(md_update_sb);
......@@ -4816,6 +4817,10 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len)
if (err)
return err;
/* cluster raid doesn't support change array_sectors */
if (mddev_is_clustered(mddev))
return -EINVAL;
if (strncmp(buf, "default", 7) == 0) {
if (mddev->pers)
sectors = mddev->pers->size(mddev, 0, 0);
......@@ -6437,6 +6442,10 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
int rv;
int fit = (num_sectors == 0);
/* cluster raid doesn't support update size */
if (mddev_is_clustered(mddev))
return -EINVAL;
if (mddev->pers->resize == NULL)
return -EINVAL;
/* The "num_sectors" is the number of sectors of each device that
......@@ -7785,7 +7794,7 @@ void md_do_sync(struct md_thread *thread)
struct md_rdev *rdev;
char *desc, *action = NULL;
struct blk_plug plug;
bool cluster_resync_finished = false;
int ret;
/* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
......@@ -7795,6 +7804,19 @@ void md_do_sync(struct md_thread *thread)
return;
}
if (mddev_is_clustered(mddev)) {
ret = md_cluster_ops->resync_start(mddev);
if (ret)
goto skip;
if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
&& ((unsigned long long)mddev->curr_resync_completed
< (unsigned long long)mddev->resync_max_sectors))
goto skip;
}
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
desc = "data-check";
......@@ -8089,11 +8111,6 @@ void md_do_sync(struct md_thread *thread)
mddev->curr_resync_completed = mddev->curr_resync;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
/* tell personality and other nodes that we are finished */
if (mddev_is_clustered(mddev)) {
md_cluster_ops->resync_finish(mddev);
cluster_resync_finished = true;
}
mddev->pers->sync_request(mddev, max_sectors, &skipped);
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
......@@ -8130,12 +8147,18 @@ void md_do_sync(struct md_thread *thread)
}
}
skip:
set_bit(MD_CHANGE_DEVS, &mddev->flags);
if (mddev_is_clustered(mddev) &&
test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!cluster_resync_finished)
ret == 0) {
/* set CHANGE_PENDING here since maybe another
* update is needed, so other nodes are informed */
set_mask_bits(&mddev->flags, 0,
BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
md_cluster_ops->resync_finish(mddev);
} else
set_bit(MD_CHANGE_DEVS, &mddev->flags);
spin_lock(&mddev->lock);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
......@@ -8226,18 +8249,9 @@ static void md_start_sync(struct work_struct *ws)
struct mddev *mddev = container_of(ws, struct mddev, del_work);
int ret = 0;
if (mddev_is_clustered(mddev)) {
ret = md_cluster_ops->resync_start(mddev);
if (ret) {
mddev->sync_thread = NULL;
goto out;
}
}
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
"resync");
out:
if (!mddev->sync_thread) {
if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
printk(KERN_ERR "%s: could not start resync"
......@@ -8536,6 +8550,7 @@ EXPORT_SYMBOL(md_finish_reshape);
int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new)
{
struct mddev *mddev = rdev->mddev;
int rv;
if (is_new)
s += rdev->new_data_offset;
......@@ -8545,8 +8560,8 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
if (rv == 0) {
/* Make sure they get written out promptly */
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
set_mask_bits(&mddev->flags, 0,
BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
md_wakeup_thread(rdev->mddev->thread);
return 1;
} else
......@@ -8680,6 +8695,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
ret = remove_and_add_spares(mddev, rdev2);
pr_info("Activated spare: %s\n",
bdevname(rdev2->bdev,b));
/* wakeup mddev->thread here, so array could
* perform resync with the new activated disk */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
/* device faulty
* We just want to do the minimum to mark the disk
......
......@@ -1474,8 +1474,8 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
set_bit(MD_CHANGE_PENDING, &mddev->flags);
set_mask_bits(&mddev->flags, 0,
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
printk(KERN_ALERT
"md/raid1:%s: Disk failure on %s, disabling device.\n"
"md/raid1:%s: Operation continuing on %d devices.\n",
......
......@@ -1102,8 +1102,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
bio->bi_iter.bi_sector < conf->reshape_progress))) {
/* Need to update reshape_position in metadata */
mddev->reshape_position = conf->reshape_progress;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
set_bit(MD_CHANGE_PENDING, &mddev->flags);
set_mask_bits(&mddev->flags, 0,
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
......@@ -1591,8 +1591,8 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
set_bit(MD_CHANGE_PENDING, &mddev->flags);
set_mask_bits(&mddev->flags, 0,
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
spin_unlock_irqrestore(&conf->device_lock, flags);
printk(KERN_ALERT
"md/raid10:%s: Disk failure on %s, disabling device.\n"
......@@ -3782,8 +3782,10 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, size);
if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > oldsize) {
mddev->recovery_cp = oldsize;
......@@ -4593,8 +4595,10 @@ static void raid10_finish_reshape(struct mddev *mddev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
mddev->resync_max_sectors = size;
if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}
} else {
int d;
for (d = conf->geo.raid_disks ;
......
......@@ -712,8 +712,8 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
* in_teardown check workaround this issue.
*/
if (!log->in_teardown) {
set_bit(MD_CHANGE_DEVS, &mddev->flags);
set_bit(MD_CHANGE_PENDING, &mddev->flags);
set_mask_bits(&mddev->flags, 0,
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
......
......@@ -2514,8 +2514,8 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
set_bit(MD_CHANGE_PENDING, &mddev->flags);
set_mask_bits(&mddev->flags, 0,
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
printk(KERN_ALERT
"md/raid:%s: Disk failure on %s, disabling device.\n"
"md/raid:%s: Operation continuing on %d devices.\n",
......@@ -7572,8 +7572,10 @@ static void raid5_finish_reshape(struct mddev *mddev)
if (mddev->delta_disks > 0) {
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}
} else {
int d;
spin_lock_irq(&conf->device_lock);
......
......@@ -227,6 +227,22 @@ static inline unsigned long __ffs64(u64 word)
})
#endif
#ifndef bit_clear_unless
#define bit_clear_unless(ptr, _clear, _test) \
({ \
const typeof(*ptr) clear = (_clear), test = (_test); \
typeof(*ptr) old, new; \
\
do { \
old = ACCESS_ONCE(*ptr); \
new = old & ~clear; \
} while (!(old & test) && \
cmpxchg(ptr, old, new) != old); \
\
!(old & test); \
})
#endif
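For reference, a minimal userspace sketch of how md_update_sb() (earlier in this diff) uses this helper. cmpxchg()/ACCESS_ONCE() are replaced with GCC __atomic builtins purely so the example builds outside the kernel, and the CHANGE_* names are simplified stand-ins for the MD_CHANGE_* flag bits:

	#include <stdio.h>

	#define BIT(nr)	(1UL << (nr))

	/* userspace stand-in for the bit_clear_unless() macro above */
	#define bit_clear_unless(ptr, _clear, _test)				\
	({									\
		const __typeof__(*(ptr)) clear = (_clear), test = (_test);	\
		__typeof__(*(ptr)) old, new;					\
										\
		do {								\
			old = __atomic_load_n((ptr), __ATOMIC_SEQ_CST);	\
			new = old & ~clear;					\
		} while (!(old & test) &&					\
			 !__atomic_compare_exchange_n((ptr), &old, new, 0,	\
						      __ATOMIC_SEQ_CST,	\
						      __ATOMIC_SEQ_CST));	\
										\
		!(old & test);							\
	})

	enum { CHANGE_DEVS, CHANGE_CLEAN, CHANGE_PENDING };

	int main(void)
	{
		unsigned long flags;
		int done;

		/* nothing re-dirtied the superblock: PENDING is cleared, result 1 */
		flags = BIT(CHANGE_PENDING);
		done = bit_clear_unless(&flags, BIT(CHANGE_PENDING),
					BIT(CHANGE_DEVS) | BIT(CHANGE_CLEAN));
		printf("done=%d flags=%#lx\n", done, flags);

		/* DEVS was set again in the meantime: PENDING stays set and the
		 * result is 0, which is what sends md_update_sb() back to repeat */
		flags = BIT(CHANGE_PENDING) | BIT(CHANGE_DEVS);
		done = bit_clear_unless(&flags, BIT(CHANGE_PENDING),
					BIT(CHANGE_DEVS) | BIT(CHANGE_CLEAN));
		printf("done=%d flags=%#lx\n", done, flags);
		return 0;
	}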
#ifndef find_last_bit
/**
* find_last_bit - find the last set bit in a memory region
......