Commit 3009b303 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:

 - fix a parity calculation bug of raid5 cache by Song

 - fix a potential deadlock issue by me

 - fix two endian issues by Jason

 - fix a disk limitation issue by Neil

 - other small fixes and cleanup

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/raid1: fix a trivial typo in comments
  md/r5cache: fix set_syndrome_sources() for data in cache
  md: fix incorrect use of lexx_to_cpu in does_sb_need_changing
  md: fix super_offset endianness in super_1_rdev_size_change
  md/raid1/10: fix potential deadlock
  md: don't impose the MD_SB_DISKS limit on arrays without metadata.
  md: move funcs from pers->resize to update_size
  md-cluster: remove useless memset from gather_all_resync_info
  md-cluster: free md_cluster_info if node leave cluster
  md: delete dead code
  md/raid10: submit bio directly to replacement disk
parents 69eea5a4 11353b9d
......@@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
bm_lockres->flags |= DLM_LKF_NOQUEUE;
ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
if (ret == -EAGAIN) {
memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
s = read_resync_info(mddev, bm_lockres);
if (s) {
pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
......@@ -974,6 +973,7 @@ static int leave(struct mddev *mddev)
lockres_free(cinfo->bitmap_lockres);
unlock_all_bitmaps(mddev);
dlm_release_lockspace(cinfo->lockspace, 2);
kfree(cinfo);
return 0;
}
......
......@@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
}
EXPORT_SYMBOL(md_flush_request);
void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
struct mddev *mddev = cb->data;
md_wakeup_thread(mddev->thread);
kfree(cb);
}
EXPORT_SYMBOL(md_unplug);
static inline struct mddev *mddev_get(struct mddev *mddev)
{
atomic_inc(&mddev->active);
......@@ -1887,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
}
sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
sb->super_offset = rdev->sb_start;
sb->super_offset = cpu_to_le64(rdev->sb_start);
sb->sb_csum = calc_sb_1_csum(sb);
do {
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
......@@ -2295,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev)
/* Check if any mddev parameters have changed */
if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
(mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
(mddev->layout != le64_to_cpu(sb->layout)) ||
(mddev->layout != le32_to_cpu(sb->layout)) ||
(mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
(mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
return true;
......@@ -6458,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->layout = info->layout;
mddev->chunk_sectors = info->chunk_size >> 9;
mddev->max_disks = MD_SB_DISKS;
if (mddev->persistent) {
mddev->flags = 0;
mddev->sb_flags = 0;
mddev->max_disks = MD_SB_DISKS;
mddev->flags = 0;
mddev->sb_flags = 0;
}
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
......@@ -6533,8 +6524,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
return -ENOSPC;
}
rv = mddev->pers->resize(mddev, num_sectors);
if (!rv)
revalidate_disk(mddev->gendisk);
if (!rv) {
if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}
}
return rv;
}
......
......@@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
struct mddev *mddev);
extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
static inline int mddev_check_plugged(struct mddev *mddev)
{
return !!blk_check_plugged(md_unplug, mddev,
sizeof(struct blk_plug_cb));
}
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
......
......@@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf)
static void freeze_array(struct r1conf *conf, int extra)
{
/* Stop sync I/O and normal I/O and wait for everything to
* go quite.
* go quiet.
* This is called in two situations:
* 1) management command handlers (reshape, remove disk, quiesce).
* 2) one normal I/O request failed.
......@@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
split = bio;
}
if (bio_data_dir(split) == READ)
if (bio_data_dir(split) == READ) {
raid1_read_request(mddev, split);
else
/*
* If a bio is splitted, the first part of bio will
* pass barrier but the bio is queued in
* current->bio_list (see generic_make_request). If
* there is a raise_barrier() called here, the second
* part of bio can't pass barrier. But since the first
* part bio isn't dispatched to underlaying disks yet,
* the barrier is never released, hence raise_barrier
* will alays wait. We have a deadlock.
* Note, this only happens in read path. For write
* path, the first part of bio is dispatched in a
* schedule() call (because of blk plug) or offloaded
* to raid10d.
* Quitting from the function immediately can change
* the bio order queued in bio_list and avoid the deadlock.
*/
if (split != bio) {
generic_make_request(bio);
break;
}
} else
raid1_write_request(mddev, split);
} while (split != bio);
}
......@@ -3246,8 +3267,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, newsize);
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
......
......@@ -1478,11 +1478,24 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
mbio->bi_bdev = (void*)rdev;
atomic_inc(&r10_bio->remaining);
cb = blk_check_plugged(raid10_unplug, mddev,
sizeof(*plug));
if (cb)
plug = container_of(cb, struct raid10_plug_cb,
cb);
else
plug = NULL;
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
if (plug) {
bio_list_add(&plug->pending, mbio);
plug->pending_cnt++;
} else {
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
}
spin_unlock_irqrestore(&conf->device_lock, flags);
if (!mddev_check_plugged(mddev))
if (!plug)
md_wakeup_thread(mddev->thread);
}
}
......@@ -1572,7 +1585,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
split = bio;
}
/*
* If a bio is splitted, the first part of bio will pass
* barrier but the bio is queued in current->bio_list (see
* generic_make_request). If there is a raise_barrier() called
* here, the second part of bio can't pass barrier. But since
* the first part bio isn't dispatched to underlaying disks
* yet, the barrier is never released, hence raise_barrier will
* alays wait. We have a deadlock.
* Note, this only happens in read path. For write path, the
* first part of bio is dispatched in a schedule() call
* (because of blk plug) or offloaded to raid10d.
* Quitting from the function immediately can change the bio
* order queued in bio_list and avoid the deadlock.
*/
__make_request(mddev, split);
if (split != bio && bio_data_dir(bio) == READ) {
generic_make_request(bio);
break;
}
} while (split != bio);
/* In case raid10d snuck in to freeze_array */
......@@ -3944,10 +3975,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, size);
if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > oldsize) {
mddev->recovery_cp = oldsize;
......
......@@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs,
(test_bit(R5_Wantdrain, &dev->flags) ||
test_bit(R5_InJournal, &dev->flags))) ||
(srctype == SYNDROME_SRC_WRITTEN &&
dev->written)) {
(dev->written ||
test_bit(R5_InJournal, &dev->flags)))) {
if (test_bit(R5_InJournal, &dev->flags))
srcs[slot] = sh->dev[i].orig_page;
else
......@@ -7605,8 +7606,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, newsize);
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment