Commit 4324796e authored by Jens Axboe


Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.1/block

Pull MD updates and fixes from Song:

"1. Various raid5 fix and clean up, by Logan Gunthorpe and David Sloan.
 2. Raid10 performance optimization, by Yu Kuai."

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: Fix spelling mistake in comments of r5l_log
  md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d
  md/raid10: convert resync_lock to use seqlock
  md/raid10: fix improper BUG_ON() in raise_barrier()
  md/raid10: prevent unnecessary calls to wake_up() in fast path
  md/raid10: don't modify 'nr_waiting' in wait_barrier() for the case nowait
  md/raid10: factor out code from wait_barrier() to stop_waiting_barrier()
  md: Remove extra mddev_get() in md_seq_start()
  md/raid5: Remove unnecessary bio_put() in raid5_read_one_chunk()
  md/raid5: Ensure stripe_fill happens on non-read IO with journal
  md/raid5: Don't read ->active_stripes if it's not needed
  md/raid5: Cleanup prototype of raid5_get_active_stripe()
  md/raid5: Drop extern on function declarations in raid5.h
  md/raid5: Refactor raid5_get_active_stripe()
  md: Replace snprintf with scnprintf
  md/raid10: fix compile warning
  md/raid5: Fix spelling mistakes in comments
parents 9713a670 65b94b52
@@ -8154,7 +8154,6 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos)
 	list_for_each(tmp,&all_mddevs)
 		if (!l--) {
 			mddev = list_entry(tmp, struct mddev, all_mddevs);
-			mddev_get(mddev);
 			if (!mddev_get(mddev))
 				continue;
 			spin_unlock(&all_mddevs_lock);
...
@@ -47,7 +47,7 @@ static void dump_zones(struct mddev *mddev)
 		int len = 0;

 		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
-			len += snprintf(line+len, 200-len, "%s%pg", k?"/":"",
+			len += scnprintf(line+len, 200-len, "%s%pg", k?"/":"",
				conf->devlist[j * raid_disks + k]->bdev);
 		pr_debug("md: zone%d=[%s]\n", j, line);
...

@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);

 #include "raid1-10.c"

+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+	do { \
+		write_sequnlock_irq(&(conf)->resync_lock); \
+		cmd; \
+	} while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+		       cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+	wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
  * 'struct resync_pages'.
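These macros exist because a seqlock, unlike the old spinlock paired with wait_event_lock_irq(), has no helper that drops it across a sleep: cmd_before() releases the write side before blocking and cmd_after() retakes it on wakeup. A rough userspace analogue of the same unlock/sleep/relock discipline, using a pthread mutex and condvar; the names are illustrative, not kernel API:

    #include <pthread.h>

    static pthread_mutex_t resync_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t wait_barrier = PTHREAD_COND_INITIALIZER;
    static int barrier; /* protected by resync_lock */

    static void wait_for_barrier_to_drop(void)
    {
    	pthread_mutex_lock(&resync_lock);
    	while (barrier) {
    		/* pthread_cond_wait() releases resync_lock while sleeping
    		 * and reacquires it before returning: the same dance the
    		 * cmd_before()/cmd_after() macros open-code, because the
    		 * sleeper must not hold the lock writers need to wake it. */
    		pthread_cond_wait(&wait_barrier, &resync_lock);
    	}
    	pthread_mutex_unlock(&resync_lock);
    }

    static void lower_barrier_once(void)
    {
    	pthread_mutex_lock(&resync_lock);
    	barrier--;
    	pthread_mutex_unlock(&resync_lock);
    	pthread_cond_broadcast(&wait_barrier);
    }

    static void *waiter(void *arg)
    {
    	(void)arg;
    	wait_for_barrier_to_drop();
    	return NULL;
    }

    int main(void)
    {
    	pthread_t t;

    	barrier = 1;
    	pthread_create(&t, NULL, waiter, NULL);
    	lower_barrier_once();
    	pthread_join(&t, NULL);
    	return 0;
    }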
@@ -274,6 +289,12 @@ static void put_buf(struct r10bio *r10_bio)
 	lower_barrier(conf);
 }

+static void wake_up_barrier(struct r10conf *conf)
+{
+	if (wq_has_sleeper(&conf->wait_barrier))
+		wake_up(&conf->wait_barrier);
+}
+
 static void reschedule_retry(struct r10bio *r10_bio)
 {
 	unsigned long flags;
@@ -930,78 +951,101 @@ static void flush_pending_writes(struct r10conf *conf)
 static void raise_barrier(struct r10conf *conf, int force)
 {
+	write_seqlock_irq(&conf->resync_lock);
 	BUG_ON(force && !conf->barrier);
-	spin_lock_irq(&conf->resync_lock);

 	/* Wait until no block IO is waiting (unless 'force') */
-	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock);
+	wait_event_barrier(conf, force || !conf->nr_waiting);

 	/* block any new IO from starting */
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);

 	/* Now wait for all pending IO to complete */
-	wait_event_lock_irq(conf->wait_barrier,
-			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock);
+	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+				 conf->barrier < RESYNC_DEPTH);

-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }

 static void lower_barrier(struct r10conf *conf)
 {
 	unsigned long flags;

-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->barrier--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	write_seqlock_irqsave(&conf->resync_lock, flags);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
+	write_sequnlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }

+static bool stop_waiting_barrier(struct r10conf *conf)
+{
+	struct bio_list *bio_list = current->bio_list;
+
+	/* barrier is dropped */
+	if (!conf->barrier)
+		return true;
+
+	/*
+	 * If there are already pending requests (preventing the barrier from
+	 * rising completely), and the pre-process bio queue isn't empty, then
+	 * don't wait, as we need to empty that queue to get the nr_pending
+	 * count down.
+	 */
+	if (atomic_read(&conf->nr_pending) && bio_list &&
+	    (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
+		return true;
+
+	/* move on if recovery thread is blocked by us */
+	if (conf->mddev->thread->tsk == current &&
+	    test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
+	    conf->nr_queued > 0)
+		return true;
+
+	return false;
+}
+
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+	unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+	if (READ_ONCE(conf->barrier))
+		return false;
+
+	atomic_inc(&conf->nr_pending);
+	if (!read_seqretry(&conf->resync_lock, seq))
+		return true;
+
+	if (atomic_dec_and_test(&conf->nr_pending))
+		wake_up_barrier(conf);
+
+	return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
 	bool ret = true;

-	spin_lock_irq(&conf->resync_lock);
+	if (wait_barrier_nolock(conf))
+		return true;
+
+	write_seqlock_irq(&conf->resync_lock);
 	if (conf->barrier) {
-		struct bio_list *bio_list = current->bio_list;
-		conf->nr_waiting++;
-		/* Wait for the barrier to drop.
-		 * However if there are already pending
-		 * requests (preventing the barrier from
-		 * rising completely), and the
-		 * pre-process bio queue isn't empty,
-		 * then don't wait, as we need to empty
-		 * that queue to get the nr_pending
-		 * count down.
-		 */
 		/* Return false when nowait flag is set */
 		if (nowait) {
 			ret = false;
 		} else {
+			conf->nr_waiting++;
 			raid10_log(conf->mddev, "wait barrier");
-			wait_event_lock_irq(conf->wait_barrier,
-					    !conf->barrier ||
-					    (atomic_read(&conf->nr_pending) &&
-					     bio_list &&
-					     (!bio_list_empty(&bio_list[0]) ||
-					      !bio_list_empty(&bio_list[1]))) ||
-					    /* move on if recovery thread is
-					     * blocked by us
-					     */
-					    (conf->mddev->thread->tsk == current &&
-					     test_bit(MD_RECOVERY_RUNNING,
						      &conf->mddev->recovery) &&
-					     conf->nr_queued > 0),
-					    conf->resync_lock);
+			wait_event_barrier(conf, stop_waiting_barrier(conf));
+			conf->nr_waiting--;
 		}
-		conf->nr_waiting--;
 		if (!conf->nr_waiting)
 			wake_up(&conf->wait_barrier);
 	}
 	/* Only increment nr_pending when we wait */
 	if (ret)
 		atomic_inc(&conf->nr_pending);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 	return ret;
 }
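wait_barrier_nolock() is the heart of the optimization: a reader samples the seqlock, takes its nr_pending reference, and only falls back to the locked slow path if raise_barrier() or lower_barrier() raced with it. A simplified userspace model with C11 atomics; it deliberately ignores the memory-ordering details that the kernel's read_seqbegin()/read_seqretry() primitives handle, so treat it as a sketch of the control flow only:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_uint resync_seq;	/* odd while a writer holds the lock */
    static atomic_int barrier;
    static atomic_int nr_pending;

    static bool try_enter_io_fast_path(void)
    {
    	unsigned int seq = atomic_load(&resync_seq);

    	if (seq & 1)			/* writer active: fall back */
    		return false;
    	if (atomic_load(&barrier))	/* barrier raised: fall back */
    		return false;

    	atomic_fetch_add(&nr_pending, 1);

    	/* Mirrors read_seqretry(): if a writer slipped in between the
    	 * barrier check and the nr_pending increment, undo the reference
    	 * and let the caller retry on the locked slow path. */
    	if (atomic_load(&resync_seq) == seq)
    		return true;

    	atomic_fetch_sub(&nr_pending, 1);
    	return false;
    }

    int main(void)
    {
    	return try_enter_io_fast_path() ? 0 : 1;
    }

The payoff is that the common case (no resync in progress) never bounces the lock's cache line between CPUs; only barrier transitions pay for the write lock.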
@@ -1009,7 +1053,7 @@ static void allow_barrier(struct r10conf *conf)
 {
 	if ((atomic_dec_and_test(&conf->nr_pending)) ||
 	    (conf->array_freeze_pending))
-		wake_up(&conf->wait_barrier);
+		wake_up_barrier(conf);
 }

 static void freeze_array(struct r10conf *conf, int extra)
@@ -1026,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
 	 * must match the number of pending IOs (nr_pending) before
 	 * we continue.
 	 */
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	conf->array_freeze_pending++;
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 	conf->nr_waiting++;
-	wait_event_lock_irq_cmd(conf->wait_barrier,
-				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-				conf->resync_lock,
-				flush_pending_writes(conf));
+	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+			conf->nr_queued + extra, flush_pending_writes(conf));
 	conf->array_freeze_pending--;
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }

 static void unfreeze_array(struct r10conf *conf)
 {
 	/* reverse the effect of the freeze */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
+	write_seqlock_irq(&conf->resync_lock);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
 	conf->nr_waiting--;
 	wake_up(&conf->wait_barrier);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }

 static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -1885,7 +1926,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	__make_request(mddev, bio, sectors);

 	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	wake_up_barrier(conf);
 	return true;
 }
@@ -1980,7 +2021,7 @@ static int enough(struct r10conf *conf, int ignore)
 	 * Otherwise, it must be degraded:
 	 * - recovery is interrupted.
 	 * - &mddev->degraded is bumped.
+	 *
 	 * @rdev is marked as &Faulty excluding case when array is failed and
 	 * &mddev->fail_last_dev is off.
 	 */
@@ -4033,7 +4074,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 	INIT_LIST_HEAD(&conf->bio_end_io_list);

-	spin_lock_init(&conf->resync_lock);
+	seqlock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 	atomic_set(&conf->nr_pending, 0);
@@ -4352,7 +4393,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
 			rdev->new_raid_disk = rdev->raid_disk * 2;
 			rdev->sectors = size;
 		}
-		conf->barrier = 1;
+		WRITE_ONCE(conf->barrier, 1);
 	}

 	return conf;
...
@@ -76,7 +76,7 @@ struct r10conf {
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;

-	spinlock_t		resync_lock;
+	seqlock_t		resync_lock;
 	atomic_t		nr_pending;
 	int			nr_waiting;
 	int			nr_queued;
...
@@ -125,7 +125,7 @@ struct r5l_log {
 					 * reclaimed. if it's 0, reclaim spaces
 					 * used by io_units which are in
 					 * IO_UNIT_STRIPE_END state (eg, reclaim
-					 * dones't wait for specific io_unit
+					 * doesn't wait for specific io_unit
 					 * switching to IO_UNIT_STRIPE_END
 					 * state) */
 	wait_queue_head_t	iounit_wait;
@@ -1327,9 +1327,9 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 	 * superblock is updated to new log tail. Updating superblock (either
 	 * directly call md_update_sb() or depend on md thread) must hold
 	 * reconfig mutex. On the other hand, raid5_quiesce is called with
-	 * reconfig_mutex hold. The first step of raid5_quiesce() is waitting
-	 * for all IO finish, hence waitting for reclaim thread, while reclaim
-	 * thread is calling this function and waitting for reconfig mutex. So
+	 * reconfig_mutex hold. The first step of raid5_quiesce() is waiting
+	 * for all IO finish, hence waiting for reclaim thread, while reclaim
+	 * thread is calling this function and waiting for reconfig mutex. So
 	 * there is a deadlock. We workaround this issue with a trylock.
 	 * FIXME: we could miss discard if we can't take reconfig mutex
 	 */
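The trylock workaround this comment describes is a standard deadlock-avoidance pattern: the reclaim thread refuses to block on a mutex whose holder may itself be waiting on the reclaim thread. A hedged userspace sketch of the idea; reconfig_mutex and the function names here are stand-ins, not the kernel objects:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t reconfig_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void reclaim_side_update(void)
    {
    	if (pthread_mutex_trylock(&reconfig_mutex) != 0) {
    		/* The holder (e.g. a quiesce path) may be waiting for us:
    		 * back off instead of blocking. The cost is occasionally
    		 * skipping the work, which is why the kernel comment notes
    		 * a discard can be missed. */
    		printf("busy, skipping superblock update this round\n");
    		return;
    	}
    	printf("updating superblock\n");
    	pthread_mutex_unlock(&reconfig_mutex);
    }

    int main(void)
    {
    	reclaim_side_update();
    	return 0;
    }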
@@ -1923,7 +1923,8 @@ r5c_recovery_alloc_stripe(
 {
 	struct stripe_head *sh;

-	sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
+	sh = raid5_get_active_stripe(conf, NULL, stripe_sect,
+				     noblock ? R5_GAS_NOBLOCK : 0);
 	if (!sh)
 		return NULL;  /* no more stripe available */
...
@@ -36,6 +36,7 @@
  */

 #include <linux/blkdev.h>
+#include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
@@ -789,87 +790,80 @@ struct stripe_request_ctx {
  */
 static bool is_inactive_blocked(struct r5conf *conf, int hash)
 {
-	int active = atomic_read(&conf->active_stripes);
-
 	if (list_empty(conf->inactive_list + hash))
 		return false;

 	if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
 		return true;

-	return active < (conf->max_nr_stripes * 3 / 4);
+	return (atomic_read(&conf->active_stripes) <
+		(conf->max_nr_stripes * 3 / 4));
 }

-static struct stripe_head *__raid5_get_active_stripe(struct r5conf *conf,
+struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
 		struct stripe_request_ctx *ctx, sector_t sector,
-		bool previous, bool noblock, bool noquiesce)
+		unsigned int flags)
 {
 	struct stripe_head *sh;
 	int hash = stripe_hash_locks_hash(conf, sector);
+	int previous = !!(flags & R5_GAS_PREVIOUS);

 	pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);

 	spin_lock_irq(conf->hash_locks + hash);

-retry:
-	if (!noquiesce && conf->quiesce) {
-		/*
-		 * Must release the reference to batch_last before waiting,
-		 * on quiesce, otherwise the batch_last will hold a reference
-		 * to a stripe and raid5_quiesce() will deadlock waiting for
-		 * active_stripes to go to zero.
-		 */
-		if (ctx && ctx->batch_last) {
-			raid5_release_stripe(ctx->batch_last);
-			ctx->batch_last = NULL;
-		}
-
-		wait_event_lock_irq(conf->wait_for_quiescent, !conf->quiesce,
-				    *(conf->hash_locks + hash));
-	}
-
-	sh = find_get_stripe(conf, sector, conf->generation - previous, hash);
-	if (sh)
-		goto out;
-
-	if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
-		goto wait_for_stripe;
-
-	sh = get_free_stripe(conf, hash);
-	if (sh) {
-		r5c_check_stripe_cache_usage(conf);
-		init_stripe(sh, sector, previous);
-		atomic_inc(&sh->count);
-		goto out;
-	}
-
-	if (!test_bit(R5_DID_ALLOC, &conf->cache_state))
-		set_bit(R5_ALLOC_MORE, &conf->cache_state);
-
-wait_for_stripe:
-	if (noblock)
-		goto out;
-
-	set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
-	r5l_wake_reclaim(conf->log, 0);
-	wait_event_lock_irq(conf->wait_for_stripe,
-			    is_inactive_blocked(conf, hash),
-			    *(conf->hash_locks + hash));
-	clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
-	goto retry;
+	for (;;) {
+		if (!(flags & R5_GAS_NOQUIESCE) && conf->quiesce) {
+			/*
+			 * Must release the reference to batch_last before
+			 * waiting, on quiesce, otherwise the batch_last will
+			 * hold a reference to a stripe and raid5_quiesce()
+			 * will deadlock waiting for active_stripes to go to
+			 * zero.
+			 */
+			if (ctx && ctx->batch_last) {
+				raid5_release_stripe(ctx->batch_last);
+				ctx->batch_last = NULL;
+			}
+
+			wait_event_lock_irq(conf->wait_for_quiescent,
+					    !conf->quiesce,
+					    *(conf->hash_locks + hash));
+		}
+
+		sh = find_get_stripe(conf, sector, conf->generation - previous,
+				     hash);
+		if (sh)
+			break;
+
+		if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
+			sh = get_free_stripe(conf, hash);
+			if (sh) {
+				r5c_check_stripe_cache_usage(conf);
+				init_stripe(sh, sector, previous);
+				atomic_inc(&sh->count);
+				break;
+			}
+
+			if (!test_bit(R5_DID_ALLOC, &conf->cache_state))
+				set_bit(R5_ALLOC_MORE, &conf->cache_state);
+		}
+
+		if (flags & R5_GAS_NOBLOCK)
+			break;
+
+		set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
+		r5l_wake_reclaim(conf->log, 0);
+		wait_event_lock_irq(conf->wait_for_stripe,
+				    is_inactive_blocked(conf, hash),
+				    *(conf->hash_locks + hash));
+		clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
+	}

-out:
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }

-struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
-		sector_t sector, bool previous, bool noblock, bool noquiesce)
-{
-	return __raid5_get_active_stripe(conf, NULL, sector, previous, noblock,
-					 noquiesce);
-}
-
 static bool is_full_stripe_write(struct stripe_head *sh)
 {
 	BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
@@ -4047,7 +4041,7 @@ static void handle_stripe_fill(struct stripe_head *sh,
 		 * back cache (prexor with orig_page, and then xor with
 		 * page) in the read path
 		 */
-		if (s->injournal && s->failed) {
+		if (s->to_read && s->injournal && s->failed) {
 			if (test_bit(STRIPE_R5C_CACHING, &sh->state))
 				r5c_make_stripe_write_out(sh);
 			goto out;
@@ -4636,7 +4630,8 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
 			sector_t bn = raid5_compute_blocknr(sh, i, 1);
 			sector_t s = raid5_compute_sector(conf, bn, 0,
 							  &dd_idx, NULL);
-			sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1);
+			sh2 = raid5_get_active_stripe(conf, NULL, s,
+				R5_GAS_NOBLOCK | R5_GAS_NOQUIESCE);
 			if (sh2 == NULL)
 				/* so far only the early blocks of this stripe
 				 * have been requested.  When later blocks
@@ -5273,7 +5268,9 @@ static void handle_stripe(struct stripe_head *sh)
 	/* Finish reconstruct operations initiated by the expansion process */
 	if (sh->reconstruct_state == reconstruct_state_result) {
 		struct stripe_head *sh_src
-			= raid5_get_active_stripe(conf, sh->sector, 1, 1, 1);
+			= raid5_get_active_stripe(conf, NULL, sh->sector,
+					R5_GAS_PREVIOUS | R5_GAS_NOBLOCK |
+					R5_GAS_NOQUIESCE);
 		if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) {
 			/* sh cannot be written until sh_src has been read.
 			 * so arrange for sh to be delayed a little
@@ -5542,7 +5539,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 		if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
 				&bad_sectors)) {
-			bio_put(raid_bio);
 			rdev_dec_pending(rdev, mddev);
 			return 0;
 		}
@@ -5823,7 +5819,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 		DEFINE_WAIT(w);
 		int d;
 	again:
-		sh = raid5_get_active_stripe(conf, logical_sector, 0, 0, 0);
+		sh = raid5_get_active_stripe(conf, NULL, logical_sector, 0);
 		prepare_to_wait(&conf->wait_for_overlap, &w,
 				TASK_UNINTERRUPTIBLE);
 		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
@@ -5978,7 +5974,7 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
 	enum stripe_result ret;
 	struct stripe_head *sh;
 	sector_t new_sector;
-	int previous = 0;
+	int previous = 0, flags = 0;
 	int seq, dd_idx;

 	seq = read_seqcount_begin(&conf->gen_lock);
@@ -6012,8 +6008,11 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
 	pr_debug("raid456: %s, sector %llu logical %llu\n", __func__,
 		 new_sector, logical_sector);

-	sh = __raid5_get_active_stripe(conf, ctx, new_sector, previous,
-				       (bi->bi_opf & REQ_RAHEAD), 0);
+	if (previous)
+		flags |= R5_GAS_PREVIOUS;
+	if (bi->bi_opf & REQ_RAHEAD)
+		flags |= R5_GAS_NOBLOCK;
+	sh = raid5_get_active_stripe(conf, ctx, new_sector, flags);
 	if (unlikely(!sh)) {
 		/* cannot get stripe, just give-up */
 		bi->bi_status = BLK_STS_IOERR;
@@ -6362,7 +6361,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) {
 		int j;
 		int skipped_disk = 0;
-		sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
+		sh = raid5_get_active_stripe(conf, NULL, stripe_addr+i,
+					     R5_GAS_NOQUIESCE);
 		set_bit(STRIPE_EXPANDING, &sh->state);
 		atomic_inc(&conf->reshape_stripes);
 		/* If any of this stripe is beyond the end of the old
@@ -6411,7 +6411,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	if (last_sector >= mddev->dev_sectors)
 		last_sector = mddev->dev_sectors - 1;
 	while (first_sector <= last_sector) {
-		sh = raid5_get_active_stripe(conf, first_sector, 1, 0, 1);
+		sh = raid5_get_active_stripe(conf, NULL, first_sector,
+				R5_GAS_PREVIOUS | R5_GAS_NOQUIESCE);
 		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		raid5_release_stripe(sh);
@@ -6531,9 +6532,10 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
 	md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);

-	sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0);
+	sh = raid5_get_active_stripe(conf, NULL, sector_nr,
+				     R5_GAS_NOBLOCK);
 	if (sh == NULL) {
-		sh = raid5_get_active_stripe(conf, sector_nr, 0, 0, 0);
+		sh = raid5_get_active_stripe(conf, NULL, sector_nr, 0);
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access
 		 */
@@ -6596,8 +6598,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
 			/* already done this stripe */
 			continue;

-		sh = raid5_get_active_stripe(conf, sector, 0, 1, 1);
+		sh = raid5_get_active_stripe(conf, NULL, sector,
+				R5_GAS_NOBLOCK | R5_GAS_NOQUIESCE);
 		if (!sh) {
 			/* failed to get a stripe - must wait */
 			conf->retry_read_aligned = raid_bio;
@@ -6781,7 +6783,18 @@ static void raid5d(struct md_thread *thread)
 			spin_unlock_irq(&conf->device_lock);
 			md_check_recovery(mddev);
 			spin_lock_irq(&conf->device_lock);
+
+			/*
+			 * Waiting on MD_SB_CHANGE_PENDING below may deadlock
+			 * seeing md_check_recovery() is needed to clear
+			 * the flag when using mdmon.
+			 */
+			continue;
 		}
+
+		wait_event_lock_irq(mddev->sb_wait,
+			!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
+			conf->device_lock);
 	}
 	pr_debug("%d stripes handled\n", handled);
...
@@ -803,16 +803,24 @@ raid5_get_dev_page(struct stripe_head *sh, int disk_idx)
 }
 #endif

-extern void md_raid5_kick_device(struct r5conf *conf);
-extern int raid5_set_cache_size(struct mddev *mddev, int size);
-extern sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous);
-extern void raid5_release_stripe(struct stripe_head *sh);
-extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
-				     int previous, int *dd_idx,
-				     struct stripe_head *sh);
-extern struct stripe_head *
-raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
-			bool previous, bool noblock, bool noquiesce);
-extern int raid5_calc_degraded(struct r5conf *conf);
-extern int r5c_journal_mode_set(struct mddev *mddev, int journal_mode);
+void md_raid5_kick_device(struct r5conf *conf);
+int raid5_set_cache_size(struct mddev *mddev, int size);
+sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous);
+void raid5_release_stripe(struct stripe_head *sh);
+sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
+			      int previous, int *dd_idx, struct stripe_head *sh);
+
+struct stripe_request_ctx;
+/* get stripe from previous generation (when reshaping) */
+#define R5_GAS_PREVIOUS		(1 << 0)
+/* do not block waiting for a free stripe */
+#define R5_GAS_NOBLOCK		(1 << 1)
+/* do not block waiting for quiesce to be released */
+#define R5_GAS_NOQUIESCE	(1 << 2)
+struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
+		struct stripe_request_ctx *ctx, sector_t sector,
+		unsigned int flags);
+
+int raid5_calc_degraded(struct r5conf *conf);
+int r5c_journal_mode_set(struct mddev *mddev, int journal_mode);
 #endif
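The R5_GAS_* flags replace the old positional previous/noblock/noquiesce bools, which were easy to transpose at call sites: compare the old raid5_get_active_stripe(conf, s, 0, 1, 1) with the new R5_GAS_NOBLOCK | R5_GAS_NOQUIESCE. A small self-contained sketch of the same bool-to-flags pattern; the demo function and its output are stand-ins, not kernel code:

    #include <stdio.h>

    #define GAS_PREVIOUS	(1 << 0)
    #define GAS_NOBLOCK	(1 << 1)
    #define GAS_NOQUIESCE	(1 << 2)

    static void get_stripe(unsigned int flags)
    {
    	/* One self-documenting parameter instead of three positional
    	 * bools: the caller's intent is visible at the call site. */
    	printf("previous=%d noblock=%d noquiesce=%d\n",
    	       !!(flags & GAS_PREVIOUS),
    	       !!(flags & GAS_NOBLOCK),
    	       !!(flags & GAS_NOQUIESCE));
    }

    int main(void)
    {
    	get_stripe(GAS_NOBLOCK | GAS_NOQUIESCE); /* was (..., 0, 1, 1) */
    	return 0;
    }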