Commit 74173ff4 authored by Song Liu

Merge branch 'md-next-raid10-optimize' into md-next

This patchset avoids holding two locks unconditionally
in the hot path.
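
The mechanism, visible in the wait_barrier_nolock() hunk below, is converting
resync_lock from a spinlock to a seqlock: normal I/O samples a sequence
counter, takes its nr_pending reference locklessly, and falls back to the
locked slow path only if a resync writer ran concurrently. A minimal userspace
sketch of that pattern, using C11 atomics and hypothetical names (the kernel
version uses seqlock_t, READ_ONCE() and the real read_seqbegin()/read_seqretry()):

#include <stdatomic.h>
#include <stdbool.h>

struct conf {
	atomic_uint seq;        /* odd while a writer is mid-update */
	atomic_int  barrier;    /* stands in for READ_ONCE(conf->barrier) */
	atomic_int  nr_pending;
};

/* Reader side: sample the sequence counter, waiting out any writer. */
static unsigned seq_begin(struct conf *c)
{
	unsigned s;

	while ((s = atomic_load_explicit(&c->seq, memory_order_acquire)) & 1)
		;
	return s;
}

/* Did a writer run since seq_begin()? */
static bool seq_retry(struct conf *c, unsigned s)
{
	return atomic_load_explicit(&c->seq, memory_order_acquire) != s;
}

/* Writer side: a real seqlock also serializes writers with a spinlock. */
static void seq_write_begin(struct conf *c)
{
	atomic_fetch_add_explicit(&c->seq, 1, memory_order_acq_rel); /* odd */
}

static void seq_write_end(struct conf *c)
{
	atomic_fetch_add_explicit(&c->seq, 1, memory_order_release); /* even */
}

/* Fast path of wait_barrier(): grab a pending reference locklessly. */
static bool wait_barrier_nolock(struct conf *c)
{
	unsigned seq = seq_begin(c);

	if (atomic_load_explicit(&c->barrier, memory_order_relaxed))
		return false;                /* resync active: slow path */

	atomic_fetch_add(&c->nr_pending, 1);
	if (!seq_retry(c, seq))
		return true;                 /* no writer raced: done */

	atomic_fetch_sub(&c->nr_pending, 1); /* raced: undo, slow path */
	return false;
}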

Test environment:

Architecture:
aarch64 Huawei KUNPENG 920
x86 Intel(R) Xeon(R) Platinum 8380

RAID10 initialization:
mdadm --create /dev/md0 --level 10 --bitmap none --raid-devices 4 \
    /dev/nvme0n1 /dev/nvme1n1 /dev/nvme2n1 /dev/nvme3n1

Test cmd:
(taskset -c 0-15) fio -name=0 -ioengine=libaio -direct=1 \
    -group_reporting=1 -randseed=2022 -rwmixread=70 -refill_buffers \
    -filename=/dev/md0 -numjobs=16 -runtime=60s -bs=4k -iodepth=256 \
    -rw=randread

Test result:

aarch64:
before this patchset:           3.2 GiB/s
bind node before this patchset: 6.9 GiB/s
after this patchset:            7.9 GiB/s
bind node after this patchset:  8.0 GiB/s

x86 (bind node not tested yet):
before this patchset: 7.0 GiB/s
after this patchset:  9.3 GiB/s

Please note that on the aarch64 test machine, cross-node memory access
latency is much worse than local-node latency, which is why bandwidth
is much better when the test is bound to one node.
Parents: 3bfc3bcd b9b083f9
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
 #include "raid1-10.c"
 
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+	do { \
+		write_sequnlock_irq(&(conf)->resync_lock); \
+		cmd; \
+	} while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+		       cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+	wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
  * 'struct resync_pages'.
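
These helpers exist because a waiter must not sleep while holding the seqlock
write side: cmd_before() drops it before each sleep and cmd_after() re-takes
it on wakeup. Roughly, wait_event_barrier_cmd(conf, cond, cmd) expands to
(illustrative):

	wait_event_cmd((conf)->wait_barrier, cond,
		       do { write_sequnlock_irq(&(conf)->resync_lock); cmd; } while (0),
		       write_seqlock_irq(&(conf)->resync_lock));

wait_event_cmd() runs the first command before every sleep and the second
after every wakeup, so 'cond' is always re-evaluated with the lock held.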
@@ -274,6 +289,12 @@ static void put_buf(struct r10bio *r10_bio)
 	lower_barrier(conf);
 }
 
+static void wake_up_barrier(struct r10conf *conf)
+{
+	if (wq_has_sleeper(&conf->wait_barrier))
+		wake_up(&conf->wait_barrier);
+}
+
 static void reschedule_retry(struct r10bio *r10_bio)
 {
 	unsigned long flags;
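
wq_has_sleeper() issues a full memory barrier and then checks the wait list
locklessly, so the common no-waiter case costs one branch instead of the
waitqueue spinlock taken inside wake_up(). The barrier pairing it relies on,
sketched as an interleaving (illustrative, not from the patch):

	/*
	 *   waiter                              waker
	 *   ------                              -----
	 *   prepare_to_wait()  // queue task,   make condition true
	 *                      // implies mb
	 *   re-check condition                  wq_has_sleeper()  // mb, then
	 *                                       //   wait-list check
	 *   schedule()                          wake_up() only if non-empty
	 */

Either the waiter sees the condition already true, or the waker sees the
queued task, so a lost wakeup is impossible.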
@@ -930,78 +951,101 @@ static void flush_pending_writes(struct r10conf *conf)
 static void raise_barrier(struct r10conf *conf, int force)
 {
+	write_seqlock_irq(&conf->resync_lock);
 	BUG_ON(force && !conf->barrier);
-	spin_lock_irq(&conf->resync_lock);
 
 	/* Wait until no block IO is waiting (unless 'force') */
-	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock);
+	wait_event_barrier(conf, force || !conf->nr_waiting);
 
 	/* block any new IO from starting */
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 
 	/* Now wait for all pending IO to complete */
-	wait_event_lock_irq(conf->wait_barrier,
-			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock);
+	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+				 conf->barrier < RESYNC_DEPTH);
 
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void lower_barrier(struct r10conf *conf)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->barrier--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+	write_seqlock_irqsave(&conf->resync_lock, flags);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
+	write_sequnlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
 
+static bool stop_waiting_barrier(struct r10conf *conf)
+{
+	struct bio_list *bio_list = current->bio_list;
+
+	/* barrier is dropped */
+	if (!conf->barrier)
+		return true;
+
+	/*
+	 * If there are already pending requests (preventing the barrier from
+	 * rising completely), and the pre-process bio queue isn't empty, then
+	 * don't wait, as we need to empty that queue to get the nr_pending
+	 * count down.
+	 */
+	if (atomic_read(&conf->nr_pending) && bio_list &&
+	    (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
+		return true;
+
+	/* move on if recovery thread is blocked by us */
+	if (conf->mddev->thread->tsk == current &&
+	    test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
+	    conf->nr_queued > 0)
+		return true;
+
+	return false;
+}
+
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+	unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+	if (READ_ONCE(conf->barrier))
+		return false;
+
+	atomic_inc(&conf->nr_pending);
+	if (!read_seqretry(&conf->resync_lock, seq))
+		return true;
+
+	if (atomic_dec_and_test(&conf->nr_pending))
+		wake_up_barrier(conf);
+
+	return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
 	bool ret = true;
 
-	spin_lock_irq(&conf->resync_lock);
+	if (wait_barrier_nolock(conf))
+		return true;
+
+	write_seqlock_irq(&conf->resync_lock);
 	if (conf->barrier) {
-		struct bio_list *bio_list = current->bio_list;
-		conf->nr_waiting++;
-		/* Wait for the barrier to drop.
-		 * However if there are already pending
-		 * requests (preventing the barrier from
-		 * rising completely), and the
-		 * pre-process bio queue isn't empty,
-		 * then don't wait, as we need to empty
-		 * that queue to get the nr_pending
-		 * count down.
-		 */
 		/* Return false when nowait flag is set */
 		if (nowait) {
 			ret = false;
 		} else {
+			conf->nr_waiting++;
 			raid10_log(conf->mddev, "wait barrier");
-			wait_event_lock_irq(conf->wait_barrier,
-					    !conf->barrier ||
-					    (atomic_read(&conf->nr_pending) &&
-					     bio_list &&
-					     (!bio_list_empty(&bio_list[0]) ||
-					      !bio_list_empty(&bio_list[1]))) ||
-					     /* move on if recovery thread is
-					      * blocked by us
-					      */
-					     (conf->mddev->thread->tsk == current &&
-					      test_bit(MD_RECOVERY_RUNNING,
-						       &conf->mddev->recovery) &&
-					      conf->nr_queued > 0),
-					    conf->resync_lock);
+			wait_event_barrier(conf, stop_waiting_barrier(conf));
+			conf->nr_waiting--;
 		}
-		conf->nr_waiting--;
 		if (!conf->nr_waiting)
 			wake_up(&conf->wait_barrier);
 	}
 	/* Only increment nr_pending when we wait */
 	if (ret)
 		atomic_inc(&conf->nr_pending);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 	return ret;
 }
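
The ordering inside wait_barrier_nolock() is what keeps the fast path safe:
nr_pending is incremented before the seqcount is re-checked, so a concurrent
raise_barrier() either bumps the sequence first (and the reader backs out) or
observes the reader through its wait on nr_pending. One racy interleaving,
sketched (illustrative):

	/*
	 *   wait_barrier_nolock()               raise_barrier()
	 *   ---------------------               ---------------
	 *   seq = read_seqbegin()
	 *   READ_ONCE(barrier) == 0
	 *                                       write_seqlock_irq()  // seq odd
	 *                                       barrier = 1
	 *   atomic_inc(&nr_pending)
	 *   read_seqretry() -> true: undo the
	 *   increment, retry under the lock
	 *                                       waits for nr_pending == 0
	 */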
@@ -1009,7 +1053,7 @@ static void allow_barrier(struct r10conf *conf)
 {
 	if ((atomic_dec_and_test(&conf->nr_pending)) ||
 			(conf->array_freeze_pending))
-		wake_up(&conf->wait_barrier);
+		wake_up_barrier(conf);
 }
 
 static void freeze_array(struct r10conf *conf, int extra)
@@ -1026,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
 	 * must match the number of pending IOs (nr_pending) before
 	 * we continue.
 	 */
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	conf->array_freeze_pending++;
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 	conf->nr_waiting++;
-	wait_event_lock_irq_cmd(conf->wait_barrier,
-				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-				conf->resync_lock,
-				flush_pending_writes(conf));
+	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+			conf->nr_queued + extra, flush_pending_writes(conf));
 	conf->array_freeze_pending--;
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void unfreeze_array(struct r10conf *conf)
 {
 	/* reverse the effect of the freeze */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
+	write_seqlock_irq(&conf->resync_lock);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
 	conf->nr_waiting--;
 	wake_up(&conf->wait_barrier);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static sector_t choose_data_offset(struct r10bio *r10_bio,
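
Every writer now updates conf->barrier with WRITE_ONCE() because
wait_barrier_nolock() reads it with no lock held; the ONCE accessors keep the
compiler from tearing, fusing, or re-reading the plain int, while the seqcount
retry handles actual write/read races. The contract, in short (both halves
taken from the hunks above):

	/* writer, under the seqlock write side: */
	WRITE_ONCE(conf->barrier, conf->barrier + 1);

	/* lockless reader, in wait_barrier_nolock(): */
	if (READ_ONCE(conf->barrier))
		return false;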
@@ -1885,7 +1926,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	__make_request(mddev, bio, sectors);
 
 	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	wake_up_barrier(conf);
 	return true;
 }
@@ -4033,7 +4074,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 	INIT_LIST_HEAD(&conf->bio_end_io_list);
 
-	spin_lock_init(&conf->resync_lock);
+	seqlock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 	atomic_set(&conf->nr_pending, 0);
@@ -4352,7 +4393,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
 			rdev->new_raid_disk = rdev->raid_disk * 2;
 			rdev->sectors = size;
 		}
-		conf->barrier = 1;
+		WRITE_ONCE(conf->barrier, 1);
 	}
 
 	return conf;
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;
 
-	spinlock_t		resync_lock;
+	seqlock_t		resync_lock;
 	atomic_t		nr_pending;
 	int			nr_waiting;
 	int			nr_queued;