Commit b73c67c2 authored by Damien Le Moal's avatar Damien Le Moal Committed by Mike Snitzer

dm kcopyd: add sequential write feature

When copyying blocks to host-managed zoned block devices, writes must be
sequential.  However, dm_kcopyd_copy() does not guarantee this as writes
are issued in the completion order of reads, and reads may complete out
of order despite being issued sequentially.

Fix this by introducing the DM_KCOPYD_WRITE_SEQ feature flag.  This can
be specified when calling dm_kcopyd_copy() and should be set
automatically if one of the destinations is a host-managed zoned block
device.  For a split job, the master job maintains the write position at
which writes must be issued.  This is checked with the pop() function
which is modified to not return any write I/O sub job that is not at the
correct write position.

When DM_KCOPYD_WRITE_SEQ is specified for a job, errors cannot be
ignored and the flag DM_KCOPYD_IGNORE_ERROR is ignored, even if
specified by the user.
Signed-off-by: default avatarDamien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: default avatarHannes Reinecke <hare@suse.com>
Reviewed-by: default avatarBart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: default avatarMike Snitzer <snitzer@redhat.com>
parent 0be12c1c
......@@ -356,6 +356,7 @@ struct kcopyd_job {
struct mutex lock;
atomic_t sub_jobs;
sector_t progress;
sector_t write_offset;
struct kcopyd_job *master_job;
};
......@@ -386,6 +387,31 @@ void dm_kcopyd_exit(void)
* Functions to push and pop a job onto the head of a given job
* list.
*/
static struct kcopyd_job *pop_io_job(struct list_head *jobs,
struct dm_kcopyd_client *kc)
{
struct kcopyd_job *job;
/*
* For I/O jobs, pop any read, any write without sequential write
* constraint and sequential writes that are at the right position.
*/
list_for_each_entry(job, jobs, list) {
if (job->rw == READ || !test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
list_del(&job->list);
return job;
}
if (job->write_offset == job->master_job->write_offset) {
job->master_job->write_offset += job->source.count;
list_del(&job->list);
return job;
}
}
return NULL;
}
static struct kcopyd_job *pop(struct list_head *jobs,
struct dm_kcopyd_client *kc)
{
......@@ -395,9 +421,13 @@ static struct kcopyd_job *pop(struct list_head *jobs,
spin_lock_irqsave(&kc->job_lock, flags);
if (!list_empty(jobs)) {
if (jobs == &kc->io_jobs)
job = pop_io_job(jobs, kc);
else {
job = list_entry(jobs->next, struct kcopyd_job, list);
list_del(&job->list);
}
}
spin_unlock_irqrestore(&kc->job_lock, flags);
return job;
......@@ -506,6 +536,14 @@ static int run_io_job(struct kcopyd_job *job)
.client = job->kc->io_client,
};
/*
* If we need to write sequentially and some reads or writes failed,
* no point in continuing.
*/
if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
job->master_job->write_err)
return -EIO;
io_job_start(job->kc->throttle);
if (job->rw == READ)
......@@ -655,6 +693,7 @@ static void segment_complete(int read_err, unsigned long write_err,
int i;
*sub_job = *job;
sub_job->write_offset = progress;
sub_job->source.sector += progress;
sub_job->source.count = count;
......@@ -723,6 +762,27 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
job->num_dests = num_dests;
memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
/*
* If one of the destination is a host-managed zoned block device,
* we need to write sequentially. If one of the destination is a
* host-aware device, then leave it to the caller to choose what to do.
*/
if (!test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
for (i = 0; i < job->num_dests; i++) {
if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) {
set_bit(DM_KCOPYD_WRITE_SEQ, &job->flags);
break;
}
}
}
/*
* If we need to write sequentially, errors cannot be ignored.
*/
if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags))
clear_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags);
if (from) {
job->source = *from;
job->pages = NULL;
......@@ -746,6 +806,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
job->fn = fn;
job->context = context;
job->master_job = job;
job->write_offset = 0;
if (job->source.count <= SUB_JOB_SIZE)
dispatch_job(job);
......
......@@ -20,6 +20,7 @@
#define DM_KCOPYD_MAX_REGIONS 8
#define DM_KCOPYD_IGNORE_ERROR 1
#define DM_KCOPYD_WRITE_SEQ 2
struct dm_kcopyd_throttle {
unsigned throttle;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment