Commit 1e3b21c6, authored by Mikulas Patocka and committed by Mike Snitzer

dm integrity: optimize writing dm-bufio buffers that are partially changed

Rather than write the entire dm-bufio buffer when only a subset is
changed, improve dm-bufio (and dm-integrity) by only writing the subset
of the buffer that changed.

Update dm-integrity to make use of dm-bufio's new
dm_bufio_mark_partial_buffer_dirty() interface.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent dc6364b5
...@@ -63,6 +63,12 @@ ...@@ -63,6 +63,12 @@
#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1) #define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1)
#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1))
/*
 * Align buffer writes to this boundary (in bytes).
 *
 * When a partially dirty buffer is written, the start of the dirty range is
 * rounded down and the end is rounded up to a multiple of this value (the
 * end is additionally clamped to the client's block size).  The rounding is
 * done by masking with the negated constant, so the value must be a power
 * of two.
 *
 * Tests show that SSDs have the highest IOPS when using 4k writes.
 */
#define DM_BUFIO_WRITE_ALIGN 4096
/* /*
* dm_buffer->list_mode * dm_buffer->list_mode
*/ */
...@@ -149,6 +155,10 @@ struct dm_buffer { ...@@ -149,6 +155,10 @@ struct dm_buffer {
blk_status_t write_error; blk_status_t write_error;
unsigned long state; unsigned long state;
unsigned long last_accessed; unsigned long last_accessed;
unsigned dirty_start;
unsigned dirty_end;
unsigned write_start;
unsigned write_end;
struct dm_bufio_client *c; struct dm_bufio_client *c;
struct list_head write_list; struct list_head write_list;
struct bio bio; struct bio bio;
...@@ -560,7 +570,7 @@ static void dmio_complete(unsigned long error, void *context) ...@@ -560,7 +570,7 @@ static void dmio_complete(unsigned long error, void *context)
} }
static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
unsigned n_sectors, bio_end_io_t *end_io) unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
{ {
int r; int r;
struct dm_io_request io_req = { struct dm_io_request io_req = {
...@@ -578,10 +588,10 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, ...@@ -578,10 +588,10 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
if (b->data_mode != DATA_MODE_VMALLOC) { if (b->data_mode != DATA_MODE_VMALLOC) {
io_req.mem.type = DM_IO_KMEM; io_req.mem.type = DM_IO_KMEM;
io_req.mem.ptr.addr = b->data; io_req.mem.ptr.addr = (char *)b->data + offset;
} else { } else {
io_req.mem.type = DM_IO_VMA; io_req.mem.type = DM_IO_VMA;
io_req.mem.ptr.vma = b->data; io_req.mem.ptr.vma = (char *)b->data + offset;
} }
b->bio.bi_end_io = end_io; b->bio.bi_end_io = end_io;
...@@ -609,10 +619,10 @@ static void inline_endio(struct bio *bio) ...@@ -609,10 +619,10 @@ static void inline_endio(struct bio *bio)
} }
static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
unsigned n_sectors, bio_end_io_t *end_io) unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
{ {
char *ptr; char *ptr;
int len; unsigned len;
bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
b->bio.bi_iter.bi_sector = sector; b->bio.bi_iter.bi_sector = sector;
...@@ -625,29 +635,20 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, ...@@ -625,29 +635,20 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
b->bio.bi_private = end_io; b->bio.bi_private = end_io;
bio_set_op_attrs(&b->bio, rw, 0); bio_set_op_attrs(&b->bio, rw, 0);
/* ptr = (char *)b->data + offset;
* We assume that if len >= PAGE_SIZE ptr is page-aligned.
* If len < PAGE_SIZE the buffer doesn't cross page boundary.
*/
ptr = b->data;
len = n_sectors << SECTOR_SHIFT; len = n_sectors << SECTOR_SHIFT;
if (len >= PAGE_SIZE)
BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
else
BUG_ON((unsigned long)ptr & (len - 1));
do { do {
if (!bio_add_page(&b->bio, virt_to_page(ptr), unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
len < PAGE_SIZE ? len : PAGE_SIZE, if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
offset_in_page(ptr))) { offset_in_page(ptr))) {
BUG_ON(b->c->block_size <= PAGE_SIZE); BUG_ON(b->c->block_size <= PAGE_SIZE);
use_dmio(b, rw, sector, n_sectors, end_io); use_dmio(b, rw, sector, n_sectors, offset, end_io);
return; return;
} }
len -= PAGE_SIZE; len -= this_step;
ptr += PAGE_SIZE; ptr += this_step;
} while (len > 0); } while (len > 0);
submit_bio(&b->bio); submit_bio(&b->bio);
...@@ -657,18 +658,33 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io) ...@@ -657,18 +658,33 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
{ {
unsigned n_sectors; unsigned n_sectors;
sector_t sector; sector_t sector;
unsigned offset, end;
if (rw == WRITE && b->c->write_callback)
b->c->write_callback(b);
sector = (b->block << b->c->sectors_per_block_bits) + b->c->start; sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
n_sectors = 1 << b->c->sectors_per_block_bits;
if (rw != WRITE) {
n_sectors = 1 << b->c->sectors_per_block_bits;
offset = 0;
} else {
if (b->c->write_callback)
b->c->write_callback(b);
offset = b->write_start;
end = b->write_end;
offset &= -DM_BUFIO_WRITE_ALIGN;
end += DM_BUFIO_WRITE_ALIGN - 1;
end &= -DM_BUFIO_WRITE_ALIGN;
if (unlikely(end > b->c->block_size))
end = b->c->block_size;
sector += offset >> SECTOR_SHIFT;
n_sectors = (end - offset) >> SECTOR_SHIFT;
}
if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) && if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
b->data_mode != DATA_MODE_VMALLOC) b->data_mode != DATA_MODE_VMALLOC)
use_inline_bio(b, rw, sector, n_sectors, end_io); use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
else else
use_dmio(b, rw, sector, n_sectors, end_io); use_dmio(b, rw, sector, n_sectors, offset, end_io);
} }
/*---------------------------------------------------------------- /*----------------------------------------------------------------
...@@ -720,6 +736,9 @@ static void __write_dirty_buffer(struct dm_buffer *b, ...@@ -720,6 +736,9 @@ static void __write_dirty_buffer(struct dm_buffer *b,
clear_bit(B_DIRTY, &b->state); clear_bit(B_DIRTY, &b->state);
wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
b->write_start = b->dirty_start;
b->write_end = b->dirty_end;
if (!write_list) if (!write_list)
submit_io(b, WRITE, write_endio); submit_io(b, WRITE, write_endio);
else else
...@@ -1221,19 +1240,37 @@ void dm_bufio_release(struct dm_buffer *b) ...@@ -1221,19 +1240,37 @@ void dm_bufio_release(struct dm_buffer *b)
} }
EXPORT_SYMBOL_GPL(dm_bufio_release); EXPORT_SYMBOL_GPL(dm_bufio_release);
void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
unsigned start, unsigned end)
{ {
struct dm_bufio_client *c = b->c; struct dm_bufio_client *c = b->c;
BUG_ON(start >= end);
BUG_ON(end > b->c->block_size);
dm_bufio_lock(c); dm_bufio_lock(c);
BUG_ON(test_bit(B_READING, &b->state)); BUG_ON(test_bit(B_READING, &b->state));
if (!test_and_set_bit(B_DIRTY, &b->state)) if (!test_and_set_bit(B_DIRTY, &b->state)) {
b->dirty_start = start;
b->dirty_end = end;
__relink_lru(b, LIST_DIRTY); __relink_lru(b, LIST_DIRTY);
} else {
if (start < b->dirty_start)
b->dirty_start = start;
if (end > b->dirty_end)
b->dirty_end = end;
}
dm_bufio_unlock(c); dm_bufio_unlock(c);
} }
EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);
void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
{
dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
}
EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c) void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
...@@ -1398,6 +1435,8 @@ void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block) ...@@ -1398,6 +1435,8 @@ void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
wait_on_bit_io(&b->state, B_WRITING, wait_on_bit_io(&b->state, B_WRITING,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
set_bit(B_DIRTY, &b->state); set_bit(B_DIRTY, &b->state);
b->dirty_start = 0;
b->dirty_end = c->block_size;
__unlink_buffer(b); __unlink_buffer(b);
__link_buffer(b, new_block, LIST_DIRTY); __link_buffer(b, new_block, LIST_DIRTY);
} else { } else {
......
...@@ -93,6 +93,15 @@ void dm_bufio_release(struct dm_buffer *b); ...@@ -93,6 +93,15 @@ void dm_bufio_release(struct dm_buffer *b);
*/ */
void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
/*
 * Mark a part of the buffer dirty.
 *
 * @b:     the buffer whose contents were modified
 * @start: byte offset within the buffer of the first modified byte
 * @end:   byte offset within the buffer just past the last modified byte;
 *         must satisfy start < end <= block size (violations trigger BUG_ON)
 *
 * The specified part of the buffer is scheduled to be written. dm-bufio may
 * write the specified part of the buffer or it may write a larger superset.
 * If the buffer is already dirty, the new range is merged with the existing
 * one so that the recorded dirty range covers both.
 */
void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
unsigned start, unsigned end);
/* /*
* Initiate writing of dirty buffers, without waiting for completion. * Initiate writing of dirty buffers, without waiting for completion.
*/ */
......
...@@ -1040,7 +1040,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se ...@@ -1040,7 +1040,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
memcpy(tag, dp, to_copy); memcpy(tag, dp, to_copy);
} else if (op == TAG_WRITE) { } else if (op == TAG_WRITE) {
memcpy(dp, tag, to_copy); memcpy(dp, tag, to_copy);
dm_bufio_mark_buffer_dirty(b); dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
} else { } else {
/* e.g.: op == TAG_CMP */ /* e.g.: op == TAG_CMP */
if (unlikely(memcmp(dp, tag, to_copy))) { if (unlikely(memcmp(dp, tag, to_copy))) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment