Commit 764c7c9a authored by Johannes Thumshirn's avatar Johannes Thumshirn Committed by David Sterba

btrfs: zoned: fix parallel compressed writes

When multiple processes write data to the same block group on a
compressed zoned filesystem, the underlying device could report I/O
errors and data corruption is possible.

This happens because on a zoned filesystem, compressed data writes
were sent to the device via a REQ_OP_WRITE instead of a
REQ_OP_ZONE_APPEND operation. But with REQ_OP_WRITE and parallel
submission it cannot be guaranteed that the data is always submitted
aligned to the underlying zone's write pointer.

The change to using REQ_OP_ZONE_APPEND instead of REQ_OP_WRITE on a
zoned filesystem is non-intrusive on a regular filesystem or when
submitting to a conventional zone on a zoned filesystem, as it is
guarded by btrfs_use_zone_append.
Reported-by: David Sterba <dsterba@suse.com>
Fixes: 9d294a68 ("btrfs: zoned: enable to mount ZONED incompat flag")
CC: stable@vger.kernel.org # 5.12.x: e380adfc: btrfs: zoned: pass start block to btrfs_use_zone_append
CC: stable@vger.kernel.org # 5.12.x
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent e380adfc
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "compression.h" #include "compression.h"
#include "extent_io.h" #include "extent_io.h"
#include "extent_map.h" #include "extent_map.h"
#include "zoned.h"
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" }; static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
...@@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio) ...@@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio)
*/ */
inode = cb->inode; inode = cb->inode;
cb->compressed_pages[0]->mapping = cb->inode->i_mapping; cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
btrfs_record_physical_zoned(inode, cb->start, bio);
btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0], btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
cb->start, cb->start + cb->len - 1, cb->start, cb->start + cb->len - 1,
bio->bi_status == BLK_STS_OK); bio->bi_status == BLK_STS_OK);
...@@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, ...@@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
u64 first_byte = disk_start; u64 first_byte = disk_start;
blk_status_t ret; blk_status_t ret;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM; int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
const bool use_append = btrfs_use_zone_append(inode, disk_start);
const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
WARN_ON(!PAGE_ALIGNED(start)); WARN_ON(!PAGE_ALIGNED(start));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
...@@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, ...@@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
cb->nr_pages = nr_pages; cb->nr_pages = nr_pages;
bio = btrfs_bio_alloc(first_byte); bio = btrfs_bio_alloc(first_byte);
bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_opf = bio_op | write_flags;
bio->bi_private = cb; bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write; bio->bi_end_io = end_compressed_bio_write;
if (use_append) {
struct extent_map *em;
struct map_lookup *map;
struct block_device *bdev;
em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE);
if (IS_ERR(em)) {
kfree(cb);
bio_put(bio);
return BLK_STS_NOTSUPP;
}
map = em->map_lookup;
/* We only support single profile for now */
ASSERT(map->num_stripes == 1);
bdev = map->stripes[0].dev->bdev;
bio_set_dev(bio, bdev);
free_extent_map(em);
}
if (blkcg_css) { if (blkcg_css) {
bio->bi_opf |= REQ_CGROUP_PUNT; bio->bi_opf |= REQ_CGROUP_PUNT;
kthread_associate_blkcg(blkcg_css); kthread_associate_blkcg(blkcg_css);
...@@ -432,6 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, ...@@ -432,6 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
bytes_left = compressed_len; bytes_left = compressed_len;
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
int submit = 0; int submit = 0;
int len;
page = compressed_pages[pg_index]; page = compressed_pages[pg_index];
page->mapping = inode->vfs_inode.i_mapping; page->mapping = inode->vfs_inode.i_mapping;
...@@ -439,9 +465,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, ...@@ -439,9 +465,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio, submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
0); 0);
if (pg_index == 0 && use_append)
len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
else
len = bio_add_page(bio, page, PAGE_SIZE, 0);
page->mapping = NULL; page->mapping = NULL;
if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < if (submit || len < PAGE_SIZE) {
PAGE_SIZE) {
/* /*
* inc the count before we submit the bio so * inc the count before we submit the bio so
* we know the end IO handler won't happen before * we know the end IO handler won't happen before
...@@ -465,11 +495,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, ...@@ -465,11 +495,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
} }
bio = btrfs_bio_alloc(first_byte); bio = btrfs_bio_alloc(first_byte);
bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_opf = bio_op | write_flags;
bio->bi_private = cb; bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write; bio->bi_end_io = end_compressed_bio_write;
if (blkcg_css) if (blkcg_css)
bio->bi_opf |= REQ_CGROUP_PUNT; bio->bi_opf |= REQ_CGROUP_PUNT;
/*
* Use bio_add_page() to ensure the bio has at least one
* page.
*/
bio_add_page(bio, page, PAGE_SIZE, 0); bio_add_page(bio, page, PAGE_SIZE, 0);
} }
if (bytes_left < PAGE_SIZE) { if (bytes_left < PAGE_SIZE) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment