Commit 04f0847c authored by Christoph Hellwig, committed by David Sterba

btrfs: don't rely on unchanging ->bi_bdev for zone append remaps

btrfs_record_physical_zoned relies on a bio->bi_bdev value sampled in the
bio_end_io handler to find the reverse map for remapping the zone append
write, but stacked block device drivers can and usually do change bi_bdev
when passing the bio on to a lower device.  This can happen e.g. with the
nvme-multipath driver when an NVMe SSD sets the shared namespace bit.

But there is no real need for the bdev in btrfs_record_physical_zoned,
as it is only passed to btrfs_rmap_block, which uses it to pick the
mapping to report if there are multiple reverse mappings.  As zone
writes can only do simple non-mirror writes right now, and anything
more complex will use the stripe tree, there is no chance of the
multiple mappings case actually happening.

Instead, open code the needed subset of btrfs_rmap_block in
btrfs_record_physical_zoned, which also removes a memory allocation, and
remove the bdev field from the ordered extent.

Fixes: d8e3fb10 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
parent fdf9a37d
...@@ -3162,8 +3162,8 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ...@@ -3162,8 +3162,8 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out; goto out;
} }
/* A valid bdev implies a write on a sequential zone */ /* A valid ->physical implies a write on a sequential zone. */
if (ordered_extent->bdev) { if (ordered_extent->physical != (u64)-1) {
btrfs_rewrite_logical_zoned(ordered_extent); btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr, btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes); ordered_extent->disk_num_bytes);
......
...@@ -157,7 +157,6 @@ struct btrfs_ordered_extent { ...@@ -157,7 +157,6 @@ struct btrfs_ordered_extent {
* command in a workqueue context * command in a workqueue context
*/ */
u64 physical; u64 physical;
struct block_device *bdev;
}; };
static inline void static inline void
......
...@@ -1676,8 +1676,6 @@ void btrfs_record_physical_zoned(struct btrfs_bio *bbio) ...@@ -1676,8 +1676,6 @@ void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
return; return;
ordered->physical = physical; ordered->physical = physical;
ordered->bdev = bbio->bio.bi_bdev;
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
} }
...@@ -1689,43 +1687,46 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) ...@@ -1689,43 +1687,46 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
struct extent_map *em; struct extent_map *em;
struct btrfs_ordered_sum *sum; struct btrfs_ordered_sum *sum;
u64 orig_logical = ordered->disk_bytenr; u64 orig_logical = ordered->disk_bytenr;
u64 *logical = NULL; struct map_lookup *map;
int nr, stripe_len; u64 physical = ordered->physical;
u64 chunk_start_phys;
u64 logical;
/* Zoned devices should not have partitions. So, we can assume it is 0 */ em = btrfs_get_chunk_map(fs_info, orig_logical, 1);
ASSERT(!bdev_is_partition(ordered->bdev)); if (IS_ERR(em))
if (WARN_ON(!ordered->bdev))
return; return;
map = em->map_lookup;
chunk_start_phys = map->stripes[0].physical;
if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev, if (WARN_ON_ONCE(map->num_stripes > 1) ||
ordered->physical, &logical, &nr, WARN_ON_ONCE((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) ||
&stripe_len))) WARN_ON_ONCE(physical < chunk_start_phys) ||
goto out; WARN_ON_ONCE(physical > chunk_start_phys + em->orig_block_len)) {
free_extent_map(em);
WARN_ON(nr != 1); return;
}
logical = em->start + (physical - map->stripes[0].physical);
free_extent_map(em);
if (orig_logical == *logical) if (orig_logical == logical)
goto out; return;
ordered->disk_bytenr = *logical; ordered->disk_bytenr = logical;
em_tree = &inode->extent_tree; em_tree = &inode->extent_tree;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
em = search_extent_mapping(em_tree, ordered->file_offset, em = search_extent_mapping(em_tree, ordered->file_offset,
ordered->num_bytes); ordered->num_bytes);
em->block_start = *logical; em->block_start = logical;
free_extent_map(em); free_extent_map(em);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
list_for_each_entry(sum, &ordered->list, list) { list_for_each_entry(sum, &ordered->list, list) {
if (*logical < orig_logical) if (logical < orig_logical)
sum->bytenr -= orig_logical - *logical; sum->bytenr -= orig_logical - logical;
else else
sum->bytenr += *logical - orig_logical; sum->bytenr += logical - orig_logical;
} }
out:
kfree(logical);
} }
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment