Commit 243cf8d1 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by David Sterba

btrfs: calculate file system wide queue limit for zoned mode

To be able to split a write into properly sized zone append commands,
we need a queue_limits structure that contains the least common
denominator suitable for all devices.
Reviewed-by: default avatarJosef Bacik <josef@toxicpanda.com>
Reviewed-by: default avatarJohannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 35a8d7da
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#ifndef BTRFS_FS_H #ifndef BTRFS_FS_H
#define BTRFS_FS_H #define BTRFS_FS_H
#include <linux/blkdev.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/btrfs_tree.h> #include <linux/btrfs_tree.h>
#include <linux/sizes.h> #include <linux/sizes.h>
...@@ -748,8 +749,10 @@ struct btrfs_fs_info { ...@@ -748,8 +749,10 @@ struct btrfs_fs_info {
*/ */
u64 zone_size; u64 zone_size;
/* Max size to emit ZONE_APPEND write command */ /* Constraints for ZONE_APPEND commands: */
struct queue_limits limits;
u64 max_zone_append_size; u64 max_zone_append_size;
struct mutex zoned_meta_io_lock; struct mutex zoned_meta_io_lock;
spinlock_t treelog_bg_lock; spinlock_t treelog_bg_lock;
u64 treelog_bg; u64 treelog_bg;
......
...@@ -421,25 +421,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) ...@@ -421,25 +421,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
nr_sectors = bdev_nr_sectors(bdev); nr_sectors = bdev_nr_sectors(bdev);
zone_info->zone_size_shift = ilog2(zone_info->zone_size); zone_info->zone_size_shift = ilog2(zone_info->zone_size);
zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
/*
* We limit max_zone_append_size also by max_segments *
* PAGE_SIZE. Technically, we can have multiple pages per segment. But,
* since btrfs adds the pages one by one to a bio, and btrfs cannot
* increase the metadata reservation even if it increases the number of
* extents, it is safe to stick with the limit.
*
* With the zoned emulation, we can have non-zoned device on the zoned
* mode. In this case, we don't have a valid max zone append size. So,
* use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
*/
if (bdev_is_zoned(bdev)) {
zone_info->max_zone_append_size = min_t(u64,
(u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
(u64)bdev_max_segments(bdev) << PAGE_SHIFT);
} else {
zone_info->max_zone_append_size =
(u64)bdev_max_segments(bdev) << PAGE_SHIFT;
}
if (!IS_ALIGNED(nr_sectors, zone_sectors)) if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++; zone_info->nr_zones++;
...@@ -719,9 +700,9 @@ static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info) ...@@ -719,9 +700,9 @@ static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info)
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
{ {
struct queue_limits *lim = &fs_info->limits;
struct btrfs_device *device; struct btrfs_device *device;
u64 zone_size = 0; u64 zone_size = 0;
u64 max_zone_append_size = 0;
int ret; int ret;
/* /*
...@@ -731,6 +712,8 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) ...@@ -731,6 +712,8 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
if (!btrfs_fs_incompat(fs_info, ZONED)) if (!btrfs_fs_incompat(fs_info, ZONED))
return btrfs_check_for_zoned_device(fs_info); return btrfs_check_for_zoned_device(fs_info);
blk_set_stacking_limits(lim);
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
struct btrfs_zoned_device_info *zone_info = device->zone_info; struct btrfs_zoned_device_info *zone_info = device->zone_info;
...@@ -745,10 +728,17 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) ...@@ -745,10 +728,17 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
zone_info->zone_size, zone_size); zone_info->zone_size, zone_size);
return -EINVAL; return -EINVAL;
} }
if (!max_zone_append_size ||
(zone_info->max_zone_append_size && /*
zone_info->max_zone_append_size < max_zone_append_size)) * With the zoned emulation, we can have non-zoned device on the
max_zone_append_size = zone_info->max_zone_append_size; * zoned mode. In this case, we don't have a valid max zone
* append size.
*/
if (bdev_is_zoned(device->bdev)) {
blk_stack_limits(lim,
&bdev_get_queue(device->bdev)->limits,
0);
}
} }
/* /*
...@@ -769,7 +759,17 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) ...@@ -769,7 +759,17 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
} }
fs_info->zone_size = zone_size; fs_info->zone_size = zone_size;
fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size, /*
* Also limit max_zone_append_size by max_segments * PAGE_SIZE.
* Technically, we can have multiple pages per segment. But, since
* we add the pages one by one to a bio, and cannot increase the
* metadata reservation even if it increases the number of extents, it
* is safe to stick with the limit.
*/
fs_info->max_zone_append_size = ALIGN_DOWN(
min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT,
(u64)lim->max_sectors << SECTOR_SHIFT,
(u64)lim->max_segments << PAGE_SHIFT),
fs_info->sectorsize); fs_info->sectorsize);
fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
if (fs_info->max_zone_append_size < fs_info->max_extent_size) if (fs_info->max_zone_append_size < fs_info->max_extent_size)
......
...@@ -20,7 +20,6 @@ struct btrfs_zoned_device_info { ...@@ -20,7 +20,6 @@ struct btrfs_zoned_device_info {
*/ */
u64 zone_size; u64 zone_size;
u8 zone_size_shift; u8 zone_size_shift;
u64 max_zone_append_size;
u32 nr_zones; u32 nr_zones;
unsigned int max_active_zones; unsigned int max_active_zones;
atomic_t active_zones_left; atomic_t active_zones_left;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment