Commit 6a921de5, authored by Naohiro Aota, committed by David Sterba

btrfs: zoned: introduce space_info->active_total_bytes

The active_total_bytes, like the total_bytes, accounts for total bytes in
the space_info, but it counts only the bytes of active block groups.

With the introduction of active_total_bytes, we can check if the reserved
bytes can be written to the block groups without activating a new block
group. The check is necessary for metadata allocation on a zoned
filesystem. We cannot finish a block group, which may require waiting
for the current transaction, from the metadata allocation context.
Instead, we need to ensure the ongoing allocation (reserved bytes) fits
in the active block groups.
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 393f646e
...@@ -1051,8 +1051,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ...@@ -1051,8 +1051,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
< block_group->zone_unusable); < block_group->zone_unusable);
WARN_ON(block_group->space_info->disk_total WARN_ON(block_group->space_info->disk_total
< block_group->length * factor); < block_group->length * factor);
WARN_ON(block_group->zone_is_active &&
block_group->space_info->active_total_bytes
< block_group->length);
} }
block_group->space_info->total_bytes -= block_group->length; block_group->space_info->total_bytes -= block_group->length;
if (block_group->zone_is_active)
block_group->space_info->active_total_bytes -= block_group->length;
block_group->space_info->bytes_readonly -= block_group->space_info->bytes_readonly -=
(block_group->length - block_group->zone_unusable); (block_group->length - block_group->zone_unusable);
block_group->space_info->bytes_zone_unusable -= block_group->space_info->bytes_zone_unusable -=
...@@ -2107,7 +2112,8 @@ static int read_one_block_group(struct btrfs_fs_info *info, ...@@ -2107,7 +2112,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
trace_btrfs_add_block_group(info, cache, 0); trace_btrfs_add_block_group(info, cache, 0);
btrfs_update_space_info(info, cache->flags, cache->length, btrfs_update_space_info(info, cache->flags, cache->length,
cache->used, cache->bytes_super, cache->used, cache->bytes_super,
cache->zone_unusable, &space_info); cache->zone_unusable, cache->zone_is_active,
&space_info);
cache->space_info = space_info; cache->space_info = space_info;
...@@ -2177,7 +2183,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info) ...@@ -2177,7 +2183,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
} }
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len, btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
0, 0, &space_info); 0, 0, false, &space_info);
bg->space_info = space_info; bg->space_info = space_info;
link_block_group(bg); link_block_group(bg);
...@@ -2558,7 +2564,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran ...@@ -2558,7 +2564,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
trace_btrfs_add_block_group(fs_info, cache, 1); trace_btrfs_add_block_group(fs_info, cache, 1);
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used, btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
cache->bytes_super, cache->zone_unusable, cache->bytes_super, cache->zone_unusable,
&cache->space_info); cache->zone_is_active, &cache->space_info);
btrfs_update_global_block_rsv(fs_info); btrfs_update_global_block_rsv(fs_info);
link_block_group(cache); link_block_group(cache);
......
...@@ -295,7 +295,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) ...@@ -295,7 +295,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used, u64 total_bytes, u64 bytes_used,
u64 bytes_readonly, u64 bytes_zone_unusable, u64 bytes_readonly, u64 bytes_zone_unusable,
struct btrfs_space_info **space_info) bool active, struct btrfs_space_info **space_info)
{ {
struct btrfs_space_info *found; struct btrfs_space_info *found;
int factor; int factor;
...@@ -306,6 +306,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, ...@@ -306,6 +306,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
ASSERT(found); ASSERT(found);
spin_lock(&found->lock); spin_lock(&found->lock);
found->total_bytes += total_bytes; found->total_bytes += total_bytes;
if (active)
found->active_total_bytes += total_bytes;
found->disk_total += total_bytes * factor; found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used; found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor; found->disk_used += bytes_used * factor;
...@@ -369,6 +371,22 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, ...@@ -369,6 +371,22 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
return avail; return avail;
} }
/*
 * Return the number of bytes in @space_info that can currently be written.
 *
 * A regular (non-zoned) filesystem can write to all of total_bytes. A zoned
 * filesystem may be limited by max_active_zones, and a metadata allocation
 * cannot finish an existing active block group without risking a deadlock.
 * So for non-data space on a zoned filesystem only the active block groups
 * (active_total_bytes) count as writable; in every other case the full
 * total_bytes is returned.
 */
static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info,
				       struct btrfs_space_info *space_info)
{
	/* Zoned and not a data space_info: restrict to active block groups. */
	if (btrfs_is_zoned(fs_info) &&
	    !(space_info->flags & BTRFS_BLOCK_GROUP_DATA))
		return space_info->active_total_bytes;

	return space_info->total_bytes;
}
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 bytes, struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush) enum btrfs_reserve_flush_enum flush)
...@@ -383,7 +401,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, ...@@ -383,7 +401,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
used = btrfs_space_info_used(space_info, true); used = btrfs_space_info_used(space_info, true);
avail = calc_available_free_space(fs_info, space_info, flush); avail = calc_available_free_space(fs_info, space_info, flush);
if (used + bytes < space_info->total_bytes + avail) if (used + bytes < writable_total_bytes(fs_info, space_info) + avail)
return 1; return 1;
return 0; return 0;
} }
...@@ -419,7 +437,7 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, ...@@ -419,7 +437,7 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
ticket = list_first_entry(head, struct reserve_ticket, list); ticket = list_first_entry(head, struct reserve_ticket, list);
/* Check and see if our ticket can be satisfied now. */ /* Check and see if our ticket can be satisfied now. */
if ((used + ticket->bytes <= space_info->total_bytes) || if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) ||
btrfs_can_overcommit(fs_info, space_info, ticket->bytes, btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
flush)) { flush)) {
btrfs_space_info_update_bytes_may_use(fs_info, btrfs_space_info_update_bytes_may_use(fs_info,
...@@ -750,6 +768,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, ...@@ -750,6 +768,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
{ {
u64 used; u64 used;
u64 avail; u64 avail;
u64 total;
u64 to_reclaim = space_info->reclaim_size; u64 to_reclaim = space_info->reclaim_size;
lockdep_assert_held(&space_info->lock); lockdep_assert_held(&space_info->lock);
...@@ -764,8 +783,9 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, ...@@ -764,8 +783,9 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
* space. If that's the case add in our overage so we make sure to put * space. If that's the case add in our overage so we make sure to put
* appropriate pressure on the flushing state machine. * appropriate pressure on the flushing state machine.
*/ */
if (space_info->total_bytes + avail < used) total = writable_total_bytes(fs_info, space_info);
to_reclaim += used - (space_info->total_bytes + avail); if (total + avail < used)
to_reclaim += used - (total + avail);
return to_reclaim; return to_reclaim;
} }
...@@ -775,9 +795,12 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, ...@@ -775,9 +795,12 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
{ {
u64 global_rsv_size = fs_info->global_block_rsv.reserved; u64 global_rsv_size = fs_info->global_block_rsv.reserved;
u64 ordered, delalloc; u64 ordered, delalloc;
u64 thresh = div_factor_fine(space_info->total_bytes, 90); u64 total = writable_total_bytes(fs_info, space_info);
u64 thresh;
u64 used; u64 used;
thresh = div_factor_fine(total, 90);
lockdep_assert_held(&space_info->lock); lockdep_assert_held(&space_info->lock);
/* If we're just plain full then async reclaim just slows us down. */ /* If we're just plain full then async reclaim just slows us down. */
...@@ -839,8 +862,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, ...@@ -839,8 +862,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
BTRFS_RESERVE_FLUSH_ALL); BTRFS_RESERVE_FLUSH_ALL);
used = space_info->bytes_used + space_info->bytes_reserved + used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_readonly + global_rsv_size; space_info->bytes_readonly + global_rsv_size;
if (used < space_info->total_bytes) if (used < total)
thresh += space_info->total_bytes - used; thresh += total - used;
thresh >>= space_info->clamp; thresh >>= space_info->clamp;
used = space_info->bytes_pinned; used = space_info->bytes_pinned;
...@@ -1557,7 +1580,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, ...@@ -1557,7 +1580,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* can_overcommit() to ensure we can overcommit to continue. * can_overcommit() to ensure we can overcommit to continue.
*/ */
if (!pending_tickets && if (!pending_tickets &&
((used + orig_bytes <= space_info->total_bytes) || ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) ||
btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
btrfs_space_info_update_bytes_may_use(fs_info, space_info, btrfs_space_info_update_bytes_may_use(fs_info, space_info,
orig_bytes); orig_bytes);
......
...@@ -19,6 +19,8 @@ struct btrfs_space_info { ...@@ -19,6 +19,8 @@ struct btrfs_space_info {
u64 bytes_may_use; /* number of bytes that may be used for u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */ delalloc/allocations */
u64 bytes_readonly; /* total bytes that are read only */ u64 bytes_readonly; /* total bytes that are read only */
/* Total bytes in the space, but only accounts active block groups. */
u64 active_total_bytes;
u64 bytes_zone_unusable; /* total bytes that are unusable until u64 bytes_zone_unusable; /* total bytes that are unusable until
resetting the device zone */ resetting the device zone */
...@@ -124,7 +126,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); ...@@ -124,7 +126,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used, u64 total_bytes, u64 bytes_used,
u64 bytes_readonly, u64 bytes_zone_unusable, u64 bytes_readonly, u64 bytes_zone_unusable,
struct btrfs_space_info **space_info); bool active, struct btrfs_space_info **space_info);
void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info, void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
u64 chunk_size); u64 chunk_size);
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
......
...@@ -1849,6 +1849,7 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, ...@@ -1849,6 +1849,7 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
bool btrfs_zone_activate(struct btrfs_block_group *block_group) bool btrfs_zone_activate(struct btrfs_block_group *block_group)
{ {
struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_space_info *space_info = block_group->space_info;
struct map_lookup *map; struct map_lookup *map;
struct btrfs_device *device; struct btrfs_device *device;
u64 physical; u64 physical;
...@@ -1860,6 +1861,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) ...@@ -1860,6 +1861,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
map = block_group->physical_map; map = block_group->physical_map;
spin_lock(&space_info->lock);
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
if (block_group->zone_is_active) { if (block_group->zone_is_active) {
ret = true; ret = true;
...@@ -1888,7 +1890,10 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) ...@@ -1888,7 +1890,10 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
/* Successfully activated all the zones */ /* Successfully activated all the zones */
block_group->zone_is_active = 1; block_group->zone_is_active = 1;
space_info->active_total_bytes += block_group->length;
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
btrfs_try_granting_tickets(fs_info, space_info);
spin_unlock(&space_info->lock);
/* For the active block group list */ /* For the active block group list */
btrfs_get_block_group(block_group); btrfs_get_block_group(block_group);
...@@ -1901,6 +1906,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) ...@@ -1901,6 +1906,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
out_unlock: out_unlock:
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment