Commit d034cdb4 authored by Qu Wenruo, committed by David Sterba

btrfs: lock subpage ranges in one go for writepage_delalloc()

If we have a subpage range like this for a 16K page with 4K sectorsize:

    0     4K     8K     12K     16K
    |/////|      |//////|       |

    |/////| = dirty range

Currently writepage_delalloc() would go through the following steps:

- lock range [0, 4K)
- run delalloc range for [0, 4K)
- lock range [8K, 12K)
- run delalloc range for [8K 12K)

So far this is fine for regular subpage writeback, as
btrfs_run_delalloc_range() can only go into one of run_delalloc_nocow(),
cow_file_range() or run_delalloc_compressed().

But there is a special case for zoned subpage, where we go through
run_delalloc_cow(), which creates the ordered extent for the range and
immediately submits it.
This unlocks the whole page range, triggering all kinds of ASSERT()s
related to the locked page.

Address the page unlocking problem of run_delalloc_cow() by changing
the workflow to the following:

- lock range [0, 4K)
- lock range [8K, 12K)
- run delalloc range for [0, 4K)
- run delalloc range for [8K, 12K)

This way run_delalloc_cow() can only unlock the full page once the last
lock holder has released its range.
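
Expressed as a condensed sketch of the function body (error handling, the
regular non-subpage shortcut and the end-of-page clamping are omitted, and
find_lock_delalloc_range()'s argument list is filled in from its existing
callers), the reworked writepage_delalloc() becomes a two-pass loop:

    /* Pass 1: find and lock every delalloc range inside the page. */
    while (delalloc_start < page_end) {
    	delalloc_end = page_end;
    	if (!find_lock_delalloc_range(&inode->vfs_inode, page,
    				      &delalloc_start, &delalloc_end)) {
    		delalloc_start = delalloc_end + 1;
    		continue;
    	}
    	/* Record the locked range in the subpage locked bitmap. */
    	btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start,
    				    min(delalloc_end, page_end) + 1 - delalloc_start);
    	last_delalloc_end = delalloc_end;
    	delalloc_start = delalloc_end + 1;
    }

    /* Pass 2: run delalloc for every range recorded in the bitmap. */
    delalloc_start = page_start;
    while (delalloc_start < page_end) {
    	u64 found_start;
    	u32 found_len;

    	if (!btrfs_subpage_find_writer_locked(fs_info, folio, delalloc_start,
    					      &found_start, &found_len))
    		break;
    	ret = btrfs_run_delalloc_range(inode, page, found_start,
    				       found_start + found_len - 1, wbc);
    	/*
    	 * The above call may have unlocked the page for the last range,
    	 * so stop without touching the page again.
    	 */
    	if (found_start + found_len >= last_delalloc_end + 1)
    		break;
    	delalloc_start = found_start + found_len;
    }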

To do that:

- Utilize the subpage locked bitmap
  For every delalloc range we find, call btrfs_folio_set_writer_lock()
  to populate the subpage locked bitmap, and later call
  btrfs_folio_end_all_writers() when the page is to be fully unlocked.

  This way we know there is a delalloc range that still needs to be run
  later.

- Save the @delalloc_end as @last_delalloc_end inside writepage_delalloc()
  Since the subpage locked bitmap only covers ranges inside the page,
  while a delalloc range can end beyond the page boundary, we have to
  save @last_delalloc_end in case the range goes past the page boundary
  (see the fragment right below).
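
  For illustration, this is the clamping done in the second loop (a
  fragment of the patch below): when the locked bitmap says the range
  reaches the last sector of the page, the saved @last_delalloc_end is
  used instead of the in-page length.

    if (found_start + found_len >= page_end)
    	found_len = last_delalloc_end + 1 - found_start;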

There is one extra point to note:

- We need to handle errors from previous iterations
  Since we can have multiple locked delalloc ranges, we have to call
  btrfs_run_delalloc_range() multiple times.
  If we hit an error half way through, we still need to unlock the
  remaining ranges, as the fragment below shows.
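
  For illustration, the cleanup branch in the patch below only unlocks
  the ranges that remain locked once @ret already holds an error,
  instead of running delalloc on them:

    /* A previous btrfs_run_delalloc_range() failed, just unlock the rest. */
    unlock_extent(&inode->io_tree, found_start,
    	      found_start + found_len - 1, NULL);
    __unlock_for_delalloc(&inode->vfs_inode, page, found_start,
    		      found_start + found_len - 1);
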
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent bca707e5
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1226,13 +1226,23 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 		struct page *page, struct writeback_control *wbc)
 {
+	struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode);
+	struct folio *folio = page_folio(page);
+	const bool is_subpage = btrfs_is_subpage(fs_info, page->mapping);
 	const u64 page_start = page_offset(page);
 	const u64 page_end = page_start + PAGE_SIZE - 1;
+	/*
+	 * Save the last found delalloc end. As the delalloc end can go beyond
+	 * page boundary, thus we cannot rely on subpage bitmap to locate the
+	 * last delalloc end.
+	 */
+	u64 last_delalloc_end = 0;
 	u64 delalloc_start = page_start;
 	u64 delalloc_end = page_end;
 	u64 delalloc_to_write = 0;
 	int ret = 0;
 
+	/* Lock all (subpage) delalloc ranges inside the page first. */
 	while (delalloc_start < page_end) {
 		delalloc_end = page_end;
 		if (!find_lock_delalloc_range(&inode->vfs_inode, page,
@@ -1240,15 +1250,95 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 			delalloc_start = delalloc_end + 1;
 			continue;
 		}
-
-		ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
-					       delalloc_end, wbc);
-		if (ret < 0)
-			return ret;
-
+		btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start,
+					    min(delalloc_end, page_end) + 1 -
+					    delalloc_start);
+		last_delalloc_end = delalloc_end;
 		delalloc_start = delalloc_end + 1;
 	}
+	delalloc_start = page_start;
+
+	if (!last_delalloc_end)
+		goto out;
+
+	/* Run the delalloc ranges for the above locked ranges. */
+	while (delalloc_start < page_end) {
+		u64 found_start;
+		u32 found_len;
+		bool found;
+
+		if (!is_subpage) {
+			/*
+			 * For non-subpage case, the found delalloc range must
+			 * cover this page and there must be only one locked
+			 * delalloc range.
+			 */
+			found_start = page_start;
+			found_len = last_delalloc_end + 1 - found_start;
+			found = true;
+		} else {
+			found = btrfs_subpage_find_writer_locked(fs_info, folio,
+					delalloc_start, &found_start, &found_len);
+		}
+		if (!found)
+			break;
+		/*
+		 * The subpage range covers the last sector, the delalloc range may
+		 * end beyond the page boundary, use the saved delalloc_end
+		 * instead.
+		 */
+		if (found_start + found_len >= page_end)
+			found_len = last_delalloc_end + 1 - found_start;
+
+		if (ret >= 0) {
+			/* No errors hit so far, run the current delalloc range. */
+			ret = btrfs_run_delalloc_range(inode, page, found_start,
+						       found_start + found_len - 1,
+						       wbc);
+		} else {
+			/*
+			 * We've hit an error during previous delalloc range,
+			 * have to cleanup the remaining locked ranges.
+			 */
+			unlock_extent(&inode->io_tree, found_start,
+				      found_start + found_len - 1, NULL);
+			__unlock_for_delalloc(&inode->vfs_inode, page, found_start,
					      found_start + found_len - 1);
+		}
+
+		/*
+		 * We can hit btrfs_run_delalloc_range() with >0 return value.
+		 *
+		 * This happens when either the IO is already done and page
+		 * unlocked (inline) or the IO submission and page unlock would
+		 * be handled as async (compression).
+		 *
+		 * Inline is only possible for regular sectorsize for now.
+		 *
+		 * Compression is possible for both subpage and regular cases,
+		 * but even for subpage compression only happens for page aligned
+		 * range, thus the found delalloc range must go beyond current
+		 * page.
+		 */
+		if (ret > 0)
+			ASSERT(!is_subpage || found_start + found_len >= page_end);
+
+		/*
+		 * Above btrfs_run_delalloc_range() may have unlocked the page,
+		 * thus for the last range, we cannot touch the page anymore.
+		 */
+		if (found_start + found_len >= last_delalloc_end + 1)
+			break;
+
+		delalloc_start = found_start + found_len;
+	}
+	if (ret < 0)
+		return ret;
+out:
+	if (last_delalloc_end)
+		delalloc_end = last_delalloc_end;
+	else
+		delalloc_end = page_end;
 
 	/*
 	 * delalloc_end is already one less than the total length, so
 	 * we don't subtract one from PAGE_SIZE
@@ -1520,7 +1610,8 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl)
 				       PAGE_SIZE, !ret);
 		mapping_set_error(page->mapping, ret);
 	}
-	unlock_page(page);
+
+	btrfs_folio_end_all_writers(inode_to_fs_info(inode), folio);
 	ASSERT(ret <= 0);
 	return ret;
 }
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -864,6 +864,7 @@ bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
  */
 void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio)
 {
+	struct btrfs_subpage *subpage = folio_get_private(folio);
 	u64 folio_start = folio_pos(folio);
 	u64 cur = folio_start;
 
@@ -873,6 +874,11 @@ void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio)
 		return;
 	}
 
+	/* The page has no new delalloc range locked on it. Just plain unlock. */
+	if (atomic_read(&subpage->writers) == 0) {
+		folio_unlock(folio);
+		return;
+	}
 	while (cur < folio_start + PAGE_SIZE) {
 		u64 found_start;
 		u32 found_len;