Commit 80eabba7 authored by Linus Torvalds

Merge branch 'for-4.10/fs-unmap' of git://git.kernel.dk/linux-block

Pull fs metadata unmap optimization from Jens Axboe:
 "A series from Jan Kara, providing a more efficient way of unmapping
  metadata in the buffer cache than doing it block-by-block.

  Provide a general helper that existing callers can use."

* 'for-4.10/fs-unmap' of git://git.kernel.dk/linux-block:
  fs: Remove unmap_underlying_metadata
  fs: Add helper to clean bdev aliases under a bh and use it
  ext2: Use clean_bdev_aliases() instead of iteration
  ext4: Use clean_bdev_aliases() instead of iteration
  direct-io: Use clean_bdev_aliases() instead of handmade iteration
  fs: Provide function to unmap metadata for a range of blocks
parents 852d21ae ce98321b
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/mpage.h> #include <linux/mpage.h>
#include <linux/bit_spinlock.h> #include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <trace/events/block.h> #include <trace/events/block.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
...@@ -1604,37 +1605,80 @@ void create_empty_buffers(struct page *page, ...@@ -1604,37 +1605,80 @@ void create_empty_buffers(struct page *page,
} }
EXPORT_SYMBOL(create_empty_buffers); EXPORT_SYMBOL(create_empty_buffers);
/* /**
* We are taking a block for data and we don't want any output from any * clean_bdev_aliases: clean a range of buffers in block device
* buffer-cache aliases starting from return from that function and * @bdev: Block device to clean buffers in
* until the moment when something will explicitly mark the buffer * @block: Start of a range of blocks to clean
* dirty (hopefully that will not happen until we will free that block ;-) * @len: Number of blocks to clean
* We don't even need to mark it not-uptodate - nobody can expect *
* anything from a newly allocated buffer anyway. We used to used * We are taking a range of blocks for data and we don't want writeback of any
* unmap_buffer() for such invalidation, but that was wrong. We definitely * buffer-cache aliases starting from return from this function and until the
* don't want to mark the alias unmapped, for example - it would confuse * moment when something will explicitly mark the buffer dirty (hopefully that
* anyone who might pick it with bread() afterwards... * will not happen until we will free that block ;-) We don't even need to mark
* * it not-uptodate - nobody can expect anything from a newly allocated buffer
* Also.. Note that bforget() doesn't lock the buffer. So there can * anyway. We used to use unmap_buffer() for such invalidation, but that was
* be writeout I/O going on against recently-freed buffers. We don't * wrong. We definitely don't want to mark the alias unmapped, for example - it
* wait on that I/O in bforget() - it's more efficient to wait on the I/O * would confuse anyone who might pick it with bread() afterwards...
* only if we really need to. That happens here. *
*/ * Also.. Note that bforget() doesn't lock the buffer. So there can be
void unmap_underlying_metadata(struct block_device *bdev, sector_t block) * writeout I/O going on against recently-freed buffers. We don't wait on that
* I/O in bforget() - it's more efficient to wait on the I/O only if we really
* need to. That happens here.
*/
void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{ {
struct buffer_head *old_bh; struct inode *bd_inode = bdev->bd_inode;
struct address_space *bd_mapping = bd_inode->i_mapping;
struct pagevec pvec;
pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
pgoff_t end;
int i;
struct buffer_head *bh;
struct buffer_head *head;
might_sleep(); end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
pagevec_init(&pvec, 0);
while (index <= end && pagevec_lookup(&pvec, bd_mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
old_bh = __find_get_block_slow(bdev, block); index = page->index;
if (old_bh) { if (index > end)
clear_buffer_dirty(old_bh); break;
wait_on_buffer(old_bh); if (!page_has_buffers(page))
clear_buffer_req(old_bh); continue;
__brelse(old_bh); /*
* We use page lock instead of bd_mapping->private_lock
* to pin buffers here since we can afford to sleep and
* it scales better than a global spinlock.
*/
lock_page(page);
/* Recheck when the page is locked which pins bhs */
if (!page_has_buffers(page))
goto unlock_page;
head = page_buffers(page);
bh = head;
do {
if (!buffer_mapped(bh))
goto next;
if (bh->b_blocknr >= block + len)
break;
clear_buffer_dirty(bh);
wait_on_buffer(bh);
clear_buffer_req(bh);
next:
bh = bh->b_this_page;
} while (bh != head);
unlock_page:
unlock_page(page);
}
pagevec_release(&pvec);
cond_resched();
index++;
} }
} }
EXPORT_SYMBOL(unmap_underlying_metadata); EXPORT_SYMBOL(clean_bdev_aliases);
/* /*
* Size is a power-of-two in the range 512..PAGE_SIZE, * Size is a power-of-two in the range 512..PAGE_SIZE,
...@@ -1745,8 +1789,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, ...@@ -1745,8 +1789,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
if (buffer_new(bh)) { if (buffer_new(bh)) {
/* blockdev mappings never come here */ /* blockdev mappings never come here */
clear_buffer_new(bh); clear_buffer_new(bh);
unmap_underlying_metadata(bh->b_bdev, clean_bdev_bh_alias(bh);
bh->b_blocknr);
} }
} }
bh = bh->b_this_page; bh = bh->b_this_page;
...@@ -1992,8 +2035,7 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, ...@@ -1992,8 +2035,7 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
} }
if (buffer_new(bh)) { if (buffer_new(bh)) {
unmap_underlying_metadata(bh->b_bdev, clean_bdev_bh_alias(bh);
bh->b_blocknr);
if (PageUptodate(page)) { if (PageUptodate(page)) {
clear_buffer_new(bh); clear_buffer_new(bh);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
...@@ -2633,7 +2675,7 @@ int nobh_write_begin(struct address_space *mapping, ...@@ -2633,7 +2675,7 @@ int nobh_write_begin(struct address_space *mapping,
if (!buffer_mapped(bh)) if (!buffer_mapped(bh))
is_mapped_to_disk = 0; is_mapped_to_disk = 0;
if (buffer_new(bh)) if (buffer_new(bh))
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); clean_bdev_bh_alias(bh);
if (PageUptodate(page)) { if (PageUptodate(page)) {
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
continue; continue;
......
...@@ -842,24 +842,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, ...@@ -842,24 +842,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
return ret; return ret;
} }
/*
* Clean any dirty buffers in the blockdev mapping which alias newly-created
* file blocks. Only called for S_ISREG files - blockdevs do not set
* buffer_new
*/
static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
{
unsigned i;
unsigned nblocks;
nblocks = map_bh->b_size >> dio->inode->i_blkbits;
for (i = 0; i < nblocks; i++) {
unmap_underlying_metadata(map_bh->b_bdev,
map_bh->b_blocknr + i);
}
}
/* /*
* If we are not writing the entire block and get_block() allocated * If we are not writing the entire block and get_block() allocated
* the block for us, we need to fill-in the unused portion of the * the block for us, we need to fill-in the unused portion of the
...@@ -960,11 +942,15 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, ...@@ -960,11 +942,15 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
goto do_holes; goto do_holes;
sdio->blocks_available = sdio->blocks_available =
map_bh->b_size >> sdio->blkbits; map_bh->b_size >> blkbits;
sdio->next_block_for_io = sdio->next_block_for_io =
map_bh->b_blocknr << sdio->blkfactor; map_bh->b_blocknr << sdio->blkfactor;
if (buffer_new(map_bh)) if (buffer_new(map_bh)) {
clean_blockdev_aliases(dio, map_bh); clean_bdev_aliases(
map_bh->b_bdev,
map_bh->b_blocknr,
map_bh->b_size >> blkbits);
}
if (!sdio->blkfactor) if (!sdio->blkfactor)
goto do_holes; goto do_holes;
......
...@@ -732,16 +732,13 @@ static int ext2_get_blocks(struct inode *inode, ...@@ -732,16 +732,13 @@ static int ext2_get_blocks(struct inode *inode,
} }
if (IS_DAX(inode)) { if (IS_DAX(inode)) {
int i;
/* /*
* We must unmap blocks before zeroing so that writeback cannot * We must unmap blocks before zeroing so that writeback cannot
* overwrite zeros with stale data from block device page cache. * overwrite zeros with stale data from block device page cache.
*/ */
for (i = 0; i < count; i++) { clean_bdev_aliases(inode->i_sb->s_bdev,
unmap_underlying_metadata(inode->i_sb->s_bdev, le32_to_cpu(chain[depth-1].key),
le32_to_cpu(chain[depth-1].key) + i); count);
}
/* /*
* block must be initialised before we put it in the tree * block must be initialised before we put it in the tree
* so that it's not found by another thread before it's * so that it's not found by another thread before it's
......
...@@ -3777,14 +3777,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, ...@@ -3777,14 +3777,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
return err; return err;
} }
static void unmap_underlying_metadata_blocks(struct block_device *bdev,
sector_t block, int count)
{
int i;
for (i = 0; i < count; i++)
unmap_underlying_metadata(bdev, block + i);
}
/* /*
* Handle EOFBLOCKS_FL flag, clearing it if necessary * Handle EOFBLOCKS_FL flag, clearing it if necessary
*/ */
...@@ -4121,9 +4113,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, ...@@ -4121,9 +4113,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
* new. * new.
*/ */
if (allocated > map->m_len) { if (allocated > map->m_len) {
unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len,
newblock + map->m_len, allocated - map->m_len);
allocated - map->m_len);
allocated = map->m_len; allocated = map->m_len;
} }
map->m_len = allocated; map->m_len = allocated;
......
...@@ -661,12 +661,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -661,12 +661,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (flags & EXT4_GET_BLOCKS_ZERO && if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) { map->m_flags & EXT4_MAP_NEW) {
ext4_lblk_t i; clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
map->m_len);
for (i = 0; i < map->m_len; i++) {
unmap_underlying_metadata(inode->i_sb->s_bdev,
map->m_pblk + i);
}
ret = ext4_issue_zeroout(inode, map->m_lblk, ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len); map->m_pblk, map->m_len);
if (ret) { if (ret) {
...@@ -1137,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, ...@@ -1137,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (err) if (err)
break; break;
if (buffer_new(bh)) { if (buffer_new(bh)) {
unmap_underlying_metadata(bh->b_bdev, clean_bdev_bh_alias(bh);
bh->b_blocknr);
if (PageUptodate(page)) { if (PageUptodate(page)) {
clear_buffer_new(bh); clear_buffer_new(bh);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
...@@ -2371,11 +2366,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) ...@@ -2371,11 +2366,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
BUG_ON(map->m_len == 0); BUG_ON(map->m_len == 0);
if (map->m_flags & EXT4_MAP_NEW) { if (map->m_flags & EXT4_MAP_NEW) {
struct block_device *bdev = inode->i_sb->s_bdev; clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
int i; map->m_len);
for (i = 0; i < map->m_len; i++)
unmap_underlying_metadata(bdev, map->m_pblk + i);
} }
return 0; return 0;
} }
......
...@@ -457,7 +457,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, ...@@ -457,7 +457,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
} }
if (buffer_new(bh)) { if (buffer_new(bh)) {
clear_buffer_new(bh); clear_buffer_new(bh);
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); clean_bdev_bh_alias(bh);
} }
set_buffer_async_write(bh); set_buffer_async_write(bh);
nr_to_submit++; nr_to_submit++;
......
...@@ -555,8 +555,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, ...@@ -555,8 +555,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
if (mpd->get_block(inode, block_in_file, &map_bh, 1)) if (mpd->get_block(inode, block_in_file, &map_bh, 1))
goto confused; goto confused;
if (buffer_new(&map_bh)) if (buffer_new(&map_bh))
unmap_underlying_metadata(map_bh.b_bdev, clean_bdev_bh_alias(&map_bh);
map_bh.b_blocknr);
if (buffer_boundary(&map_bh)) { if (buffer_boundary(&map_bh)) {
boundary_block = map_bh.b_blocknr; boundary_block = map_bh.b_blocknr;
boundary_bdev = map_bh.b_bdev; boundary_bdev = map_bh.b_bdev;
......
...@@ -765,7 +765,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) ...@@ -765,7 +765,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
} }
// TODO: Instantiate the hole. // TODO: Instantiate the hole.
// clear_buffer_new(bh); // clear_buffer_new(bh);
// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); // clean_bdev_bh_alias(bh);
ntfs_error(vol->sb, "Writing into sparse regions is " ntfs_error(vol->sb, "Writing into sparse regions is "
"not supported yet. Sorry."); "not supported yet. Sorry.");
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
......
...@@ -740,8 +740,7 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages, ...@@ -740,8 +740,7 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
if (unlikely(was_hole)) { if (unlikely(was_hole)) {
/* We allocated the buffer. */ /* We allocated the buffer. */
unmap_underlying_metadata(bh->b_bdev, clean_bdev_bh_alias(bh);
bh->b_blocknr);
if (bh_end <= pos || bh_pos >= end) if (bh_end <= pos || bh_pos >= end)
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
else else
...@@ -784,7 +783,7 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages, ...@@ -784,7 +783,7 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
continue; continue;
} }
/* We allocated the buffer. */ /* We allocated the buffer. */
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); clean_bdev_bh_alias(bh);
/* /*
* If the buffer is fully outside the write, zero it, * If the buffer is fully outside the write, zero it,
* set it uptodate, and mark it dirty so it gets * set it uptodate, and mark it dirty so it gets
......
...@@ -630,7 +630,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, ...@@ -630,7 +630,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
if (!buffer_mapped(bh)) { if (!buffer_mapped(bh)) {
map_bh(bh, inode->i_sb, *p_blkno); map_bh(bh, inode->i_sb, *p_blkno);
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); clean_bdev_bh_alias(bh);
} }
if (PageUptodate(page)) { if (PageUptodate(page)) {
......
...@@ -307,8 +307,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg, ...@@ -307,8 +307,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
(unsigned long long)(pos + newb), pos); (unsigned long long)(pos + newb), pos);
bh->b_blocknr = newb + pos; bh->b_blocknr = newb + pos;
unmap_underlying_metadata(bh->b_bdev, clean_bdev_bh_alias(bh);
bh->b_blocknr);
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
++j; ++j;
bh = bh->b_this_page; bh = bh->b_this_page;
......
...@@ -1070,8 +1070,7 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size) ...@@ -1070,8 +1070,7 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
if (buffer_new(bh)) { if (buffer_new(bh)) {
clear_buffer_new(bh); clear_buffer_new(bh);
unmap_underlying_metadata(bh->b_bdev, clean_bdev_bh_alias(bh);
bh->b_blocknr);
/* /*
* we do not zeroize fragment, because of * we do not zeroize fragment, because of
* if it maped to hole, it already contains zeroes * if it maped to hole, it already contains zeroes
......
...@@ -168,7 +168,12 @@ int inode_has_buffers(struct inode *); ...@@ -168,7 +168,12 @@ int inode_has_buffers(struct inode *);
void invalidate_inode_buffers(struct inode *); void invalidate_inode_buffers(struct inode *);
int remove_inode_buffers(struct inode *inode); int remove_inode_buffers(struct inode *inode);
int sync_mapping_buffers(struct address_space *mapping); int sync_mapping_buffers(struct address_space *mapping);
void unmap_underlying_metadata(struct block_device *bdev, sector_t block); void clean_bdev_aliases(struct block_device *bdev, sector_t block,
sector_t len);
static inline void clean_bdev_bh_alias(struct buffer_head *bh)
{
clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1);
}
void mark_buffer_async_write(struct buffer_head *bh); void mark_buffer_async_write(struct buffer_head *bh);
void __wait_on_buffer(struct buffer_head *); void __wait_on_buffer(struct buffer_head *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment