Commit 9b7fad20 authored by Dave Chinner

Merge branch 'xfs-4.8-iomap-write' into for-next

parents 07931b7b 3c2bdc91
@@ -4,6 +4,7 @@ config XFS_FS
depends on (64BIT || LBDAF)
select EXPORTFS
select LIBCRC32C
+select FS_IOMAP
help
XFS is a high performance journaling filesystem which originated
on the SGI IRIX platform. It is completely multi-threaded, can
...
@@ -1143,6 +1143,8 @@ __xfs_get_blocks(
ssize_t size;
int new = 0;
+BUG_ON(create && !direct);
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -1150,22 +1152,14 @@ __xfs_get_blocks(
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
size = bh_result->b_size;
-if (!create && direct && offset >= i_size_read(inode))
+if (!create && offset >= i_size_read(inode))
return 0;
/*
* Direct I/O is usually done on preallocated files, so try getting
-* a block mapping without an exclusive lock first. For buffered
-* writes we already have the exclusive iolock anyway, so avoiding
-* a lock roundtrip here by taking the ilock exclusive from the
-* beginning is a useful micro optimization.
+* a block mapping without an exclusive lock first.
*/
-if (create && !direct) {
-lockmode = XFS_ILOCK_EXCL;
-xfs_ilock(ip, lockmode);
-} else {
lockmode = xfs_ilock_data_map_shared(ip);
-}
ASSERT(offset <= mp->m_super->s_maxbytes);
if (offset + size > mp->m_super->s_maxbytes)
@@ -1184,7 +1178,6 @@ __xfs_get_blocks(
(imap.br_startblock == HOLESTARTBLOCK ||
imap.br_startblock == DELAYSTARTBLOCK) ||
(IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-if (direct || xfs_get_extsz_hint(ip)) {
/*
* xfs_iomap_write_direct() expects the shared lock. It
* is unlocked on return.
@@ -1198,23 +1191,6 @@ __xfs_get_blocks(
return error;
new = 1;
-} else {
-/*
-* Delalloc reservations do not require a transaction,
-* we can go on without dropping the lock here. If we
-* are allocating a new delalloc block, make sure that
-* we set the new flag so that we mark the buffer new so
-* that we know that it is newly allocated if the write
-* fails.
-*/
-if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
-new = 1;
-error = xfs_iomap_write_delay(ip, offset, size, &imap);
-if (error)
-goto out_unlock;
-xfs_iunlock(ip, lockmode);
-}
trace_xfs_get_blocks_alloc(ip, offset, size,
ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
: XFS_IO_DELALLOC, &imap);
@@ -1235,9 +1211,7 @@ __xfs_get_blocks(
}
/* trim mapping down to size requested */
-if (direct || size > (1 << inode->i_blkbits))
-xfs_map_trim_size(inode, iblock, bh_result,
-&imap, offset, size);
+xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
/*
* For unwritten extents do not report a disk address in the buffered
@@ -1250,7 +1224,7 @@ __xfs_get_blocks(
if (ISUNWRITTEN(&imap))
set_buffer_unwritten(bh_result);
/* direct IO needs special help */
-if (create && direct) {
+if (create) {
if (dax_fault)
ASSERT(!ISUNWRITTEN(&imap));
else
@@ -1279,14 +1253,7 @@ __xfs_get_blocks(
(new || ISUNWRITTEN(&imap))))
set_buffer_new(bh_result);
-if (imap.br_startblock == DELAYSTARTBLOCK) {
-BUG_ON(direct);
-if (create) {
-set_buffer_uptodate(bh_result);
-set_buffer_mapped(bh_result);
-set_buffer_delay(bh_result);
-}
-}
+BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
return 0;
@@ -1427,216 +1394,6 @@ xfs_vm_direct_IO(
xfs_get_blocks_direct, endio, NULL, flags);
}
/*
* Punch out the delalloc blocks we have already allocated.
*
* Don't bother with xfs_setattr given that nothing can have made it to disk yet
* as the page is still locked at this point.
*/
STATIC void
xfs_vm_kill_delalloc_range(
struct inode *inode,
loff_t start,
loff_t end)
{
struct xfs_inode *ip = XFS_I(inode);
xfs_fileoff_t start_fsb;
xfs_fileoff_t end_fsb;
int error;
start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
if (end_fsb <= start_fsb)
return;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
end_fsb - start_fsb);
if (error) {
/* something screwed, just bail */
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_alert(ip->i_mount,
"xfs_vm_write_failed: unable to clean up ino %lld",
ip->i_ino);
}
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
STATIC void
xfs_vm_write_failed(
struct inode *inode,
struct page *page,
loff_t pos,
unsigned len)
{
loff_t block_offset;
loff_t block_start;
loff_t block_end;
loff_t from = pos & (PAGE_SIZE - 1);
loff_t to = from + len;
struct buffer_head *bh, *head;
struct xfs_mount *mp = XFS_I(inode)->i_mount;
/*
* The request pos offset might be 32 or 64 bit, this is all fine
* on 64-bit platform. However, for 64-bit pos request on 32-bit
* platform, the high 32-bit will be masked off if we evaluate the
* block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
* 0xfffff000 as an unsigned long, hence the result is incorrect
* which could cause the following ASSERT failed in most cases.
* In order to avoid this, we can evaluate the block_offset of the
* start of the page by using shifts rather than masks the mismatch
* problem.
*/
block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
ASSERT(block_offset + from == pos);
head = page_buffers(page);
block_start = 0;
for (bh = head; bh != head || !block_start;
bh = bh->b_this_page, block_start = block_end,
block_offset += bh->b_size) {
block_end = block_start + bh->b_size;
/* skip buffers before the write */
if (block_end <= from)
continue;
/* if the buffer is after the write, we're done */
if (block_start >= to)
break;
/*
* Process delalloc and unwritten buffers beyond EOF. We can
* encounter unwritten buffers in the event that a file has
* post-EOF unwritten extents and an extending write happens to
* fail (e.g., an unaligned write that also involves a delalloc
* to the same page).
*/
if (!buffer_delay(bh) && !buffer_unwritten(bh))
continue;
if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
block_offset < i_size_read(inode))
continue;
if (buffer_delay(bh))
xfs_vm_kill_delalloc_range(inode, block_offset,
block_offset + bh->b_size);
/*
* This buffer does not contain data anymore. make sure anyone
* who finds it knows that for certain.
*/
clear_buffer_delay(bh);
clear_buffer_uptodate(bh);
clear_buffer_mapped(bh);
clear_buffer_new(bh);
clear_buffer_dirty(bh);
clear_buffer_unwritten(bh);
}
}
/*
* This used to call block_write_begin(), but it unlocks and releases the page
* on error, and we need that page to be able to punch stale delalloc blocks out
* on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
* the appropriate point.
*/
STATIC int
xfs_vm_write_begin(
struct file *file,
struct address_space *mapping,
loff_t pos,
unsigned len,
unsigned flags,
struct page **pagep,
void **fsdata)
{
pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
int status;
struct xfs_mount *mp = XFS_I(mapping->host)->i_mount;
ASSERT(len <= PAGE_SIZE);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
status = __block_write_begin(page, pos, len, xfs_get_blocks);
if (xfs_mp_fail_writes(mp))
status = -EIO;
if (unlikely(status)) {
struct inode *inode = mapping->host;
size_t isize = i_size_read(inode);
xfs_vm_write_failed(inode, page, pos, len);
unlock_page(page);
/*
* If the write is beyond EOF, we only want to kill blocks
* allocated in this write, not blocks that were previously
* written successfully.
*/
if (xfs_mp_fail_writes(mp))
isize = 0;
if (pos + len > isize) {
ssize_t start = max_t(ssize_t, pos, isize);
truncate_pagecache_range(inode, start, pos + len);
}
put_page(page);
page = NULL;
}
*pagep = page;
return status;
}
/*
* On failure, we only need to kill delalloc blocks beyond EOF in the range of
* this specific write because they will never be written. Previous writes
* beyond EOF where block allocation succeeded do not need to be trashed, so
* only new blocks from this write should be trashed. For blocks within
* EOF, generic_write_end() zeros them so they are safe to leave alone and be
* written with all the other valid data.
*/
STATIC int
xfs_vm_write_end(
struct file *file,
struct address_space *mapping,
loff_t pos,
unsigned len,
unsigned copied,
struct page *page,
void *fsdata)
{
int ret;
ASSERT(len <= PAGE_SIZE);
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
if (unlikely(ret < len)) {
struct inode *inode = mapping->host;
size_t isize = i_size_read(inode);
loff_t to = pos + len;
if (to > isize) {
/* only kill blocks in this write beyond EOF */
if (pos > isize)
isize = pos;
xfs_vm_kill_delalloc_range(inode, isize, to);
truncate_pagecache_range(inode, isize, to);
}
}
return ret;
}
STATIC sector_t
xfs_vm_bmap(
struct address_space *mapping,
@@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = {
.set_page_dirty = xfs_vm_set_page_dirty,
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
-.write_begin = xfs_vm_write_begin,
-.write_end = xfs_vm_write_end,
.bmap = xfs_vm_bmap,
.direct_IO = xfs_vm_direct_IO,
.migratepage = buffer_migrate_page,
...
@@ -37,6 +37,7 @@
#include "xfs_log.h"
#include "xfs_icache.h"
#include "xfs_pnfs.h"
+#include "xfs_iomap.h"
#include <linux/dcache.h>
#include <linux/falloc.h>
@@ -80,61 +81,17 @@ xfs_rw_ilock_demote(
}
/*
-* xfs_iozero clears the specified range supplied via the page cache (except in
-* the DAX case). Writes through the page cache will allocate blocks over holes,
-* though the callers usually map the holes first and avoid them. If a block is
-* not completely zeroed, then it will be read from disk before being partially
-* zeroed.
-*
-* In the DAX case, we can just directly write to the underlying pages. This
-* will not allocate blocks, but will avoid holes and unwritten extents and so
-* not do unnecessary work.
+* Clear the specified ranges to zero through either the pagecache or DAX.
+* Holes and unwritten extents will be left as-is as they already are zeroed.
*/
int
-xfs_iozero(
-struct xfs_inode *ip, /* inode */
-loff_t pos, /* offset in file */
-size_t count) /* size of data to zero */
+xfs_zero_range(
+struct xfs_inode *ip,
+xfs_off_t pos,
+xfs_off_t count,
+bool *did_zero)
{
-struct page *page;
-struct address_space *mapping;
-int status = 0;
-mapping = VFS_I(ip)->i_mapping;
-do {
-unsigned offset, bytes;
-void *fsdata;
-offset = (pos & (PAGE_SIZE -1)); /* Within page */
-bytes = PAGE_SIZE - offset;
-if (bytes > count)
-bytes = count;
-if (IS_DAX(VFS_I(ip))) {
-status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-xfs_get_blocks_direct);
-if (status)
-break;
-} else {
-status = pagecache_write_begin(NULL, mapping, pos, bytes,
-AOP_FLAG_UNINTERRUPTIBLE,
-&page, &fsdata);
-if (status)
-break;
-zero_user(page, offset, bytes);
-status = pagecache_write_end(NULL, mapping, pos, bytes,
-bytes, page, fsdata);
-WARN_ON(status <= 0); /* can't return less than zero! */
-status = 0;
-}
-pos += bytes;
-count -= bytes;
-} while (count);
-return status;
+return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
}
int
@@ -423,49 +380,6 @@ xfs_file_splice_read(
return ret;
}
/*
* This routine is called to handle zeroing any space in the last block of the
* file that is beyond the EOF. We do this since the size is being increased
* without writing anything to that block and we don't want to read the
* garbage on the disk.
*/
STATIC int /* error (positive) */
xfs_zero_last_block(
struct xfs_inode *ip,
xfs_fsize_t offset,
xfs_fsize_t isize,
bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
int zero_offset = XFS_B_FSB_OFFSET(mp, isize);
int zero_len;
int nimaps = 1;
int error = 0;
struct xfs_bmbt_irec imap;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
ASSERT(nimaps > 0);
/*
* If the block underlying isize is just a hole, then there
* is nothing to zero.
*/
if (imap.br_startblock == HOLESTARTBLOCK)
return 0;
zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset)
zero_len = offset - isize;
*did_zeroing = true;
return xfs_iozero(ip, isize, zero_len);
}
/*
* Zero any on disk space between the current EOF and the new, larger EOF.
*
@@ -484,94 +398,11 @@ xfs_zero_eof(
xfs_fsize_t isize, /* current inode size */
bool *did_zeroing)
{
-struct xfs_mount *mp = ip->i_mount;
-xfs_fileoff_t start_zero_fsb;
-xfs_fileoff_t end_zero_fsb;
-xfs_fileoff_t zero_count_fsb;
-xfs_fileoff_t last_fsb;
-xfs_fileoff_t zero_off;
-xfs_fsize_t zero_len;
-int nimaps;
-int error = 0;
-struct xfs_bmbt_irec imap;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(offset > isize);
trace_xfs_zero_eof(ip, isize, offset - isize);
+return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
/*
* First handle zeroing the block on which isize resides.
*
* We only zero a part of that block so it is handled specially.
*/
if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
if (error)
return error;
}
/*
* Calculate the range between the new size and the old where blocks
* needing to be zeroed may exist.
*
* To get the block where the last byte in the file currently resides,
* we need to subtract one from the size and truncate back to a block
* boundary. We subtract 1 in case the size is exactly on a block
* boundary.
*/
last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
if (last_fsb == end_zero_fsb) {
/*
* The size was only incremented on its last block.
* We took care of that above, so just return.
*/
return 0;
}
ASSERT(start_zero_fsb <= end_zero_fsb);
while (start_zero_fsb <= end_zero_fsb) {
nimaps = 1;
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
&imap, &nimaps, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
ASSERT(nimaps > 0);
if (imap.br_state == XFS_EXT_UNWRITTEN ||
imap.br_startblock == HOLESTARTBLOCK) {
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
continue;
}
/*
* There are blocks we need to zero.
*/
zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
if ((zero_off + zero_len) > offset)
zero_len = offset - zero_off;
error = xfs_iozero(ip, zero_off, zero_len);
if (error)
return error;
*did_zeroing = true;
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
}
return 0;
} }
/*
@@ -841,7 +672,7 @@ xfs_file_buffered_aio_write(
write_retry:
trace_xfs_file_buffered_write(ip, iov_iter_count(from),
iocb->ki_pos, 0);
-ret = generic_perform_write(file, from, iocb->ki_pos);
+ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
if (likely(ret >= 0))
iocb->ki_pos += ret;
@@ -1553,7 +1384,7 @@ xfs_filemap_page_mkwrite(
if (IS_DAX(inode)) {
ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
} else {
-ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
ret = block_page_mkwrite_return(ret);
}
...
@@ -427,7 +427,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
enum xfs_prealloc_flags flags);
int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
xfs_fsize_t isize, bool *did_zeroing);
-int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
+bool *did_zero);
loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
loff_t eof, int whence);
...
@@ -15,6 +15,7 @@
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <linux/iomap.h>
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
@@ -940,3 +941,173 @@ xfs_iomap_write_unwritten(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
void
xfs_bmbt_to_iomap(
struct xfs_inode *ip,
struct iomap *iomap,
struct xfs_bmbt_irec *imap)
{
struct xfs_mount *mp = ip->i_mount;
if (imap->br_startblock == HOLESTARTBLOCK) {
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->type = IOMAP_HOLE;
} else if (imap->br_startblock == DELAYSTARTBLOCK) {
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->type = IOMAP_DELALLOC;
} else {
iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
if (imap->br_state == XFS_EXT_UNWRITTEN)
iomap->type = IOMAP_UNWRITTEN;
else
iomap->type = IOMAP_MAPPED;
}
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
}
static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
{
return !nimaps ||
imap->br_startblock == HOLESTARTBLOCK ||
imap->br_startblock == DELAYSTARTBLOCK;
}
static int
xfs_file_iomap_begin(
struct inode *inode,
loff_t offset,
loff_t length,
unsigned flags,
struct iomap *iomap)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec imap;
xfs_fileoff_t offset_fsb, end_fsb;
int nimaps = 1, error = 0;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
length = mp->m_super->s_maxbytes - offset;
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, XFS_BMAPI_ENTIRE);
if (error) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
/*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
* with the work writeback does. This is a completely arbitrary
* number pulled out of thin air as a best guess for initial
* testing.
*
* Note that the values needs to be less than 32-bits wide until
* the lower level functions are updated.
*/
length = min_t(loff_t, length, 1024 * PAGE_SIZE);
if (xfs_get_extsz_hint(ip)) {
/*
* xfs_iomap_write_direct() expects the shared lock. It
* is unlocked on return.
*/
xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
error = xfs_iomap_write_direct(ip, offset, length, &imap,
nimaps);
} else {
error = xfs_iomap_write_delay(ip, offset, length, &imap);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
if (error)
return error;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
xfs_bmbt_to_iomap(ip, iomap, &imap);
} else if (nimaps) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_iomap_found(ip, offset, length, 0, &imap);
xfs_bmbt_to_iomap(ip, iomap, &imap);
} else {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->type = IOMAP_HOLE;
iomap->offset = offset;
iomap->length = length;
}
return 0;
}
static int
xfs_file_iomap_end_delalloc(
struct xfs_inode *ip,
loff_t offset,
loff_t length,
ssize_t written)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_fsb;
xfs_fileoff_t end_fsb;
int error = 0;
start_fsb = XFS_B_TO_FSB(mp, offset + written);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
/*
* Trim back delalloc blocks if we didn't manage to write the whole
* range reserved.
*
* We don't need to care about racing delalloc as we hold i_mutex
* across the reserve/allocate/unreserve calls. If there are delalloc
* blocks in the range, they are ours.
*/
if (start_fsb < end_fsb) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
end_fsb - start_fsb);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error && !XFS_FORCED_SHUTDOWN(mp)) {
xfs_alert(mp, "%s: unable to clean up ino %lld",
__func__, ip->i_ino);
return error;
}
}
return 0;
}
static int
xfs_file_iomap_end(
struct inode *inode,
loff_t offset,
loff_t length,
ssize_t written,
unsigned flags,
struct iomap *iomap)
{
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
length, written);
return 0;
}
struct iomap_ops xfs_iomap_ops = {
.iomap_begin = xfs_file_iomap_begin,
.iomap_end = xfs_file_iomap_end,
};
@@ -18,6 +18,8 @@
#ifndef __XFS_IOMAP_H__
#define __XFS_IOMAP_H__
+#include <linux/iomap.h>
struct xfs_inode;
struct xfs_bmbt_irec;
@@ -29,4 +31,9 @@ int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
struct xfs_bmbt_irec *);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
+struct xfs_bmbt_irec *);
+extern struct iomap_ops xfs_iomap_ops;
#endif /* __XFS_IOMAP_H__*/
@@ -38,12 +38,13 @@
#include "xfs_dir2.h"
#include "xfs_trans_space.h"
#include "xfs_pnfs.h"
+#include "xfs_iomap.h"
#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
#include <linux/slab.h>
/*
@@ -800,20 +801,30 @@ xfs_setattr_size(
if (error)
return error;
+/*
+* Wait for all direct I/O to complete.
+*/
+inode_dio_wait(inode);
/*
* File data changes must be complete before we start the transaction to
* modify the inode. This needs to be done before joining the inode to
* the transaction because the inode cannot be unlocked once it is a
* part of the transaction.
*
-* Start with zeroing any data block beyond EOF that we may expose on
-* file extension.
+* Start with zeroing any data beyond EOF that we may expose on file
+* extension, or zeroing out the rest of the block on a downward
+* truncate.
*/
if (newsize > oldsize) {
error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
+} else {
+error = iomap_truncate_page(inode, newsize, &did_zeroing,
+&xfs_iomap_ops);
+}
if (error)
return error;
-}
/*
* We are going to log the inode size change in this transaction so
@@ -823,17 +834,14 @@ xfs_setattr_size(
* problem. Note that this includes any block zeroing we did above;
* otherwise those blocks may not be zeroed after a crash.
*/
-if (newsize > ip->i_d.di_size &&
-(oldsize != ip->i_d.di_size || did_zeroing)) {
+if (did_zeroing ||
+(newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
return error;
}
-/* Now wait for all direct I/O to complete. */
-inode_dio_wait(inode);
/*
* We've already locked out new page faults, so now we can safely remove
* pages from the page cache knowing they won't get refaulted until we
@@ -851,13 +859,6 @@ xfs_setattr_size(
* to hope that the caller sees ENOMEM and retries the truncate
* operation.
*/
-if (IS_DAX(inode))
-error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
-else
-error = block_truncate_page(inode->i_mapping, newsize,
-xfs_get_blocks);
-if (error)
-return error;
truncate_setsize(inode, newsize);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -998,51 +999,6 @@ xfs_vn_update_time(
return xfs_trans_commit(tp);
}
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
/*
* Call fiemap helper to fill in user data.
* Returns positive errors to xfs_getbmap.
*/
STATIC int
xfs_fiemap_format(
void **arg,
struct getbmapx *bmv,
int *full)
{
int error;
struct fiemap_extent_info *fieinfo = *arg;
u32 fiemap_flags = 0;
u64 logical, physical, length;
/* Do nothing for a hole */
if (bmv->bmv_block == -1LL)
return 0;
logical = BBTOB(bmv->bmv_offset);
physical = BBTOB(bmv->bmv_block);
length = BBTOB(bmv->bmv_length);
if (bmv->bmv_oflags & BMV_OF_PREALLOC)
fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
fiemap_flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
physical = 0; /* no block yet */
}
if (bmv->bmv_oflags & BMV_OF_LAST)
fiemap_flags |= FIEMAP_EXTENT_LAST;
error = fiemap_fill_next_extent(fieinfo, logical, physical,
length, fiemap_flags);
if (error > 0) {
error = 0;
*full = 1; /* user array now full */
}
return error;
}
STATIC int
xfs_vn_fiemap(
struct inode *inode,
@@ -1050,38 +1006,13 @@ xfs_vn_fiemap(
u64 start,
u64 length)
{
-xfs_inode_t *ip = XFS_I(inode);
-struct getbmapx bm;
int error;
-error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-if (error)
-return error;
-/* Set up bmap header for xfs internal routine */
-bm.bmv_offset = BTOBBT(start);
-/* Special case for whole file */
-if (length == FIEMAP_MAX_OFFSET)
-bm.bmv_length = -1LL;
-else
-bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
-/* We add one because in getbmap world count includes the header */
-bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-fieinfo->fi_extents_max + 1;
-bm.bmv_count = min_t(__s32, bm.bmv_count,
-(PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-bm.bmv_iflags |= BMV_IF_ATTRFORK;
-if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-bm.bmv_iflags |= BMV_IF_DELALLOC;
-error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-if (error)
+xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
+error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
return error;
-return 0;
}
STATIC int
...
@@ -80,32 +80,6 @@ xfs_fs_get_uuid(
return 0;
}
static void
xfs_bmbt_to_iomap(
struct xfs_inode *ip,
struct iomap *iomap,
struct xfs_bmbt_irec *imap)
{
struct xfs_mount *mp = ip->i_mount;
if (imap->br_startblock == HOLESTARTBLOCK) {
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->type = IOMAP_HOLE;
} else if (imap->br_startblock == DELAYSTARTBLOCK) {
iomap->blkno = IOMAP_NULL_BLOCK;
iomap->type = IOMAP_DELALLOC;
} else {
iomap->blkno =
XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock);
if (imap->br_state == XFS_EXT_UNWRITTEN)
iomap->type = IOMAP_UNWRITTEN;
else
iomap->type = IOMAP_MAPPED;
}
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
}
/*
* Get a layout for the pNFS client.
*/
...
@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
DECLARE_EVENT_CLASS(xfs_simple_io_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
...