Commit cc714660 authored by Darrick J. Wong

xfs: add dedupe range vfs function

Define a VFS function which allows userspace to request that the
kernel reflink a range of blocks between two files if the ranges'
contents match.  The function fits the new VFS ioctl that standardizes
the checking for the btrfs EXTENT SAME ioctl.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 9fe26045
...@@ -1010,7 +1010,8 @@ xfs_file_share_range( ...@@ -1010,7 +1010,8 @@ xfs_file_share_range(
loff_t pos_in, loff_t pos_in,
struct file *file_out, struct file *file_out,
loff_t pos_out, loff_t pos_out,
u64 len) u64 len,
bool is_dedupe)
{ {
struct inode *inode_in; struct inode *inode_in;
struct inode *inode_out; struct inode *inode_out;
...@@ -1019,6 +1020,7 @@ xfs_file_share_range( ...@@ -1019,6 +1020,7 @@ xfs_file_share_range(
loff_t isize; loff_t isize;
int same_inode; int same_inode;
loff_t blen; loff_t blen;
unsigned int flags = 0;
inode_in = file_inode(file_in); inode_in = file_inode(file_in);
inode_out = file_inode(file_out); inode_out = file_inode(file_out);
...@@ -1056,6 +1058,15 @@ xfs_file_share_range( ...@@ -1056,6 +1058,15 @@ xfs_file_share_range(
pos_in + len > isize) pos_in + len > isize)
return -EINVAL; return -EINVAL;
/* Don't allow dedupe past EOF in the dest file */
if (is_dedupe) {
loff_t disize;
disize = i_size_read(inode_out);
if (pos_out >= disize || pos_out + len > disize)
return -EINVAL;
}
/* If we're linking to EOF, continue to the block boundary. */ /* If we're linking to EOF, continue to the block boundary. */
if (pos_in + len == isize) if (pos_in + len == isize)
blen = ALIGN(isize, bs) - pos_in; blen = ALIGN(isize, bs) - pos_in;
...@@ -1079,8 +1090,10 @@ xfs_file_share_range( ...@@ -1079,8 +1090,10 @@ xfs_file_share_range(
if (ret) if (ret)
goto out_unlock; goto out_unlock;
if (is_dedupe)
flags |= XFS_REFLINK_DEDUPE;
ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
pos_out, len); pos_out, len, flags);
if (ret < 0) if (ret < 0)
goto out_unlock; goto out_unlock;
...@@ -1100,7 +1113,7 @@ xfs_file_copy_range( ...@@ -1100,7 +1113,7 @@ xfs_file_copy_range(
int error; int error;
error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
len); len, false);
if (error) if (error)
return error; return error;
return len; return len;
...@@ -1115,7 +1128,33 @@ xfs_file_clone_range( ...@@ -1115,7 +1128,33 @@ xfs_file_clone_range(
u64 len) u64 len)
{ {
return xfs_file_share_range(file_in, pos_in, file_out, pos_out, return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
len); len, false);
}
/* Largest byte count (16 MiB) a single dedupe call will act on. */
#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
/*
 * Dedupe entry point; installed as .dedupe_file_range in
 * xfs_file_operations.
 *
 * Clamps len to XFS_MAX_DEDUPE_LEN and hands the request to
 * xfs_file_share_range() with is_dedupe set to true.
 *
 * Returns the (possibly clamped) length when xfs_file_share_range()
 * returns zero; any nonzero value it returns is passed back unchanged.
 */
STATIC ssize_t
xfs_file_dedupe_range(
struct file *src_file,
u64 loff,
u64 len,
struct file *dst_file,
u64 dst_loff)
{
int error;
/*
 * Limit the total length we will dedupe for each operation.
 * This is intended to bound the total time spent in this
 * ioctl to something sane.
 */
if (len > XFS_MAX_DEDUPE_LEN)
len = XFS_MAX_DEDUPE_LEN;
/* The trailing 'true' selects the dedupe (compare-before-share) path. */
error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
return error;
/* Report how many bytes were covered, after clamping. */
return len;
} }
STATIC int STATIC int
...@@ -1779,6 +1818,7 @@ const struct file_operations xfs_file_operations = { ...@@ -1779,6 +1818,7 @@ const struct file_operations xfs_file_operations = {
.fallocate = xfs_file_fallocate, .fallocate = xfs_file_fallocate,
.copy_file_range = xfs_file_copy_range, .copy_file_range = xfs_file_copy_range,
.clone_file_range = xfs_file_clone_range, .clone_file_range = xfs_file_clone_range,
.dedupe_file_range = xfs_file_dedupe_range,
}; };
const struct file_operations xfs_dir_file_operations = { const struct file_operations xfs_dir_file_operations = {
......
...@@ -1149,6 +1149,111 @@ xfs_reflink_remap_blocks( ...@@ -1149,6 +1149,111 @@ xfs_reflink_remap_blocks(
return error; return error;
} }
/*
 * Fetch the page cache page that covers @offset in @inode and hand it
 * back locked.
 *
 * The page is obtained with read_mapping_page().  On success the caller
 * owns both the page lock and a page reference and must release them
 * with unlock_page() and put_page().
 *
 * Returns the locked page, the ERR_PTR produced by read_mapping_page()
 * if that call failed, or ERR_PTR(-EIO) if the page came back without
 * being up to date.
 */
static struct page *
xfs_get_page(
	struct inode	*inode,
	xfs_off_t	offset)
{
	struct page	*pg;

	pg = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
	if (IS_ERR(pg))
		return pg;

	if (PageUptodate(pg)) {
		lock_page(pg);
		return pg;
	}

	/* Contents are not valid: drop our reference and report I/O error. */
	put_page(pg);
	return ERR_PTR(-EIO);
}
/*
 * Compare two byte ranges through the page cache to see if their
 * contents are identical.
 *
 * @src/@srcoff:   inode and starting byte offset of the first range
 * @dest/@destoff: inode and starting byte offset of the second range
 * @len:           number of bytes to compare
 * @is_same:       set to true if every byte matched, false otherwise;
 *                 only written when the function returns 0
 *
 * The ranges are walked in chunks that never cross a page boundary in
 * either file.  Comparison stops at the first chunk that differs.
 *
 * Page locking: xfs_get_page() returns each page locked.  When both
 * ranges belong to the same inode this needs care:
 *   - if both offsets land in the same page cache page, that page is
 *     fetched and locked exactly once; locking it a second time would
 *     make this task wait on a lock it already holds;
 *   - otherwise the page at the lower file offset is locked first, so
 *     that two pages of one file are always locked in increasing offset
 *     order.
 * NOTE(review): whether callers already exclude the same-page case is not
 * visible from here; the check is cheap and keeps this function safe on
 * its own.
 *
 * Returns 0 on success, or the negative errno from xfs_get_page() if a
 * page could not be obtained.
 */
static int
xfs_compare_extents(
	struct inode	*src,
	xfs_off_t	srcoff,
	struct inode	*dest,
	xfs_off_t	destoff,
	xfs_off_t	len,
	bool		*is_same)
{
	xfs_off_t	src_poff;
	xfs_off_t	dest_poff;
	void		*src_addr;
	void		*dest_addr;
	struct page	*src_page;
	struct page	*dest_page;
	xfs_off_t	cmp_len;
	bool		same_page;
	bool		dest_first;
	bool		same = true;
	int		error;

	while (len) {
		/* Byte offsets within their respective pages. */
		src_poff = srcoff & (PAGE_SIZE - 1);
		dest_poff = destoff & (PAGE_SIZE - 1);

		/* Largest chunk that stays inside one page on both sides. */
		cmp_len = min(PAGE_SIZE - src_poff,
			      PAGE_SIZE - dest_poff);
		cmp_len = min(cmp_len, len);
		ASSERT(cmp_len > 0);

		trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
				XFS_I(dest), destoff);

		/*
		 * Same inode and same page-aligned start means both chunks
		 * live in a single page cache page.
		 */
		same_page = src == dest &&
			    srcoff - src_poff == destoff - dest_poff;
		dest_first = src == dest && destoff < srcoff;

		if (same_page) {
			/* One page, one lock, one reference. */
			src_page = xfs_get_page(src, srcoff);
			if (IS_ERR(src_page)) {
				error = PTR_ERR(src_page);
				goto out_error;
			}
			dest_page = src_page;
		} else if (dest_first) {
			/* Same file, dest is at the lower offset: lock it first. */
			dest_page = xfs_get_page(dest, destoff);
			if (IS_ERR(dest_page)) {
				error = PTR_ERR(dest_page);
				goto out_error;
			}
			src_page = xfs_get_page(src, srcoff);
			if (IS_ERR(src_page)) {
				error = PTR_ERR(src_page);
				unlock_page(dest_page);
				put_page(dest_page);
				goto out_error;
			}
		} else {
			src_page = xfs_get_page(src, srcoff);
			if (IS_ERR(src_page)) {
				error = PTR_ERR(src_page);
				goto out_error;
			}
			dest_page = xfs_get_page(dest, destoff);
			if (IS_ERR(dest_page)) {
				error = PTR_ERR(dest_page);
				unlock_page(src_page);
				put_page(src_page);
				goto out_error;
			}
		}

		src_addr = kmap_atomic(src_page);
		dest_addr = kmap_atomic(dest_page);

		flush_dcache_page(src_page);
		flush_dcache_page(dest_page);

		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
			same = false;

		/* Atomic mappings are released in reverse order of creation. */
		kunmap_atomic(dest_addr);
		kunmap_atomic(src_addr);

		/* A shared page holds only one lock and one reference. */
		if (!same_page) {
			unlock_page(dest_page);
			put_page(dest_page);
		}
		unlock_page(src_page);
		put_page(src_page);

		if (!same)
			break;

		srcoff += cmp_len;
		destoff += cmp_len;
		len -= cmp_len;
	}

	*is_same = same;
	return 0;

out_error:
	trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
	return error;
}
/* /*
* Link a range of blocks from one file to another. * Link a range of blocks from one file to another.
*/ */
...@@ -1158,12 +1263,14 @@ xfs_reflink_remap_range( ...@@ -1158,12 +1263,14 @@ xfs_reflink_remap_range(
xfs_off_t srcoff, xfs_off_t srcoff,
struct xfs_inode *dest, struct xfs_inode *dest,
xfs_off_t destoff, xfs_off_t destoff,
xfs_off_t len) xfs_off_t len,
unsigned int flags)
{ {
struct xfs_mount *mp = src->i_mount; struct xfs_mount *mp = src->i_mount;
xfs_fileoff_t sfsbno, dfsbno; xfs_fileoff_t sfsbno, dfsbno;
xfs_filblks_t fsblen; xfs_filblks_t fsblen;
int error; int error;
bool is_same;
if (!xfs_sb_version_hasreflink(&mp->m_sb)) if (!xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -1175,6 +1282,9 @@ xfs_reflink_remap_range( ...@@ -1175,6 +1282,9 @@ xfs_reflink_remap_range(
if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
return -EINVAL; return -EINVAL;
if (flags & ~XFS_REFLINK_ALL)
return -EINVAL;
trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
/* Lock both files against IO */ /* Lock both files against IO */
...@@ -1186,6 +1296,21 @@ xfs_reflink_remap_range( ...@@ -1186,6 +1296,21 @@ xfs_reflink_remap_range(
xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
} }
/*
* Check that the extents are the same.
*/
if (flags & XFS_REFLINK_DEDUPE) {
is_same = false;
error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
destoff, len, &is_same);
if (error)
goto out_error;
if (!is_same) {
error = -EBADE;
goto out_error;
}
}
error = xfs_reflink_set_inode_flag(src, dest); error = xfs_reflink_set_inode_flag(src, dest);
if (error) if (error)
goto out_error; goto out_error;
......
...@@ -43,7 +43,10 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, ...@@ -43,7 +43,10 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count); xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp); extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */
#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE)
extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len); struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
unsigned int flags);
#endif /* __XFS_REFLINK_H */ #endif /* __XFS_REFLINK_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment