Commit fce205e9 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'work.copy_file_range' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs copy_file_range updates from Al Viro:
 "Several series around copy_file_range/CLONE"

* 'work.copy_file_range' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  btrfs: use new dedupe data function pointer
  vfs: hoist the btrfs deduplication ioctl to the vfs
  vfs: wire up compat ioctl for CLONE/CLONE_RANGE
  cifs: avoid unused variable and label
  nfsd: implement the NFSv4.2 CLONE operation
  nfsd: Pass filehandle to nfs4_preprocess_stateid_op()
  vfs: pull btrfs clone API to vfs layer
  locks: new locks_mandatory_area calling convention
  vfs: Add vfs_copy_file_range() support for pagecache copies
  btrfs: add .copy_file_range file operation
  x86: add sys_copy_file_range to syscall tables
  vfs: add copy_file_range syscall and vfs helper
parents 065019a3 2b3909f8
......@@ -383,3 +383,4 @@
374 i386 userfaultfd sys_userfaultfd
375 i386 membarrier sys_membarrier
376 i386 mlock2 sys_mlock2
377 i386 copy_file_range sys_copy_file_range
......@@ -332,6 +332,7 @@
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
326 common copy_file_range sys_copy_file_range
#
# x32-specific system call numbers start at 512 to avoid cache impact
......
......@@ -4024,7 +4024,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
struct btrfs_ioctl_balance_args *bargs);
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff);
/* file.c */
int btrfs_auto_defrag_init(void);
......@@ -4055,6 +4056,11 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
loff_t pos, size_t write_bytes,
struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags);
int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
......
......@@ -2934,6 +2934,9 @@ const struct file_operations btrfs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl,
#endif
.copy_file_range = btrfs_copy_file_range,
.clone_file_range = btrfs_clone_file_range,
.dedupe_file_range = btrfs_dedupe_file_range,
};
void btrfs_auto_defrag_exit(void)
......
......@@ -2962,7 +2962,7 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
flush_dcache_page(dst_page);
if (memcmp(addr, dst_addr, cmp_len))
ret = BTRFS_SAME_DATA_DIFFERS;
ret = -EBADE;
kunmap_atomic(addr);
kunmap_atomic(dst_addr);
......@@ -3098,53 +3098,16 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
static long btrfs_ioctl_file_extent_same(struct file *file,
struct btrfs_ioctl_same_args __user *argp)
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff)
{
struct btrfs_ioctl_same_args *same = NULL;
struct btrfs_ioctl_same_extent_info *info;
struct inode *src = file_inode(file);
u64 off;
u64 len;
int i;
int ret;
unsigned long size;
struct inode *src = file_inode(src_file);
struct inode *dst = file_inode(dst_file);
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
bool is_admin = capable(CAP_SYS_ADMIN);
u16 count;
if (!(file->f_mode & FMODE_READ))
return -EINVAL;
ret = mnt_want_write_file(file);
if (ret)
return ret;
if (get_user(count, &argp->dest_count)) {
ret = -EFAULT;
goto out;
}
size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
same = memdup_user(argp, size);
if (IS_ERR(same)) {
ret = PTR_ERR(same);
same = NULL;
goto out;
}
ssize_t res;
off = same->logical_offset;
len = same->length;
/*
* Limit the total length we will dedupe for each operation.
* This is intended to bound the total time spent in this
* ioctl to something sane.
*/
if (len > BTRFS_MAX_DEDUPE_LEN)
len = BTRFS_MAX_DEDUPE_LEN;
if (olen > BTRFS_MAX_DEDUPE_LEN)
olen = BTRFS_MAX_DEDUPE_LEN;
if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
/*
......@@ -3152,58 +3115,13 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
* result, btrfs_cmp_data() won't correctly handle
* this situation without an update.
*/
ret = -EINVAL;
goto out;
}
ret = -EISDIR;
if (S_ISDIR(src->i_mode))
goto out;
ret = -EACCES;
if (!S_ISREG(src->i_mode))
goto out;
/* pre-format output fields to sane values */
for (i = 0; i < count; i++) {
same->info[i].bytes_deduped = 0ULL;
same->info[i].status = 0;
}
for (i = 0, info = same->info; i < count; i++, info++) {
struct inode *dst;
struct fd dst_file = fdget(info->fd);
if (!dst_file.file) {
info->status = -EBADF;
continue;
}
dst = file_inode(dst_file.file);
if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
info->status = -EINVAL;
} else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
info->status = -EXDEV;
} else if (S_ISDIR(dst->i_mode)) {
info->status = -EISDIR;
} else if (!S_ISREG(dst->i_mode)) {
info->status = -EACCES;
} else {
info->status = btrfs_extent_same(src, off, len, dst,
info->logical_offset);
if (info->status == 0)
info->bytes_deduped += len;
}
fdput(dst_file);
return -EINVAL;
}
ret = copy_to_user(argp, same, size);
if (ret)
ret = -EFAULT;
out:
mnt_drop_write_file(file);
kfree(same);
return ret;
res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
if (res)
return res;
return olen;
}
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
......@@ -3779,17 +3697,16 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
return ret;
}
static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
u64 off, u64 olen, u64 destoff)
{
struct inode *inode = file_inode(file);
struct inode *src = file_inode(file_src);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct fd src_file;
struct inode *src;
int ret;
u64 len = olen;
u64 bs = root->fs_info->sb->s_blocksize;
int same_inode = 0;
int same_inode = src == inode;
/*
* TODO:
......@@ -3802,49 +3719,20 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
* be either compressed or non-compressed.
*/
/* the destination must be opened for writing */
if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
if (btrfs_root_readonly(root))
return -EROFS;
ret = mnt_want_write_file(file);
if (ret)
return ret;
src_file = fdget(srcfd);
if (!src_file.file) {
ret = -EBADF;
goto out_drop_write;
}
ret = -EXDEV;
if (src_file.file->f_path.mnt != file->f_path.mnt)
goto out_fput;
src = file_inode(src_file.file);
ret = -EINVAL;
if (src == inode)
same_inode = 1;
/* the src must be open for reading */
if (!(src_file.file->f_mode & FMODE_READ))
goto out_fput;
if (file_src->f_path.mnt != file->f_path.mnt ||
src->i_sb != inode->i_sb)
return -EXDEV;
/* don't make the dst file partly checksummed */
if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
goto out_fput;
return -EINVAL;
ret = -EISDIR;
if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
goto out_fput;
ret = -EXDEV;
if (src->i_sb != inode->i_sb)
goto out_fput;
return -EISDIR;
if (!same_inode) {
btrfs_double_inode_lock(src, inode);
......@@ -3921,21 +3809,25 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_double_inode_unlock(src, inode);
else
mutex_unlock(&src->i_mutex);
out_fput:
fdput(src_file);
out_drop_write:
mnt_drop_write_file(file);
return ret;
}
static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
struct btrfs_ioctl_clone_range_args args;
ssize_t ret;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
args.src_length, args.dest_offset);
ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
if (ret == 0)
ret = len;
return ret;
}
int btrfs_clone_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, u64 len)
{
return btrfs_clone_files(dst_file, src_file, off, len, destoff);
}
/*
......@@ -5485,10 +5377,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
return btrfs_ioctl_clone_range(file, argp);
case BTRFS_IOC_TRANS_START:
return btrfs_ioctl_trans_start(file);
case BTRFS_IOC_TRANS_END:
......@@ -5566,8 +5454,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_fslabel(file, argp);
case BTRFS_IOC_SET_FSLABEL:
return btrfs_ioctl_set_fslabel(file, argp);
case BTRFS_IOC_FILE_EXTENT_SAME:
return btrfs_ioctl_file_extent_same(file, argp);
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
return btrfs_ioctl_get_supported_features(file, argp);
case BTRFS_IOC_GET_FEATURES:
......
......@@ -913,6 +913,59 @@ const struct inode_operations cifs_symlink_inode_ops = {
#endif
};
static int cifs_clone_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, u64 len)
{
struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
struct cifsFileInfo *smb_file_src = src_file->private_data;
struct cifsFileInfo *smb_file_target = dst_file->private_data;
struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink);
unsigned int xid;
int rc;
cifs_dbg(FYI, "clone range\n");
xid = get_xid();
if (!src_file->private_data || !dst_file->private_data) {
rc = -EBADF;
cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
goto out;
}
/*
* Note: cifs case is easier than btrfs since server responsible for
* checks for proper open modes and file type and if it wants
* server could even support copy of range where source = target
*/
lock_two_nondirectories(target_inode, src_inode);
if (len == 0)
len = src_inode->i_size - off;
cifs_dbg(FYI, "about to flush pages\n");
/* should we flush first and last page first */
truncate_inode_pages_range(&target_inode->i_data, destoff,
PAGE_CACHE_ALIGN(destoff + len)-1);
if (target_tcon->ses->server->ops->duplicate_extents)
rc = target_tcon->ses->server->ops->duplicate_extents(xid,
smb_file_src, smb_file_target, off, len, destoff);
else
rc = -EOPNOTSUPP;
/* force revalidate of size and timestamps of target file now
that target is updated on the server */
CIFS_I(target_inode)->time = 0;
/* although unlocking in the reverse order from locking is not
strictly necessary here it is a little cleaner to be consistent */
unlock_two_nondirectories(src_inode, target_inode);
out:
free_xid(xid);
return rc;
}
const struct file_operations cifs_file_ops = {
.read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
......@@ -925,6 +978,7 @@ const struct file_operations cifs_file_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
......@@ -941,6 +995,8 @@ const struct file_operations cifs_file_strict_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
......@@ -957,6 +1013,7 @@ const struct file_operations cifs_file_direct_ops = {
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
......@@ -973,6 +1030,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
......@@ -988,6 +1046,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
......@@ -1003,6 +1062,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
......@@ -1013,6 +1073,7 @@ const struct file_operations cifs_dir_ops = {
.release = cifs_closedir,
.read = generic_read_dir,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.llseek = generic_file_llseek,
};
......
......@@ -130,7 +130,6 @@ extern int cifs_setxattr(struct dentry *, const char *, const void *,
extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_CIFS_NFSD_EXPORT
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
......
......@@ -34,73 +34,36 @@
#include "cifs_ioctl.h"
#include <linux/btrfs.h>
static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
unsigned long srcfd, u64 off, u64 len, u64 destoff,
bool dup_extents)
static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
struct file *dst_file)
{
int rc;
struct cifsFileInfo *smb_file_target = dst_file->private_data;
struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
struct cifs_tcon *target_tcon;
struct fd src_file;
struct cifsFileInfo *smb_file_src;
struct inode *src_inode;
struct cifsFileInfo *smb_file_target;
struct cifs_tcon *src_tcon;
struct cifs_tcon *target_tcon;
int rc;
cifs_dbg(FYI, "ioctl clone range\n");
/* the destination must be opened for writing */
if (!(dst_file->f_mode & FMODE_WRITE)) {
cifs_dbg(FYI, "file target not open for write\n");
return -EINVAL;
}
/* check if target volume is readonly and take reference */
rc = mnt_want_write_file(dst_file);
if (rc) {
cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
return rc;
}
src_file = fdget(srcfd);
if (!src_file.file) {
rc = -EBADF;
goto out_drop_write;
}
if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
rc = -EBADF;
cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
goto out_fput;
}
if ((!src_file.file->private_data) || (!dst_file->private_data)) {
if (!src_file->private_data || !dst_file->private_data) {
rc = -EBADF;
cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
goto out_fput;
goto out;
}
rc = -EXDEV;
smb_file_target = dst_file->private_data;
smb_file_src = src_file.file->private_data;
smb_file_src = src_file->private_data;
src_tcon = tlink_tcon(smb_file_src->tlink);
target_tcon = tlink_tcon(smb_file_target->tlink);
/* check source and target on same server (or volume if dup_extents) */
if (dup_extents && (src_tcon != target_tcon)) {
cifs_dbg(VFS, "source and target of copy not on same share\n");
goto out_fput;
}
if (!dup_extents && (src_tcon->ses != target_tcon->ses)) {
if (src_tcon->ses != target_tcon->ses) {
cifs_dbg(VFS, "source and target of copy not on same server\n");
goto out_fput;
goto out;
}
src_inode = file_inode(src_file.file);
rc = -EINVAL;
if (S_ISDIR(src_inode->i_mode))
goto out_fput;
/*
* Note: cifs case is easier than btrfs since server responsible for
* checks for proper open modes and file type and if it wants
......@@ -108,34 +71,66 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
*/
lock_two_nondirectories(target_inode, src_inode);
/* determine range to clone */
rc = -EINVAL;
if (off + len > src_inode->i_size || off + len < off)
goto out_unlock;
if (len == 0)
len = src_inode->i_size - off;
cifs_dbg(FYI, "about to flush pages\n");
/* should we flush first and last page first */
truncate_inode_pages_range(&target_inode->i_data, destoff,
PAGE_CACHE_ALIGN(destoff + len)-1);
truncate_inode_pages(&target_inode->i_data, 0);
if (dup_extents && target_tcon->ses->server->ops->duplicate_extents)
rc = target_tcon->ses->server->ops->duplicate_extents(xid,
smb_file_src, smb_file_target, off, len, destoff);
else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
if (target_tcon->ses->server->ops->clone_range)
rc = target_tcon->ses->server->ops->clone_range(xid,
smb_file_src, smb_file_target, off, len, destoff);
smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
else
rc = -EOPNOTSUPP;
/* force revalidate of size and timestamps of target file now
that target is updated on the server */
CIFS_I(target_inode)->time = 0;
out_unlock:
/* although unlocking in the reverse order from locking is not
strictly necessary here it is a little cleaner to be consistent */
unlock_two_nondirectories(src_inode, target_inode);
out:
return rc;
}
static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
unsigned long srcfd)
{
int rc;
struct fd src_file;
struct inode *src_inode;
cifs_dbg(FYI, "ioctl clone range\n");
/* the destination must be opened for writing */
if (!(dst_file->f_mode & FMODE_WRITE)) {
cifs_dbg(FYI, "file target not open for write\n");
return -EINVAL;
}
/* check if target volume is readonly and take reference */
rc = mnt_want_write_file(dst_file);
if (rc) {
cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
return rc;
}
src_file = fdget(srcfd);
if (!src_file.file) {
rc = -EBADF;
goto out_drop_write;
}
if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
rc = -EBADF;
cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
goto out_fput;
}
src_inode = file_inode(src_file.file);
rc = -EINVAL;
if (S_ISDIR(src_inode->i_mode))
goto out_fput;
rc = cifs_file_clone_range(xid, src_file.file, dst_file);
out_fput:
fdput(src_file);
out_drop_write:
......@@ -256,10 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
}
break;
case CIFS_IOC_COPYCHUNK_FILE:
rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false);
break;
case BTRFS_IOC_CLONE:
rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
rc = cifs_ioctl_clone(xid, filep, arg);
break;
case CIFS_IOC_SET_INTEGRITY:
if (pSMBFile == NULL)
......
......@@ -1601,6 +1601,11 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
goto out_fput;
#endif
case FICLONE:
case FICLONERANGE:
case FIDEDUPERANGE:
goto do_ioctl;
case FIBMAP:
case FIGETBSZ:
case FIONREAD:
......
......@@ -215,6 +215,29 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
return error;
}
static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
{
struct fd src_file = fdget(srcfd);
int ret;
if (!src_file.file)
return -EBADF;
ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
fdput(src_file);
return ret;
}
static long ioctl_file_clone_range(struct file *file, void __user *argp)
{
struct file_clone_range args;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
return ioctl_file_clone(file, args.src_fd, args.src_offset,
args.src_length, args.dest_offset);
}
#ifdef CONFIG_BLOCK
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
......@@ -545,6 +568,41 @@ static int ioctl_fsthaw(struct file *filp)
return thaw_super(sb);
}
static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
{
struct file_dedupe_range __user *argp = arg;
struct file_dedupe_range *same = NULL;
int ret;
unsigned long size;
u16 count;
if (get_user(count, &argp->dest_count)) {
ret = -EFAULT;
goto out;
}
size = offsetof(struct file_dedupe_range __user, info[count]);
same = memdup_user(argp, size);
if (IS_ERR(same)) {
ret = PTR_ERR(same);
same = NULL;
goto out;
}
ret = vfs_dedupe_file_range(file, same);
if (ret)
goto out;
ret = copy_to_user(argp, same, size);
if (ret)
ret = -EFAULT;
out:
kfree(same);
return ret;
}
/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
......@@ -600,6 +658,15 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
case FIGETBSZ:
return put_user(inode->i_sb->s_blocksize, argp);
case FICLONE:
return ioctl_file_clone(filp, arg, 0, 0, 0);
case FICLONERANGE:
return ioctl_file_clone_range(filp, argp);
case FIDEDUPERANGE:
return ioctl_file_dedupe_range(filp, argp);
default:
if (S_ISREG(inode->i_mode))
error = file_ioctl(filp, cmd, arg);
......
......@@ -1258,20 +1258,16 @@ int locks_mandatory_locked(struct file *file)
/**
* locks_mandatory_area - Check for a conflicting lock
* @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ
* for shared
* @inode: the file to check
* @inode: the file to check
* @filp: how the file was opened (if it was)
* @offset: start of area to check
* @count: length of area to check
* @start: first byte in the file to check
* @end: lastbyte in the file to check
* @type: %F_WRLCK for a write lock, else %F_RDLCK
*
* Searches the inode's list of locks to find any POSIX locks which conflict.
* This function is called from rw_verify_area() and
* locks_verify_truncate().
*/
int locks_mandatory_area(int read_write, struct inode *inode,
struct file *filp, loff_t offset,
size_t count)
int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
loff_t end, unsigned char type)
{
struct file_lock fl;
int error;
......@@ -1283,9 +1279,9 @@ int locks_mandatory_area(int read_write, struct inode *inode,
fl.fl_flags = FL_POSIX | FL_ACCESS;
if (filp && !(filp->f_flags & O_NONBLOCK))
sleep = true;
fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
fl.fl_start = offset;
fl.fl_end = offset + count - 1;
fl.fl_type = type;
fl.fl_start = start;
fl.fl_end = end;
for (;;) {
if (filp) {
......
......@@ -195,65 +195,27 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
return nfs42_proc_allocate(filep, offset, len);
}
static noinline long
nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
u64 src_off, u64 dst_off, u64 count)
static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, u64 count)
{
struct inode *dst_inode = file_inode(dst_file);
struct nfs_server *server = NFS_SERVER(dst_inode);
struct fd src_file;
struct inode *src_inode;
struct inode *src_inode = file_inode(src_file);
unsigned int bs = server->clone_blksize;
bool same_inode = false;
int ret;
/* dst file must be opened for writing */
if (!(dst_file->f_mode & FMODE_WRITE))
return -EINVAL;
ret = mnt_want_write_file(dst_file);
if (ret)
return ret;
src_file = fdget(srcfd);
if (!src_file.file) {
ret = -EBADF;
goto out_drop_write;
}
src_inode = file_inode(src_file.file);
if (src_inode == dst_inode)
same_inode = true;
/* src file must be opened for reading */
if (!(src_file.file->f_mode & FMODE_READ))
goto out_fput;
/* src and dst must be regular files */
ret = -EISDIR;
if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode))
goto out_fput;
ret = -EXDEV;
if (src_file.file->f_path.mnt != dst_file->f_path.mnt ||
src_inode->i_sb != dst_inode->i_sb)
goto out_fput;
/* check alignment w.r.t. clone_blksize */
ret = -EINVAL;
if (bs) {
if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs))
goto out_fput;
goto out;
if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count))
goto out_fput;
goto out;
}
/* verify if ranges are overlapped within the same file */
if (same_inode) {
if (dst_off + count > src_off && dst_off < src_off + count)
goto out_fput;
}
if (src_inode == dst_inode)
same_inode = true;
/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
if (same_inode) {
......@@ -275,7 +237,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
if (ret)
goto out_unlock;
ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count);
ret = nfs42_proc_clone(src_file, dst_file, src_off, dst_off, count);
/* truncate inode page cache of the dst range so that future reads can fetch
* new data from server */
......@@ -292,37 +254,9 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
mutex_unlock(&dst_inode->i_mutex);
mutex_unlock(&src_inode->i_mutex);
}
out_fput:
fdput(src_file);
out_drop_write:
mnt_drop_write_file(dst_file);
out:
return ret;
}
static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
{
struct btrfs_ioctl_clone_range_args args;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
args.dest_offset, args.src_length);
}
long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
switch (cmd) {
case BTRFS_IOC_CLONE:
return nfs42_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
return nfs42_ioctl_clone_range(file, argp);
}
return -ENOTTY;
}
#endif /* CONFIG_NFS_V4_2 */
const struct file_operations nfs4_file_operations = {
......@@ -342,8 +276,7 @@ const struct file_operations nfs4_file_operations = {
#ifdef CONFIG_NFS_V4_2
.llseek = nfs4_file_llseek,
.fallocate = nfs42_fallocate,
.unlocked_ioctl = nfs4_ioctl,
.compat_ioctl = nfs4_ioctl,
.clone_file_range = nfs42_clone_file_range,
#else
.llseek = nfs_file_llseek,
#endif
......
......@@ -774,8 +774,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
RD_STATE, &read->rd_filp, &read->rd_tmp_file);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&read->rd_stateid, RD_STATE,
&read->rd_filp, &read->rd_tmp_file);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
......@@ -921,7 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&setattr->sa_stateid, WR_STATE, NULL, NULL);
&cstate->current_fh, &setattr->sa_stateid,
WR_STATE, NULL, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
......@@ -985,8 +987,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;
status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
&filp, NULL);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
stateid, WR_STATE, &filp, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
......@@ -1009,6 +1011,47 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
static __be32
nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_clone *clone)
{
struct file *src, *dst;
__be32 status;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
&clone->cl_src_stateid, RD_STATE,
&src, NULL);
if (status) {
dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
goto out;
}
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&clone->cl_dst_stateid, WR_STATE,
&dst, NULL);
if (status) {
dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
goto out_put_src;
}
/* fix up for NFS-specific error code */
if (!S_ISREG(file_inode(src)->i_mode) ||
!S_ISREG(file_inode(dst)->i_mode)) {
status = nfserr_wrong_type;
goto out_put_dst;
}
status = nfsd4_clone_file_range(src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count);
out_put_dst:
fput(dst);
out_put_src:
fput(src);
out:
return status;
}
static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
......@@ -1016,7 +1059,7 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status = nfserr_notsupp;
struct file *file;
status = nfs4_preprocess_stateid_op(rqstp, cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
WR_STATE, &file, NULL);
if (status != nfs_ok) {
......@@ -1055,7 +1098,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct file *file;
status = nfs4_preprocess_stateid_op(rqstp, cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
RD_STATE, &file, NULL);
if (status) {
......@@ -2279,6 +2322,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_CLONE] = {
.op_func = (nfsd4op_func)nfsd4_clone,
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_name = "OP_CLONE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
......
......@@ -4797,10 +4797,9 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
*/
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
int flags, struct file **filpp, bool *tmp_file)
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
{
struct svc_fh *fhp = &cstate->current_fh;
struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
......
......@@ -1674,6 +1674,25 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
DECODE_TAIL;
}
static __be32
nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
{
DECODE_HEAD;
status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid);
if (status)
return status;
status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid);
if (status)
return status;
READ_BUF(8 + 8 + 8);
p = xdr_decode_hyper(p, &clone->cl_src_pos);
p = xdr_decode_hyper(p, &clone->cl_dst_pos);
p = xdr_decode_hyper(p, &clone->cl_count);
DECODE_TAIL;
}
static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
......@@ -1785,6 +1804,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
};
static inline bool
......@@ -4292,6 +4312,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
[OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
};
/*
......
......@@ -578,8 +578,8 @@ struct nfsd4_compound_state;
struct nfsd_net;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
int flags, struct file **filp, bool *tmp_file);
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
......
......@@ -36,6 +36,7 @@
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
#include "../internal.h"
#include "acl.h"
#include "idmap.h"
#endif /* CONFIG_NFSD_V4 */
......@@ -498,6 +499,13 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
count));
}
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, loff_t len,
int flags)
......
......@@ -56,6 +56,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
u64, u64);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
......
......@@ -491,6 +491,15 @@ struct nfsd4_fallocate {
u64 falloc_length;
};
struct nfsd4_clone {
/* request */
stateid_t cl_src_stateid;
stateid_t cl_dst_stateid;
u64 cl_src_pos;
u64 cl_dst_pos;
u64 cl_count;
};
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
......@@ -555,6 +564,7 @@ struct nfsd4_op {
/* NFSv4.2 */
struct nfsd4_fallocate allocate;
struct nfsd4_fallocate deallocate;
struct nfsd4_clone clone;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
......
......@@ -16,6 +16,7 @@
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include "internal.h"
#include <asm/uaccess.h>
......@@ -395,9 +396,8 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
}
if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
retval = locks_mandatory_area(
read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
inode, file, pos, count);
retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
read_write == READ ? F_RDLCK : F_WRLCK);
if (retval < 0)
return retval;
}
......@@ -1327,3 +1327,299 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
#endif
/*
* copy_file_range() differs from regular file read and write in that it
* specifically allows return partial success. When it does so is up to
* the copy_file_range method.
*/
ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out);
ssize_t ret;
if (flags != 0)
return -EINVAL;
/* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT */
ret = rw_verify_area(READ, file_in, &pos_in, len);
if (ret >= 0)
ret = rw_verify_area(WRITE, file_out, &pos_out, len);
if (ret < 0)
return ret;
if (!(file_in->f_mode & FMODE_READ) ||
!(file_out->f_mode & FMODE_WRITE) ||
(file_out->f_flags & O_APPEND))
return -EBADF;
/* this could be relaxed once a method supports cross-fs copies */
if (inode_in->i_sb != inode_out->i_sb)
return -EXDEV;
if (len == 0)
return 0;
ret = mnt_want_write_file(file_out);
if (ret)
return ret;
ret = -EOPNOTSUPP;
if (file_out->f_op->copy_file_range)
ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
pos_out, len, flags);
if (ret == -EOPNOTSUPP)
ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
if (ret > 0) {
fsnotify_access(file_in);
add_rchar(current, ret);
fsnotify_modify(file_out);
add_wchar(current, ret);
}
inc_syscr(current);
inc_syscw(current);
mnt_drop_write_file(file_out);
return ret;
}
EXPORT_SYMBOL(vfs_copy_file_range);
SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
int, fd_out, loff_t __user *, off_out,
size_t, len, unsigned int, flags)
{
loff_t pos_in;
loff_t pos_out;
struct fd f_in;
struct fd f_out;
ssize_t ret = -EBADF;
f_in = fdget(fd_in);
if (!f_in.file)
goto out2;
f_out = fdget(fd_out);
if (!f_out.file)
goto out1;
ret = -EFAULT;
if (off_in) {
if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
goto out;
} else {
pos_in = f_in.file->f_pos;
}
if (off_out) {
if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
goto out;
} else {
pos_out = f_out.file->f_pos;
}
ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
flags);
if (ret > 0) {
pos_in += ret;
pos_out += ret;
if (off_in) {
if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
ret = -EFAULT;
} else {
f_in.file->f_pos = pos_in;
}
if (off_out) {
if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
ret = -EFAULT;
} else {
f_out.file->f_pos = pos_out;
}
}
out:
fdput(f_out);
out1:
fdput(f_in);
out2:
return ret;
}
static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
{
struct inode *inode = file_inode(file);
if (unlikely(pos < 0))
return -EINVAL;
if (unlikely((loff_t) (pos + len) < 0))
return -EINVAL;
if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
loff_t end = len ? pos + len - 1 : OFFSET_MAX;
int retval;
retval = locks_mandatory_area(inode, file, pos, end,
write ? F_WRLCK : F_RDLCK);
if (retval < 0)
return retval;
}
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}
int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len)
{
struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out);
int ret;
if (inode_in->i_sb != inode_out->i_sb ||
file_in->f_path.mnt != file_out->f_path.mnt)
return -EXDEV;
if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
return -EISDIR;
if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
return -EINVAL;
if (!(file_in->f_mode & FMODE_READ) ||
!(file_out->f_mode & FMODE_WRITE) ||
(file_out->f_flags & O_APPEND) ||
!file_in->f_op->clone_file_range)
return -EBADF;
ret = clone_verify_area(file_in, pos_in, len, false);
if (ret)
return ret;
ret = clone_verify_area(file_out, pos_out, len, true);
if (ret)
return ret;
if (pos_in + len > i_size_read(inode_in))
return -EINVAL;
ret = mnt_want_write_file(file_out);
if (ret)
return ret;
ret = file_in->f_op->clone_file_range(file_in, pos_in,
file_out, pos_out, len);
if (!ret) {
fsnotify_access(file_in);
fsnotify_modify(file_out);
}
mnt_drop_write_file(file_out);
return ret;
}
EXPORT_SYMBOL(vfs_clone_file_range);
int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
{
struct file_dedupe_range_info *info;
struct inode *src = file_inode(file);
u64 off;
u64 len;
int i;
int ret;
bool is_admin = capable(CAP_SYS_ADMIN);
u16 count = same->dest_count;
struct file *dst_file;
loff_t dst_off;
ssize_t deduped;
if (!(file->f_mode & FMODE_READ))
return -EINVAL;
if (same->reserved1 || same->reserved2)
return -EINVAL;
off = same->src_offset;
len = same->src_length;
ret = -EISDIR;
if (S_ISDIR(src->i_mode))
goto out;
ret = -EINVAL;
if (!S_ISREG(src->i_mode))
goto out;
ret = clone_verify_area(file, off, len, false);
if (ret < 0)
goto out;
ret = 0;
/* pre-format output fields to sane values */
for (i = 0; i < count; i++) {
same->info[i].bytes_deduped = 0ULL;
same->info[i].status = FILE_DEDUPE_RANGE_SAME;
}
for (i = 0, info = same->info; i < count; i++, info++) {
struct inode *dst;
struct fd dst_fd = fdget(info->dest_fd);
dst_file = dst_fd.file;
if (!dst_file) {
info->status = -EBADF;
goto next_loop;
}
dst = file_inode(dst_file);
ret = mnt_want_write_file(dst_file);
if (ret) {
info->status = ret;
goto next_loop;
}
dst_off = info->dest_offset;
ret = clone_verify_area(dst_file, dst_off, len, true);
if (ret < 0) {
info->status = ret;
goto next_file;
}
ret = 0;
if (info->reserved) {
info->status = -EINVAL;
} else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
info->status = -EINVAL;
} else if (file->f_path.mnt != dst_file->f_path.mnt) {
info->status = -EXDEV;
} else if (S_ISDIR(dst->i_mode)) {
info->status = -EISDIR;
} else if (dst_file->f_op->dedupe_file_range == NULL) {
info->status = -EINVAL;
} else {
deduped = dst_file->f_op->dedupe_file_range(file, off,
len, dst_file,
info->dest_offset);
if (deduped == -EBADE)
info->status = FILE_DEDUPE_RANGE_DIFFERS;
else if (deduped < 0)
info->status = deduped;
else
info->bytes_deduped += deduped;
}
next_file:
mnt_drop_write_file(dst_file);
next_loop:
fdput(dst_fd);
}
out:
return ret;
}
EXPORT_SYMBOL(vfs_dedupe_file_range);
......@@ -1630,6 +1630,12 @@ struct file_operations {
#ifndef CONFIG_MMU
unsigned (*mmap_capabilities)(struct file *);
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
u64);
ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
u64);
};
struct inode_operations {
......@@ -1680,6 +1686,12 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
unsigned long, loff_t *);
extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
unsigned long, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len);
extern int vfs_dedupe_file_range(struct file *file,
struct file_dedupe_range *same);
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
......@@ -2027,12 +2039,9 @@ extern struct kobject *fs_kobj;
#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
#ifdef CONFIG_MANDATORY_FILE_LOCKING
extern int locks_mandatory_locked(struct file *);
extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char);
/*
* Candidates for mandatory locking have the setgid bit set
......@@ -2062,17 +2071,19 @@ static inline int locks_verify_locked(struct file *file)
}
static inline int locks_verify_truncate(struct inode *inode,
struct file *filp,
struct file *f,
loff_t size)
{
if (inode->i_flctx && mandatory_lock(inode))
return locks_mandatory_area(
FLOCK_VERIFY_WRITE, inode, filp,
size < inode->i_size ? size : inode->i_size,
(size < inode->i_size ? inode->i_size - size
: size - inode->i_size)
);
return 0;
if (!inode->i_flctx || !mandatory_lock(inode))
return 0;
if (size < inode->i_size) {
return locks_mandatory_area(inode, f, size, inode->i_size - 1,
F_WRLCK);
} else {
return locks_mandatory_area(inode, f, inode->i_size, size - 1,
F_WRLCK);
}
}
#else /* !CONFIG_MANDATORY_FILE_LOCKING */
......@@ -2082,9 +2093,8 @@ static inline int locks_mandatory_locked(struct file *file)
return 0;
}
static inline int locks_mandatory_area(int rw, struct inode *inode,
struct file *filp, loff_t offset,
size_t count)
static inline int locks_mandatory_area(struct inode *inode, struct file *filp,
loff_t start, loff_t end, unsigned char type)
{
return 0;
}
......
......@@ -139,10 +139,10 @@ enum nfs_opnum4 {
Needs to be updated if more operations are defined in future.*/
#define FIRST_NFS4_OP OP_ACCESS
#define LAST_NFS4_OP OP_WRITE_SAME
#define LAST_NFS40_OP OP_RELEASE_LOCKOWNER
#define LAST_NFS41_OP OP_RECLAIM_COMPLETE
#define LAST_NFS42_OP OP_WRITE_SAME
#define LAST_NFS42_OP OP_CLONE
#define LAST_NFS4_OP LAST_NFS42_OP
enum nfsstat4 {
NFS4_OK = 0,
......
......@@ -886,6 +886,9 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
const char __user *const __user *envp, int flags);
asmlinkage long sys_membarrier(int cmd, int flags);
asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in,
int fd_out, loff_t __user *off_out,
size_t len, unsigned int flags);
asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
......
......@@ -715,9 +715,11 @@ __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
__SYSCALL(__NR_membarrier, sys_membarrier)
#define __NR_mlock2 284
__SYSCALL(__NR_mlock2, sys_mlock2)
#define __NR_copy_file_range 285
__SYSCALL(__NR_copy_file_range, sys_copy_file_range)
#undef __NR_syscalls
#define __NR_syscalls 285
#define __NR_syscalls 286
/*
* All syscalls below here should go away really,
......
......@@ -39,12 +39,48 @@
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
struct file_clone_range {
__s64 src_fd;
__u64 src_offset;
__u64 src_length;
__u64 dest_offset;
};
struct fstrim_range {
__u64 start;
__u64 len;
__u64 minlen;
};
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
/* from struct btrfs_ioctl_file_extent_same_info */
struct file_dedupe_range_info {
__s64 dest_fd; /* in - destination file */
__u64 dest_offset; /* in - start of extent in destination */
__u64 bytes_deduped; /* out - total # of bytes we were able
* to dedupe from this file. */
/* status of this dedupe operation:
* < 0 for error
* == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
* == FILE_DEDUPE_RANGE_DIFFERS if data differs
*/
__s32 status; /* out - see above description */
__u32 reserved; /* must be zero */
};
/* from struct btrfs_ioctl_file_extent_same_args */
struct file_dedupe_range {
__u64 src_offset; /* in - start of extent in source */
__u64 src_length; /* in - length of extent */
__u16 dest_count; /* in - total elements in info array */
__u16 reserved1; /* must be zero */
__u32 reserved2; /* must be zero */
struct file_dedupe_range_info info[0];
};
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
unsigned long nr_files; /* read only */
......@@ -159,6 +195,9 @@ struct inodes_stat_t {
#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
#define FITHAW _IOWR('X', 120, int) /* Thaw */
#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
#define FICLONE _IOW(0x94, 9, int)
#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range)
#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range)
#define FS_IOC_GETFLAGS _IOR('f', 1, long)
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
......
......@@ -174,6 +174,7 @@ cond_syscall(sys_setfsuid);
cond_syscall(sys_setfsgid);
cond_syscall(sys_capget);
cond_syscall(sys_capset);
cond_syscall(sys_copy_file_range);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment