Commit d1f53233 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull VFS splice updates from Al Viro:
 "There's a bunch of branches this cycle, both mine and from other folks
  and I'd rather send pull requests separately.

  This one is the conversion of ->splice_read() to ITER_PIPE iov_iter
  (and introduction of such). Gets rid of a lot of code in fs/splice.c
  and elsewhere; there will be followups, but these are for the next
  cycle...  Some pipe/splice-related cleanups from Miklos in the same
  branch as well"

* 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  pipe: fix comment in pipe_buf_operations
  pipe: add pipe_buf_steal() helper
  pipe: add pipe_buf_confirm() helper
  pipe: add pipe_buf_release() helper
  pipe: add pipe_buf_get() helper
  relay: simplify relay_file_read()
  switch default_file_splice_read() to use of pipe-backed iov_iter
  switch generic_file_splice_read() to use of ->read_iter()
  new iov_iter flavour: pipe-backed
  fuse_dev_splice_read(): switch to add_to_pipe()
  skb_splice_bits(): get rid of callback
  new helper: add_to_pipe()
  splice: lift pipe_lock out of splice_to_pipe()
  splice: switch get_iovec_page_array() to iov_iter
  splice_to_pipe(): don't open-code wakeup_pipe_readers()
  consistent treatment of EFAULT on O_DIRECT read/write
parents 2eee010d a949e639
......@@ -889,7 +889,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
return 0;
/* Try lock this page */
if (buf->ops->steal(pipe, buf) == 0) {
if (pipe_buf_steal(pipe, buf) == 0) {
/* Get reference and unlock page for moving */
get_page(buf->page);
unlock_page(buf->page);
......
......@@ -1138,45 +1138,31 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
range_lock_init(&range, *ppos, *ppos + count - 1);
vio->vui_fd = LUSTRE_FPRIVATE(file);
vio->vui_io_subtype = args->via_io_subtype;
vio->vui_iter = args->u.normal.via_iter;
vio->vui_iocb = args->u.normal.via_iocb;
/*
* Direct IO reads must also take range lock,
* or multiple reads will try to work on the same pages
* See LU-6227 for details.
*/
if (((iot == CIT_WRITE) ||
(iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
!(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
range.rl_node.in_extent.start,
range.rl_node.in_extent.end);
result = range_lock(&lli->lli_write_tree,
&range);
if (result < 0)
goto out;
switch (vio->vui_io_subtype) {
case IO_NORMAL:
vio->vui_iter = args->u.normal.via_iter;
vio->vui_iocb = args->u.normal.via_iocb;
/*
* Direct IO reads must also take range lock,
* or multiple reads will try to work on the same pages
* See LU-6227 for details.
*/
if (((iot == CIT_WRITE) ||
(iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
!(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
range.rl_node.in_extent.start,
range.rl_node.in_extent.end);
result = range_lock(&lli->lli_write_tree,
&range);
if (result < 0)
goto out;
range_locked = true;
}
down_read(&lli->lli_trunc_sem);
break;
case IO_SPLICE:
vio->u.splice.vui_pipe = args->u.splice.via_pipe;
vio->u.splice.vui_flags = args->u.splice.via_flags;
break;
default:
CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
LBUG();
range_locked = true;
}
down_read(&lli->lli_trunc_sem);
ll_cl_add(file, env, io);
result = cl_io_loop(env, io);
ll_cl_remove(file, env);
if (args->via_io_subtype == IO_NORMAL)
up_read(&lli->lli_trunc_sem);
up_read(&lli->lli_trunc_sem);
if (range_locked) {
CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
range.rl_node.in_extent.start,
......@@ -1235,7 +1221,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (IS_ERR(env))
return PTR_ERR(env);
args = ll_env_args(env, IO_NORMAL);
args = ll_env_args(env);
args->u.normal.via_iter = to;
args->u.normal.via_iocb = iocb;
......@@ -1259,7 +1245,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (IS_ERR(env))
return PTR_ERR(env);
args = ll_env_args(env, IO_NORMAL);
args = ll_env_args(env);
args->u.normal.via_iter = from;
args->u.normal.via_iocb = iocb;
......@@ -1269,31 +1255,6 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return result;
}
/*
* Send file content (through pagecache) somewhere with helper
*/
static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t count,
unsigned int flags)
{
struct lu_env *env;
struct vvp_io_args *args;
ssize_t result;
int refcheck;
env = cl_env_get(&refcheck);
if (IS_ERR(env))
return PTR_ERR(env);
args = ll_env_args(env, IO_SPLICE);
args->u.splice.via_pipe = pipe;
args->u.splice.via_flags = flags;
result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
cl_env_put(env, &refcheck);
return result;
}
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
__u64 flags, struct lov_user_md *lum,
int lum_size)
......@@ -3267,7 +3228,7 @@ struct file_operations ll_file_operations = {
.release = ll_file_release,
.mmap = ll_file_mmap,
.llseek = ll_file_seek,
.splice_read = ll_file_splice_read,
.splice_read = generic_file_splice_read,
.fsync = ll_fsync,
.flush = ll_flush
};
......@@ -3280,7 +3241,7 @@ struct file_operations ll_file_operations_flock = {
.release = ll_file_release,
.mmap = ll_file_mmap,
.llseek = ll_file_seek,
.splice_read = ll_file_splice_read,
.splice_read = generic_file_splice_read,
.fsync = ll_fsync,
.flush = ll_flush,
.flock = ll_file_flock,
......@@ -3296,7 +3257,7 @@ struct file_operations ll_file_operations_noflock = {
.release = ll_file_release,
.mmap = ll_file_mmap,
.llseek = ll_file_seek,
.splice_read = ll_file_splice_read,
.splice_read = generic_file_splice_read,
.fsync = ll_fsync,
.flush = ll_flush,
.flock = ll_file_noflock,
......
......@@ -908,17 +908,11 @@ void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
*/
struct vvp_io_args {
/** normal/splice */
enum vvp_io_subtype via_io_subtype;
union {
struct {
struct kiocb *via_iocb;
struct iov_iter *via_iter;
} normal;
struct {
struct pipe_inode_info *via_pipe;
unsigned int via_flags;
} splice;
} u;
};
......@@ -946,14 +940,9 @@ static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
return lti;
}
static inline struct vvp_io_args *ll_env_args(const struct lu_env *env,
enum vvp_io_subtype type)
static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)
{
struct vvp_io_args *via = &ll_env_info(env)->lti_args;
via->via_io_subtype = type;
return via;
return &ll_env_info(env)->lti_args;
}
void ll_queue_done_writing(struct inode *inode, unsigned long flags);
......
......@@ -49,14 +49,6 @@ struct obd_device;
struct obd_export;
struct page;
/* specific architecture can implement only part of this list */
enum vvp_io_subtype {
/** normal IO */
IO_NORMAL,
/** io started from splice_{read|write} */
IO_SPLICE
};
/**
* IO state private to IO state private to VVP layer.
*/
......@@ -98,10 +90,6 @@ struct vvp_io {
*/
bool ft_flags_valid;
} fault;
struct {
struct pipe_inode_info *vui_pipe;
unsigned int vui_flags;
} splice;
struct {
struct cl_page_list vui_queue;
unsigned long vui_written;
......@@ -110,8 +98,6 @@ struct vvp_io {
} write;
} u;
enum vvp_io_subtype vui_io_subtype;
/**
* Layout version when this IO is initialized
*/
......
......@@ -53,18 +53,6 @@ static struct vvp_io *cl2vvp_io(const struct lu_env *env,
return vio;
}
/**
* True, if \a io is a normal io, False for splice_{read,write}
*/
static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
{
struct vvp_io *vio = vvp_env_io(env);
LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
return vio->vui_io_subtype == IO_NORMAL;
}
/**
* For swapping layout. The file's layout may have changed.
* To avoid populating pages to a wrong stripe, we have to verify the
......@@ -390,9 +378,6 @@ static int vvp_mmap_locks(const struct lu_env *env,
LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
if (!cl_is_normalio(env, io))
return 0;
if (!vio->vui_iter) /* nfs or loop back device write */
return 0;
......@@ -461,15 +446,10 @@ static void vvp_io_advance(const struct lu_env *env,
const struct cl_io_slice *ios,
size_t nob)
{
struct vvp_io *vio = cl2vvp_io(env, ios);
struct cl_io *io = ios->cis_io;
struct cl_object *obj = ios->cis_io->ci_obj;
struct vvp_io *vio = cl2vvp_io(env, ios);
CLOBINVRNT(env, obj, vvp_object_invariant(obj));
if (!cl_is_normalio(env, io))
return;
iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob);
}
......@@ -478,7 +458,7 @@ static void vvp_io_update_iov(const struct lu_env *env,
{
size_t size = io->u.ci_rw.crw_count;
if (!cl_is_normalio(env, io) || !vio->vui_iter)
if (!vio->vui_iter)
return;
iov_iter_truncate(vio->vui_iter, size);
......@@ -715,25 +695,8 @@ static int vvp_io_read_start(const struct lu_env *env,
/* BUG: 5972 */
file_accessed(file);
switch (vio->vui_io_subtype) {
case IO_NORMAL:
LASSERT(vio->vui_iocb->ki_pos == pos);
result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
break;
case IO_SPLICE:
result = generic_file_splice_read(file, &pos,
vio->u.splice.vui_pipe, cnt,
vio->u.splice.vui_flags);
/* LU-1109: do splice read stripe by stripe otherwise if it
* may make nfsd stuck if this read occupied all internal pipe
* buffers.
*/
io->ci_continue = 0;
break;
default:
CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
LBUG();
}
LASSERT(vio->vui_iocb->ki_pos == pos);
result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
out:
if (result >= 0) {
......
......@@ -37,27 +37,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
}
static ssize_t
coda_file_splice_read(struct file *coda_file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t count,
unsigned int flags)
{
ssize_t (*splice_read)(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
struct coda_file_info *cfi;
struct file *host_file;
cfi = CODA_FTOC(coda_file);
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
splice_read = host_file->f_op->splice_read;
if (!splice_read)
splice_read = default_file_splice_read;
return splice_read(host_file, ppos, pipe, count, flags);
}
static ssize_t
coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
{
......@@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = {
.open = coda_open,
.release = coda_release,
.fsync = coda_fsync,
.splice_read = coda_file_splice_read,
.splice_read = generic_file_splice_read,
};
......@@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
if ((dio->op == REQ_OP_READ) &&
((offset + transferred) > dio->i_size))
transferred = dio->i_size - offset;
/* ignore EFAULT if some IO has been done */
if (unlikely(ret == -EFAULT) && transferred)
ret = 0;
}
if (ret == 0)
......
......@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
struct pipe_buffer *buf = cs->pipebufs;
if (!cs->write) {
err = buf->ops->confirm(cs->pipe, buf);
err = pipe_buf_confirm(cs->pipe, buf);
if (err)
return err;
......@@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
fuse_copy_finish(cs);
err = buf->ops->confirm(cs->pipe, buf);
err = pipe_buf_confirm(cs->pipe, buf);
if (err)
return err;
......@@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (cs->len != PAGE_SIZE)
goto out_fallback;
if (buf->ops->steal(cs->pipe, buf) != 0)
if (pipe_buf_steal(cs->pipe, buf) != 0)
goto out_fallback;
newpage = buf->page;
......@@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
{
int ret;
int total, ret;
int page_nr = 0;
int do_wakeup = 0;
struct pipe_buffer *bufs;
struct fuse_copy_state cs;
struct fuse_dev *fud = fuse_get_dev(in);
......@@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0)
goto out;
ret = 0;
pipe_lock(pipe);
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
if (!ret)
ret = -EPIPE;
goto out_unlock;
}
if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
ret = -EIO;
goto out_unlock;
goto out;
}
while (page_nr < cs.nr_segs) {
int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
struct pipe_buffer *buf = pipe->bufs + newbuf;
buf->page = bufs[page_nr].page;
buf->offset = bufs[page_nr].offset;
buf->len = bufs[page_nr].len;
for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
/*
* Need to be careful about this. Having buf->ops in module
* code can Oops if the buffer persists after module unload.
*/
buf->ops = &nosteal_pipe_buf_ops;
pipe->nrbufs++;
page_nr++;
ret += buf->len;
if (pipe->files)
do_wakeup = 1;
}
out_unlock:
pipe_unlock(pipe);
if (do_wakeup) {
smp_mb();
if (waitqueue_active(&pipe->wait))
wake_up_interruptible(&pipe->wait);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
bufs[page_nr].ops = &nosteal_pipe_buf_ops;
ret = add_to_pipe(pipe, &bufs[page_nr++]);
if (unlikely(ret < 0))
break;
}
if (total)
ret = total;
out:
for (; page_nr < cs.nr_segs; page_nr++)
put_page(bufs[page_nr].page);
......@@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
ibuf->ops->get(pipe, ibuf);
pipe_buf_get(pipe, ibuf);
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
......@@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
for (idx = 0; idx < nbuf; idx++) {
struct pipe_buffer *buf = &bufs[idx];
buf->ops->release(pipe, buf);
}
for (idx = 0; idx < nbuf; idx++)
pipe_buf_release(pipe, &bufs[idx]);
out:
kfree(bufs);
return ret;
......
......@@ -954,30 +954,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
return ret;
}
static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct inode *inode = in->f_mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int ret;
inode_lock(inode);
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
if (ret) {
inode_unlock(inode);
return ret;
}
gfs2_glock_dq_uninit(&gh);
inode_unlock(inode);
return generic_file_splice_read(in, ppos, pipe, len, flags);
}
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
......@@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = {
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
.splice_read = gfs2_file_splice_read,
.splice_read = generic_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
......@@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.open = gfs2_open,
.release = gfs2_release,
.fsync = gfs2_fsync,
.splice_read = gfs2_file_splice_read,
.splice_read = generic_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
......
......@@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
}
EXPORT_SYMBOL_GPL(nfs_file_read);
ssize_t
nfs_file_splice_read(struct file *filp, loff_t *ppos,
struct pipe_inode_info *pipe, size_t count,
unsigned int flags)
{
struct inode *inode = file_inode(filp);
ssize_t res;
dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
filp, (unsigned long) count, (unsigned long long) *ppos);
nfs_start_io_read(inode);
res = nfs_revalidate_mapping(inode, filp->f_mapping);
if (!res) {
res = generic_file_splice_read(filp, ppos, pipe, count, flags);
if (res > 0)
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
}
nfs_end_io_read(inode);
return res;
}
EXPORT_SYMBOL_GPL(nfs_file_splice_read);
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
......@@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
......
......@@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
size_t, unsigned int);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);
......
......@@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
......
......@@ -2321,36 +2321,6 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
return ret;
}
static ssize_t ocfs2_file_splice_read(struct file *in,
loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len,
unsigned int flags)
{
int ret = 0, lock_level = 0;
struct inode *inode = file_inode(in);
trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
in->f_path.dentry->d_name.len,
in->f_path.dentry->d_name.name, len);
/*
* See the comment in ocfs2_file_read_iter()
*/
ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
if (ret < 0) {
mlog_errno(ret);
goto bail;
}
ocfs2_inode_unlock(inode, lock_level);
ret = generic_file_splice_read(in, ppos, pipe, len, flags);
bail:
return ret;
}
static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
struct iov_iter *to)
{
......@@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
......@@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
......
......@@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
......
......@@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (bufs) {
int curbuf = pipe->curbuf;
struct pipe_buffer *buf = pipe->bufs + curbuf;
const struct pipe_buf_operations *ops = buf->ops;
size_t chars = buf->len;
size_t written;
int error;
......@@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (chars > total_len)
chars = total_len;
error = ops->confirm(pipe, buf);
error = pipe_buf_confirm(pipe, buf);
if (error) {
if (!ret)
ret = error;
......@@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
}
if (!buf->len) {
buf->ops = NULL;
ops->release(pipe, buf);
pipe_buf_release(pipe, buf);
curbuf = (curbuf + 1) & (pipe->buffers - 1);
pipe->curbuf = curbuf;
pipe->nrbufs = --bufs;
......@@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
(pipe->buffers - 1);
struct pipe_buffer *buf = pipe->bufs + lastbuf;
const struct pipe_buf_operations *ops = buf->ops;
int offset = buf->offset + buf->len;
if (ops->can_merge && offset + chars <= PAGE_SIZE) {
ret = ops->confirm(pipe, buf);
if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) {
ret = pipe_buf_confirm(pipe, buf);
if (ret)
goto out;
......@@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
buf->ops->release(pipe, buf);
pipe_buf_release(pipe, buf);
}
if (pipe->tmp_page)
__free_page(pipe->tmp_page);
......
This diff is collapsed.
......@@ -393,45 +393,6 @@ xfs_file_read_iter(
return ret;
}
STATIC ssize_t
xfs_file_splice_read(
struct file *infilp,
loff_t *ppos,
struct pipe_inode_info *pipe,
size_t count,
unsigned int flags)
{
struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
ssize_t ret;
XFS_STATS_INC(ip->i_mount, xs_read_calls);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
trace_xfs_file_splice_read(ip, count, *ppos);
/*
* DAX inodes cannot ues the page cache for splice, so we have to push
* them through the VFS IO path. This means it goes through
* ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
* cannot lock the splice operation at this level for DAX inodes.
*/
if (IS_DAX(VFS_I(ip))) {
ret = default_file_splice_read(infilp, ppos, pipe, count,
flags);
goto out;
}
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
out:
if (ret > 0)
XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
return ret;
}
/*
* Zero any on disk space between the current EOF and the new, larger EOF.
*
......@@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
.read_iter = xfs_file_read_iter,
.write_iter = xfs_file_write_iter,
.splice_read = xfs_file_splice_read,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
......
......@@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
DECLARE_EVENT_CLASS(xfs_page_class,
TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
......
......@@ -2794,8 +2794,6 @@ extern void block_sync_page(struct page *page);
/* fs/splice.c */
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t default_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
......
......@@ -66,15 +66,10 @@ struct pipe_inode_info {
*
* ->confirm()
* ->steal()
* ...
* ->map()
* ...
* ->unmap()
*
* That is, ->map() must be called on a confirmed buffer,
* same goes for ->steal(). See below for the meaning of each
* operation. Also see kerneldoc in fs/pipe.c for the pipe
* and generic variants of these hooks.
* That is, ->steal() must be called on a confirmed buffer.
* See below for the meaning of each operation. Also see kerneldoc
* in fs/pipe.c for the pipe and generic variants of these hooks.
*/
struct pipe_buf_operations {
/*
......@@ -115,6 +110,53 @@ struct pipe_buf_operations {
void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};
/**
* pipe_buf_get - get a reference to a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to get a reference to
*/
static inline void pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
buf->ops->get(pipe, buf);
}
/**
* pipe_buf_release - put a reference to a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to put a reference to
*/
static inline void pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
const struct pipe_buf_operations *ops = buf->ops;
buf->ops = NULL;
ops->release(pipe, buf);
}
/**
* pipe_buf_confirm - verify contents of the pipe buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to confirm
*/
static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
return buf->ops->confirm(pipe, buf);
}
/**
* pipe_buf_steal - attempt to take ownership of a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to attempt to steal
*/
static inline int pipe_buf_steal(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
return buf->ops->steal(pipe, buf);
}
/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE
......@@ -129,7 +171,6 @@ extern unsigned long pipe_user_pages_hard;
extern unsigned long pipe_user_pages_soft;
int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe);
......
......@@ -3064,15 +3064,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
int len, __wsum csum);
ssize_t skb_socket_splice(struct sock *sk,
struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd);
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int len,
unsigned int flags,
ssize_t (*splice_cb)(struct sock *,
struct pipe_inode_info *,
struct splice_pipe_desc *));
unsigned int flags);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
......
......@@ -72,6 +72,8 @@ extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
struct splice_desc *, splice_actor *);
extern ssize_t splice_to_pipe(struct pipe_inode_info *,
struct splice_pipe_desc *);
extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
......@@ -83,4 +85,5 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
extern const struct pipe_buf_operations default_pipe_buf_ops;
#endif
......@@ -13,6 +13,7 @@
#include <uapi/linux/uio.h>
struct page;
struct pipe_inode_info;
struct kvec {
void *iov_base; /* and that should *never* hold a userland pointer */
......@@ -23,6 +24,7 @@ enum {
ITER_IOVEC = 0,
ITER_KVEC = 2,
ITER_BVEC = 4,
ITER_PIPE = 8,
};
struct iov_iter {
......@@ -33,8 +35,12 @@ struct iov_iter {
const struct iovec *iov;
const struct kvec *kvec;
const struct bio_vec *bvec;
struct pipe_inode_info *pipe;
};
union {
unsigned long nr_segs;
int idx;
};
unsigned long nr_segs;
};
/*
......@@ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
}
#define iov_for_each(iov, iter, start) \
if (!((start).type & ITER_BVEC)) \
if (!((start).type & (ITER_BVEC | ITER_PIPE))) \
for (iter = (start); \
(iter).count && \
((iov = iov_iter_iovec(&(iter))), 1); \
......@@ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
unsigned long nr_segs, size_t count);
void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
unsigned long nr_segs, size_t count);
void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
size_t count);
ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
size_t maxsize, unsigned maxpages, size_t *start);
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
......@@ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i)
static inline bool iter_is_iovec(struct iov_iter *i)
{
return !(i->type & (ITER_BVEC | ITER_KVEC));
return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
}
/*
......
......@@ -1108,51 +1108,23 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf,
return end_pos;
}
/*
* subbuf_read_actor - read up to one subbuf's worth of data
*/
static int subbuf_read_actor(size_t read_start,
struct rchan_buf *buf,
size_t avail,
read_descriptor_t *desc)
{
void *from;
int ret = 0;
from = buf->start + read_start;
ret = avail;
if (copy_to_user(desc->arg.buf, from, avail)) {
desc->error = -EFAULT;
ret = 0;
}
desc->arg.data += ret;
desc->written += ret;
desc->count -= ret;
return ret;
}
typedef int (*subbuf_actor_t) (size_t read_start,
struct rchan_buf *buf,
size_t avail,
read_descriptor_t *desc);
/*
* relay_file_read_subbufs - read count bytes, bridging subbuf boundaries
*/
static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
subbuf_actor_t subbuf_actor,
read_descriptor_t *desc)
static ssize_t relay_file_read(struct file *filp,
char __user *buffer,
size_t count,
loff_t *ppos)
{
struct rchan_buf *buf = filp->private_data;
size_t read_start, avail;
size_t written = 0;
int ret;
if (!desc->count)
if (!count)
return 0;
inode_lock(file_inode(filp));
do {
void *from;
if (!relay_file_read_avail(buf, *ppos))
break;
......@@ -1161,32 +1133,22 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
if (!avail)
break;
avail = min(desc->count, avail);
ret = subbuf_actor(read_start, buf, avail, desc);
if (desc->error < 0)
avail = min(count, avail);
from = buf->start + read_start;
ret = avail;
if (copy_to_user(buffer, from, avail))
break;
if (ret) {
relay_file_read_consume(buf, read_start, ret);
*ppos = relay_file_read_end_pos(buf, read_start, ret);
}
} while (desc->count && ret);
inode_unlock(file_inode(filp));
buffer += ret;
written += ret;
count -= ret;
return desc->written;
}
relay_file_read_consume(buf, read_start, ret);
*ppos = relay_file_read_end_pos(buf, read_start, ret);
} while (count);
inode_unlock(file_inode(filp));
static ssize_t relay_file_read(struct file *filp,
char __user *buffer,
size_t count,
loff_t *ppos)
{
read_descriptor_t desc;
desc.written = 0;
desc.count = count;
desc.arg.buf = buffer;
desc.error = 0;
return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc);
return written;
}
static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
......
This diff is collapsed.
......@@ -2311,119 +2311,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval ? retval : error;
}
static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct address_space *mapping = in->f_mapping;
struct inode *inode = mapping->host;
unsigned int loff, nr_pages, req_pages;
struct page *pages[PIPE_DEF_BUFFERS];
struct partial_page partial[PIPE_DEF_BUFFERS];
struct page *page;
pgoff_t index, end_index;
loff_t isize, left;
int error, page_nr;
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
.nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &page_cache_pipe_buf_ops,
.spd_release = spd_release_page,
};
isize = i_size_read(inode);
if (unlikely(*ppos >= isize))
return 0;
left = isize - *ppos;
if (unlikely(left < len))
len = left;
if (splice_grow_spd(pipe, &spd))
return -ENOMEM;
index = *ppos >> PAGE_SHIFT;
loff = *ppos & ~PAGE_MASK;
req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
nr_pages = min(req_pages, spd.nr_pages_max);
spd.nr_pages = find_get_pages_contig(mapping, index,
nr_pages, spd.pages);
index += spd.nr_pages;
error = 0;
while (spd.nr_pages < nr_pages) {
error = shmem_getpage(inode, index, &page, SGP_CACHE);
if (error)
break;
unlock_page(page);
spd.pages[spd.nr_pages++] = page;
index++;
}
index = *ppos >> PAGE_SHIFT;
nr_pages = spd.nr_pages;
spd.nr_pages = 0;
for (page_nr = 0; page_nr < nr_pages; page_nr++) {
unsigned int this_len;
if (!len)
break;
this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
page = spd.pages[page_nr];
if (!PageUptodate(page) || page->mapping != mapping) {
error = shmem_getpage(inode, index, &page, SGP_CACHE);
if (error)
break;
unlock_page(page);
put_page(spd.pages[page_nr]);
spd.pages[page_nr] = page;
}
isize = i_size_read(inode);
end_index = (isize - 1) >> PAGE_SHIFT;
if (unlikely(!isize || index > end_index))
break;
if (end_index == index) {
unsigned int plen;
plen = ((isize - 1) & ~PAGE_MASK) + 1;
if (plen <= loff)
break;
this_len = min(this_len, plen - loff);
len = this_len;
}
spd.partial[page_nr].offset = loff;
spd.partial[page_nr].len = this_len;
len -= this_len;
loff = 0;
spd.nr_pages++;
index++;
}
while (page_nr < nr_pages)
put_page(spd.pages[page_nr++]);
if (spd.nr_pages)
error = splice_to_pipe(pipe, &spd);
splice_shrink_spd(&spd);
if (error > 0) {
*ppos += error;
file_accessed(in);
}
return error;
}
/*
* llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
*/
......@@ -3786,7 +3673,7 @@ static const struct file_operations shmem_file_operations = {
.read_iter = shmem_file_read_iter,
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
.splice_read = shmem_file_splice_read,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = shmem_fallocate,
#endif
......
......@@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
return false;
}
ssize_t skb_socket_splice(struct sock *sk,
struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd)
{
int ret;
/* Drop the socket lock, otherwise we have reverse
* locking dependencies between sk_lock and i_mutex
* here as compared to sendfile(). We enter here
* with the socket lock held, and splice_to_pipe() will
* grab the pipe inode lock. For sendfile() emulation,
* we call into ->sendpage() with the i_mutex lock held
* and networking will grab the socket lock.
*/
release_sock(sk);
ret = splice_to_pipe(pipe, spd);
lock_sock(sk);
return ret;
}
/*
* Map data from the skb to a pipe. Should handle both the linear part,
* the fragments, and the frag list.
*/
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
unsigned int flags,
ssize_t (*splice_cb)(struct sock *,
struct pipe_inode_info *,
struct splice_pipe_desc *))
unsigned int flags)
{
struct partial_page partial[MAX_SKB_FRAGS];
struct page *pages[MAX_SKB_FRAGS];
......@@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
if (spd.nr_pages)
ret = splice_cb(sk, pipe, &spd);
ret = splice_to_pipe(pipe, &spd);
return ret;
}
......
......@@ -691,8 +691,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
int ret;
ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
min(rd_desc->count, len), tss->flags,
skb_socket_splice);
min(rd_desc->count, len), tss->flags);
if (ret > 0)
rd_desc->count -= ret;
return ret;
......
......@@ -1160,19 +1160,6 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
return copied ? : err;
}
static ssize_t kcm_sock_splice(struct sock *sk,
struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd)
{
int ret;
release_sock(sk);
ret = splice_to_pipe(pipe, spd);
lock_sock(sk);
return ret;
}
static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
......@@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
if (len > rxm->full_len)
len = rxm->full_len;
copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags,
kcm_sock_splice);
copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
if (copied < 0) {
err = copied;
goto err_out;
......
......@@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
return unix_stream_read_generic(&state);
}
static ssize_t skb_unix_socket_splice(struct sock *sk,
struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd)
{
int ret;
struct unix_sock *u = unix_sk(sk);
mutex_unlock(&u->iolock);
ret = splice_to_pipe(pipe, spd);
mutex_lock(&u->iolock);
return ret;
}
static int unix_stream_splice_actor(struct sk_buff *skb,
int skip, int chunk,
struct unix_stream_read_state *state)
{
return skb_splice_bits(skb, state->socket->sk,
UNIXCB(skb).consumed + skip,
state->pipe, chunk, state->splice_flags,
skb_unix_socket_splice);
state->pipe, chunk, state->splice_flags);
}
static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment