Commit 5274f052 authored by Jens Axboe's avatar Jens Axboe Committed by Linus Torvalds

[PATCH] Introduce sys_splice() system call

This adds support for the sys_splice system call. Using a pipe as a
transport, it can connect to files or sockets (latter as output only).

From the splice.c comments:

   "splice": joining two ropes together by interweaving their strands.

   This is the "extended pipe" functionality, where a pipe is used as
   an arbitrary in-memory buffer. Think of a pipe as a small kernel
   buffer that you can use to transfer data from one end to the other.

   The traditional unix read/write is extended with a "splice()" operation
   that transfers data buffers to or from a pipe buffer.

   Named by Larry McVoy, original implementation from Linus, extended by
   Jens to support splicing to files and fixing the initial implementation
   bugs.
Signed-off-by: default avatarJens Axboe <axboe@suse.de>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 5d4fe2c1
......@@ -312,3 +312,4 @@ ENTRY(sys_call_table)
.long sys_unshare /* 310 */
.long sys_set_robust_list
.long sys_get_robust_list
.long sys_splice
......@@ -1605,5 +1605,6 @@ sys_call_table:
data8 sys_ni_syscall // reserved for pselect
data8 sys_ni_syscall // 1295 reserved for ppoll
data8 sys_unshare
data8 sys_splice
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
......@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
ioprio.o pnode.o drop_caches.o
ioprio.o pnode.o drop_caches.o splice.o
obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_EPOLL) += eventpoll.o
......
......@@ -53,6 +53,8 @@ const struct file_operations ext2_file_operations = {
.readv = generic_file_readv,
.writev = generic_file_writev,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
#ifdef CONFIG_EXT2_FS_XIP
......
......@@ -119,6 +119,8 @@ const struct file_operations ext3_file_operations = {
.release = ext3_release_file,
.fsync = ext3_sync_file,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
struct inode_operations ext3_file_inode_operations = {
......
......@@ -15,6 +15,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
......@@ -94,11 +95,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff
{
struct page *page = buf->page;
if (info->tmp_page) {
__free_page(page);
/*
* If nobody else uses this page, and we don't already have a
* temporary page, let's keep track of it as a one-deep
* allocation cache
*/
if (page_count(page) == 1 && !info->tmp_page) {
info->tmp_page = page;
return;
}
info->tmp_page = page;
/*
* Otherwise just release our reference to it
*/
page_cache_release(page);
}
static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
......@@ -152,6 +162,11 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
chars = total_len;
addr = ops->map(filp, info, buf);
if (IS_ERR(addr)) {
if (!ret)
ret = PTR_ERR(addr);
break;
}
error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
ops->unmap(info, buf);
if (unlikely(error)) {
......@@ -254,8 +269,16 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
struct pipe_buf_operations *ops = buf->ops;
int offset = buf->offset + buf->len;
if (ops->can_merge && offset + chars <= PAGE_SIZE) {
void *addr = ops->map(filp, info, buf);
int error = pipe_iov_copy_from_user(offset + addr, iov, chars);
void *addr;
int error;
addr = ops->map(filp, info, buf);
if (IS_ERR(addr)) {
error = PTR_ERR(addr);
goto out;
}
error = pipe_iov_copy_from_user(offset + addr, iov,
chars);
ops->unmap(info, buf);
ret = error;
do_wakeup = 1;
......
......@@ -1576,6 +1576,8 @@ const struct file_operations reiserfs_file_operations = {
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = reiserfs_aio_write,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
struct inode_operations reiserfs_file_inode_operations = {
......
This diff is collapsed.
......@@ -318,8 +318,9 @@
#define __NR_unshare 310
#define __NR_set_robust_list 311
#define __NR_get_robust_list 312
#define __NR_sys_splice 313
#define NR_syscalls 313
#define NR_syscalls 314
/*
* user-visible error numbers are in the range -1 - -128: see
......
......@@ -285,12 +285,13 @@
#define __NR_faccessat 1293
/* 1294, 1295 reserved for pselect/ppoll */
#define __NR_unshare 1296
#define __NR_splice 1297
#ifdef __KERNEL__
#include <linux/config.h>
#define NR_syscalls 273 /* length of syscall table */
#define NR_syscalls 274 /* length of syscall table */
#define __ARCH_WANT_SYS_RT_SIGACTION
......
......@@ -301,8 +301,9 @@
#define __NR_pselect6 280
#define __NR_ppoll 281
#define __NR_unshare 282
#define __NR_splice 283
#define __NR_syscalls 283
#define __NR_syscalls 284
#ifdef __KERNEL__
#define __NR__exit __NR_exit
......
......@@ -609,8 +609,10 @@ __SYSCALL(__NR_unshare, sys_unshare)
__SYSCALL(__NR_set_robust_list, sys_set_robust_list)
#define __NR_get_robust_list 274
__SYSCALL(__NR_get_robust_list, sys_get_robust_list)
#define __NR_splice 275
__SYSCALL(__NR_splice, sys_splice)
#define __NR_syscall_max __NR_get_robust_list
#define __NR_syscall_max __NR_splice
#ifndef __NO_STUBS
......
......@@ -1032,6 +1032,8 @@ struct file_operations {
int (*check_flags)(int);
int (*dir_notify)(struct file *filp, unsigned long arg);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct inode *, struct file *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, struct inode *, size_t, unsigned int);
};
struct inode_operations {
......@@ -1609,6 +1611,8 @@ extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor
extern void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *, struct file *,
loff_t *, read_descriptor_t *, read_actor_t);
extern ssize_t generic_file_splice_read(struct file *, struct inode *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct inode *, struct file *, size_t, unsigned int);
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
......
......@@ -569,5 +569,7 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
int flags, int mode);
asmlinkage long sys_unshare(unsigned long unshare_flags);
asmlinkage long sys_splice(int fdin, int fdout, size_t len,
unsigned int flags);
#endif
......@@ -119,6 +119,9 @@ static ssize_t sock_writev(struct file *file, const struct iovec *vector,
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);
extern ssize_t generic_splice_sendpage(struct inode *inode, struct file *out,
size_t len, unsigned int flags);
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
......@@ -141,7 +144,8 @@ static struct file_operations socket_file_ops = {
.fasync = sock_fasync,
.readv = sock_readv,
.writev = sock_writev,
.sendpage = sock_sendpage
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
};
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment