Commit 1af764e1 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] Fix writev atomicity on pipe/fifo

From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

The current writev() on a pipe/fifo can be interleaved with data from other
processes doing writes, even when the request size is <= PIPE_BUF.  These
writes should in fact be atomic.

The readv() side is implemented as well, so that it behaves the same as
read().  It is also faster.

readv/writev version of bw_pipe in LMbench

2.6.0-test9-bk12
hirofumi@devron (i686-pc-linux-gnu)[1010]$ ./bw_pipe -m 4096 -M 5
Pipe bandwidth: 45.53 MB/sec
hirofumi@devron (i686-pc-linux-gnu)[1009]$ ./bw_pipe -m 1024 -M 5
Pipe bandwidth: 20.08 MB/sec

2.6.0-test9-bk12 + patch
hirofumi@devron (i686-pc-linux-gnu)[1001]$ ./bw_pipe -m 4096 -M 5
Pipe bandwidth: 65.98 MB/sec
hirofumi@devron (i686-pc-linux-gnu)[1002]$ ./bw_pipe -m 1024 -M 5
Pipe bandwidth: 32.19 MB/sec
parent ed109bc5
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/pipe_fs_i.h> #include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/ioctls.h> #include <asm/ioctls.h>
...@@ -43,19 +44,63 @@ void pipe_wait(struct inode * inode) ...@@ -43,19 +44,63 @@ void pipe_wait(struct inode * inode)
down(PIPE_SEM(*inode)); down(PIPE_SEM(*inode));
} }
/*
 * Gather @len bytes from the user-space segments described by @iov into
 * the buffer at @to.
 *
 * The iovec array is consumed in place: for every chunk copied, the
 * current segment's iov_base is advanced and its iov_len reduced, so a
 * later call continues where this one stopped.
 *
 * The loop that skips empty segments has no upper bound, so the caller
 * must make sure the segments still describe at least @len bytes.
 *
 * Returns 0 on success, or -EFAULT as soon as copy_from_user() reports
 * a non-zero result.
 */
static inline int
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
{
	unsigned long chunk;

	for (; len > 0; len -= chunk) {
		/* Step past segments that have nothing left in them. */
		while (iov->iov_len == 0)
			iov++;

		/* Never take more than the current segment still holds. */
		chunk = min_t(unsigned long, len, iov->iov_len);
		if (copy_from_user(to, iov->iov_base, chunk) != 0)
			return -EFAULT;

		to += chunk;
		iov->iov_base += chunk;
		iov->iov_len -= chunk;
	}

	return 0;
}
/*
 * Scatter @len bytes from the buffer at @from into the user-space
 * segments described by @iov.
 *
 * The iovec array is consumed in place: after each chunk, the current
 * segment's iov_base moves forward and its iov_len shrinks by the
 * number of bytes written.
 *
 * Empty segments are skipped without any bound check, so the caller
 * must make sure the segments still have room for at least @len bytes.
 *
 * Returns 0 on success, or -EFAULT as soon as copy_to_user() reports
 * a non-zero result.
 */
static inline int
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
{
	unsigned long chunk;

	for (; len > 0; len -= chunk) {
		/* Find the next segment that can still accept data. */
		while (iov->iov_len == 0)
			iov++;

		/* Limit the chunk to what fits in the current segment. */
		chunk = min_t(unsigned long, len, iov->iov_len);
		if (copy_to_user(iov->iov_base, from, chunk) != 0)
			return -EFAULT;

		from += chunk;
		iov->iov_base += chunk;
		iov->iov_len -= chunk;
	}

	return 0;
}
static ssize_t static ssize_t
pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) pipe_readv(struct file *filp, const struct iovec *_iov,
unsigned long nr_segs, loff_t *ppos)
{ {
struct inode *inode = filp->f_dentry->d_inode; struct inode *inode = filp->f_dentry->d_inode;
int do_wakeup; int do_wakeup;
ssize_t ret; ssize_t ret;
struct iovec *iov = (struct iovec *)_iov;
size_t total_len;
/* pread is not allowed on pipes. */ /* pread is not allowed on pipes. */
if (unlikely(ppos != &filp->f_pos)) if (unlikely(ppos != &filp->f_pos))
return -ESPIPE; return -ESPIPE;
total_len = iov_length(iov, nr_segs);
/* Null read succeeds. */ /* Null read succeeds. */
if (unlikely(count == 0)) if (unlikely(total_len == 0))
return 0; return 0;
do_wakeup = 0; do_wakeup = 0;
...@@ -67,12 +112,12 @@ pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) ...@@ -67,12 +112,12 @@ pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode); char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
ssize_t chars = PIPE_MAX_RCHUNK(*inode); ssize_t chars = PIPE_MAX_RCHUNK(*inode);
if (chars > count) if (chars > total_len)
chars = count; chars = total_len;
if (chars > size) if (chars > size)
chars = size; chars = size;
if (copy_to_user(buf, pipebuf, chars)) { if (pipe_iov_copy_to_user(iov, pipebuf, chars)) {
if (!ret) ret = -EFAULT; if (!ret) ret = -EFAULT;
break; break;
} }
...@@ -81,12 +126,11 @@ pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) ...@@ -81,12 +126,11 @@ pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
PIPE_START(*inode) += chars; PIPE_START(*inode) += chars;
PIPE_START(*inode) &= (PIPE_SIZE - 1); PIPE_START(*inode) &= (PIPE_SIZE - 1);
PIPE_LEN(*inode) -= chars; PIPE_LEN(*inode) -= chars;
count -= chars; total_len -= chars;
buf += chars;
do_wakeup = 1; do_wakeup = 1;
} if (!total_len)
if (!count)
break; /* common path: read succeeded */ break; /* common path: read succeeded */
}
if (PIPE_LEN(*inode)) /* test for cyclic buffers */ if (PIPE_LEN(*inode)) /* test for cyclic buffers */
continue; continue;
if (!PIPE_WRITERS(*inode)) if (!PIPE_WRITERS(*inode))
...@@ -126,24 +170,35 @@ pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) ...@@ -126,24 +170,35 @@ pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
} }
static ssize_t static ssize_t
pipe_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
struct iovec iov = { .iov_base = buf, .iov_len = count };
return pipe_readv(filp, &iov, 1, ppos);
}
static ssize_t
pipe_writev(struct file *filp, const struct iovec *_iov,
unsigned long nr_segs, loff_t *ppos)
{ {
struct inode *inode = filp->f_dentry->d_inode; struct inode *inode = filp->f_dentry->d_inode;
ssize_t ret; ssize_t ret;
size_t min; size_t min;
int do_wakeup; int do_wakeup;
struct iovec *iov = (struct iovec *)_iov;
size_t total_len;
/* pwrite is not allowed on pipes. */ /* pwrite is not allowed on pipes. */
if (unlikely(ppos != &filp->f_pos)) if (unlikely(ppos != &filp->f_pos))
return -ESPIPE; return -ESPIPE;
total_len = iov_length(iov, nr_segs);
/* Null write succeeds. */ /* Null write succeeds. */
if (unlikely(count == 0)) if (unlikely(total_len == 0))
return 0; return 0;
do_wakeup = 0; do_wakeup = 0;
ret = 0; ret = 0;
min = count; min = total_len;
if (min > PIPE_BUF) if (min > PIPE_BUF)
min = 1; min = 1;
down(PIPE_SEM(*inode)); down(PIPE_SEM(*inode));
...@@ -164,23 +219,22 @@ pipe_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos ...@@ -164,23 +219,22 @@ pipe_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos
* syscall merging. * syscall merging.
*/ */
do_wakeup = 1; do_wakeup = 1;
if (chars > count) if (chars > total_len)
chars = count; chars = total_len;
if (chars > free) if (chars > free)
chars = free; chars = free;
if (copy_from_user(pipebuf, buf, chars)) { if (pipe_iov_copy_from_user(pipebuf, iov, chars)) {
if (!ret) ret = -EFAULT; if (!ret) ret = -EFAULT;
break; break;
} }
ret += chars; ret += chars;
PIPE_LEN(*inode) += chars; PIPE_LEN(*inode) += chars;
count -= chars; total_len -= chars;
buf += chars; if (!total_len)
}
if (!count)
break; break;
}
if (PIPE_FREE(*inode) && ret) { if (PIPE_FREE(*inode) && ret) {
/* handle cyclic data buffers */ /* handle cyclic data buffers */
min = 1; min = 1;
...@@ -213,6 +267,14 @@ pipe_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos ...@@ -213,6 +267,14 @@ pipe_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos
return ret; return ret;
} }
/*
 * Plain write() entry point: describe the single user buffer as a
 * one-segment iovec and hand it to pipe_writev(), returning whatever
 * pipe_writev() returns.
 */
static ssize_t
pipe_write(struct file *filp, const char __user *buf,
	   size_t count, loff_t *ppos)
{
	struct iovec single;

	/* The cast discards const so the pointer fits iov_base. */
	single.iov_base = (void __user *)buf;
	single.iov_len = count;

	return pipe_writev(filp, &single, 1, ppos);
}
static ssize_t static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{ {
...@@ -405,6 +467,7 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) ...@@ -405,6 +467,7 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
struct file_operations read_fifo_fops = { struct file_operations read_fifo_fops = {
.llseek = no_llseek, .llseek = no_llseek,
.read = pipe_read, .read = pipe_read,
.readv = pipe_readv,
.write = bad_pipe_w, .write = bad_pipe_w,
.poll = fifo_poll, .poll = fifo_poll,
.ioctl = pipe_ioctl, .ioctl = pipe_ioctl,
...@@ -417,6 +480,7 @@ struct file_operations write_fifo_fops = { ...@@ -417,6 +480,7 @@ struct file_operations write_fifo_fops = {
.llseek = no_llseek, .llseek = no_llseek,
.read = bad_pipe_r, .read = bad_pipe_r,
.write = pipe_write, .write = pipe_write,
.writev = pipe_writev,
.poll = fifo_poll, .poll = fifo_poll,
.ioctl = pipe_ioctl, .ioctl = pipe_ioctl,
.open = pipe_write_open, .open = pipe_write_open,
...@@ -427,7 +491,9 @@ struct file_operations write_fifo_fops = { ...@@ -427,7 +491,9 @@ struct file_operations write_fifo_fops = {
struct file_operations rdwr_fifo_fops = { struct file_operations rdwr_fifo_fops = {
.llseek = no_llseek, .llseek = no_llseek,
.read = pipe_read, .read = pipe_read,
.readv = pipe_readv,
.write = pipe_write, .write = pipe_write,
.writev = pipe_writev,
.poll = fifo_poll, .poll = fifo_poll,
.ioctl = pipe_ioctl, .ioctl = pipe_ioctl,
.open = pipe_rdwr_open, .open = pipe_rdwr_open,
...@@ -438,6 +504,7 @@ struct file_operations rdwr_fifo_fops = { ...@@ -438,6 +504,7 @@ struct file_operations rdwr_fifo_fops = {
struct file_operations read_pipe_fops = { struct file_operations read_pipe_fops = {
.llseek = no_llseek, .llseek = no_llseek,
.read = pipe_read, .read = pipe_read,
.readv = pipe_readv,
.write = bad_pipe_w, .write = bad_pipe_w,
.poll = pipe_poll, .poll = pipe_poll,
.ioctl = pipe_ioctl, .ioctl = pipe_ioctl,
...@@ -450,6 +517,7 @@ struct file_operations write_pipe_fops = { ...@@ -450,6 +517,7 @@ struct file_operations write_pipe_fops = {
.llseek = no_llseek, .llseek = no_llseek,
.read = bad_pipe_r, .read = bad_pipe_r,
.write = pipe_write, .write = pipe_write,
.writev = pipe_writev,
.poll = pipe_poll, .poll = pipe_poll,
.ioctl = pipe_ioctl, .ioctl = pipe_ioctl,
.open = pipe_write_open, .open = pipe_write_open,
...@@ -460,7 +528,9 @@ struct file_operations write_pipe_fops = { ...@@ -460,7 +528,9 @@ struct file_operations write_pipe_fops = {
struct file_operations rdwr_pipe_fops = { struct file_operations rdwr_pipe_fops = {
.llseek = no_llseek, .llseek = no_llseek,
.read = pipe_read, .read = pipe_read,
.readv = pipe_readv,
.write = pipe_write, .write = pipe_write,
.writev = pipe_writev,
.poll = pipe_poll, .poll = pipe_poll,
.ioctl = pipe_ioctl, .ioctl = pipe_ioctl,
.open = pipe_rdwr_open, .open = pipe_rdwr_open,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment