Commit d1a819a2 authored by Linus Torvalds's avatar Linus Torvalds

splice: teach splice pipe reading about empty pipe buffers

Tetsuo Handa reports that splice() can return 0 before the real EOF, if
the data in the splice source pipe is an empty pipe buffer.  That empty
pipe buffer case doesn't happen in any normal situation, but you can
trigger it by doing a write to a pipe that fails due to a page fault.

Tetsuo has a test-case to show the behavior:

  #define _GNU_SOURCE
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <unistd.h>

  int main(int argc, char *argv[])
  {
	const int fd = open("/tmp/testfile", O_WRONLY | O_CREAT, 0600);
	int pipe_fd[2] = { -1, -1 };
	pipe(pipe_fd);
	write(pipe_fd[1], NULL, 4096);
	/* This splice() should wait unless interrupted. */
	return !splice(pipe_fd[0], NULL, fd, NULL, 65536, 0);
  }

which results in

    write(5, NULL, 4096)                    = -1 EFAULT (Bad address)
    splice(4, NULL, 3, NULL, 65536, 0)      = 0

and this can confuse splice() users into believing they have hit EOF
prematurely.

The issue was introduced when the pipe write code started pre-allocating
the pipe buffers before copying data from user space.

This is modified verion of Tetsuo's original patch.

Fixes: a194dfe6 ("pipe: Rearrange sequence in pipe_write() to preallocate slot")
Link:https://lore.kernel.org/linux-fsdevel/20201005121339.4063-1-penguin-kernel@I-love.SAKURA.ne.jp/Reported-by: default avatarTetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Acked-by: default avatarTetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 7575fdda
...@@ -526,6 +526,22 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des ...@@ -526,6 +526,22 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
return 1; return 1;
} }
/* We know we have a pipe buffer, but maybe it's empty? */
static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
{
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
if (unlikely(!buf->len)) {
pipe_buf_release(pipe, buf);
pipe->tail = tail+1;
return true;
}
return false;
}
/** /**
* splice_from_pipe_next - wait for some data to splice from * splice_from_pipe_next - wait for some data to splice from
* @pipe: pipe to splice from * @pipe: pipe to splice from
...@@ -545,6 +561,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des ...@@ -545,6 +561,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
if (signal_pending(current)) if (signal_pending(current))
return -ERESTARTSYS; return -ERESTARTSYS;
repeat:
while (pipe_empty(pipe->head, pipe->tail)) { while (pipe_empty(pipe->head, pipe->tail)) {
if (!pipe->writers) if (!pipe->writers)
return 0; return 0;
...@@ -566,6 +583,9 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des ...@@ -566,6 +583,9 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
pipe_wait_readable(pipe); pipe_wait_readable(pipe);
} }
if (eat_empty_buffer(pipe))
goto repeat;
return 1; return 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment