Commit 0613106d authored by Giampaolo Rodolà's avatar Giampaolo Rodolà

Issue 10882: add os.sendfile(). (patch provided by Ross Lagerwall)

parent 96e825ce
...@@ -775,6 +775,47 @@ as internal buffering of data. ...@@ -775,6 +775,47 @@ as internal buffering of data.
:meth:`~file.readline` methods. :meth:`~file.readline` methods.
.. function:: sendfile(out, in, offset, nbytes)
sendfile(out, in, offset, nbytes, headers=None, trailers=None, flags=0)
Copy *nbytes* bytes from file descriptor *in* to file descriptor *out*
starting at *offset*.
Return the number of bytes sent. When EOF is reached return 0.
The first function notation is supported by all platforms that define
:func:`sendfile`.
On Linux, if *offset* is given as ``None``, the bytes are read from the
current position of *in* and the position of *in* is updated.
The second case may be used on Mac OS X and FreeBSD where *headers* and
*trailers* are arbitrary sequences of buffers that are written before and
after the data from *in* is written. It returns the same as the first case.
On Mac OS X and FreeBSD, a value of 0 for *nbytes* specifies to send until
the end of *in* is reached.
On Solaris, *out* may be the file descriptor of a regular file or the file
descriptor of a socket. On all other platforms, *out* must be the file
descriptor of an open socket.
Availability: Unix.
.. versionadded:: 3.3
.. data:: SF_NODISKIO
SF_MNOWAIT
SF_SYNC
Parameters to the :func:`sendfile` function, if the implementation supports
them.
Availability: Unix.
.. versionadded:: 3.3
.. function:: tcgetpgrp(fd) .. function:: tcgetpgrp(fd)
Return the process group associated with the terminal given by *fd* (an open Return the process group associated with the terminal given by *fd* (an open
......
...@@ -68,6 +68,19 @@ New, Improved, and Deprecated Modules ...@@ -68,6 +68,19 @@ New, Improved, and Deprecated Modules
* Stub * Stub
os
--
The :mod:`os` module has a new :func:`~os.sendfile` function which provides an
efficent "zero-copy" way for copying data from one file (or socket) descriptor
to another.
The phrase "zero-copy" refers to the fact that all of the copying of data
between the two descriptors is done entirely by the kernel, with no copying of
data into userspace buffers.
:func:`~os.sendfile` can be used to efficiently copy data from a file on disk to
a network socket, e.g. for downloading a file.
(Patch submitted by Ross Lagerwall and Giampaolo Rodolà in :issue:`10882`.)
Optimizations Optimizations
============= =============
......
...@@ -15,6 +15,13 @@ from test import support ...@@ -15,6 +15,13 @@ from test import support
import contextlib import contextlib
import mmap import mmap
import uuid import uuid
import asyncore
import asynchat
import socket
try:
import threading
except ImportError:
threading = None
# Detect whether we're on a Linux system that uses the (now outdated # Detect whether we're on a Linux system that uses the (now outdated
# and unmaintained) linuxthreads threading library. There's an issue # and unmaintained) linuxthreads threading library. There's an issue
...@@ -1261,6 +1268,251 @@ class LoginTests(unittest.TestCase): ...@@ -1261,6 +1268,251 @@ class LoginTests(unittest.TestCase):
self.assertNotEqual(len(user_name), 0) self.assertNotEqual(len(user_name), 0)
class SendfileTestServer(asyncore.dispatcher, threading.Thread):
class Handler(asynchat.async_chat):
def __init__(self, conn):
asynchat.async_chat.__init__(self, conn)
self.in_buffer = []
self.closed = False
self.push(b"220 ready\r\n")
def handle_read(self):
data = self.recv(4096)
self.in_buffer.append(data)
def get_data(self):
return b''.join(self.in_buffer)
def handle_close(self):
self.close()
self.closed = True
def handle_error(self):
raise
def __init__(self, address):
threading.Thread.__init__(self)
asyncore.dispatcher.__init__(self)
self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
self.bind(address)
self.listen(5)
self.host, self.port = self.socket.getsockname()[:2]
self.handler_instance = None
self._active = False
self._active_lock = threading.Lock()
# --- public API
@property
def running(self):
return self._active
def start(self):
assert not self.running
self.__flag = threading.Event()
threading.Thread.start(self)
self.__flag.wait()
def stop(self):
assert self.running
self._active = False
self.join()
def wait(self):
# wait for handler connection to be closed, then stop the server
while not getattr(self.handler_instance, "closed", True):
time.sleep(0.001)
self.stop()
# --- internals
def run(self):
self._active = True
self.__flag.set()
while self._active and asyncore.socket_map:
self._active_lock.acquire()
asyncore.loop(timeout=0.001, count=1)
self._active_lock.release()
asyncore.close_all()
def handle_accept(self):
conn, addr = self.accept()
self.handler_instance = self.Handler(conn)
def handle_connect(self):
self.close()
handle_read = handle_connect
def writable(self):
return 0
def handle_error(self):
raise
@unittest.skipUnless(hasattr(os, 'sendfile'), "test needs os.sendfile()")
class TestSendfile(unittest.TestCase):
DATA = b"12345abcde" * 1024 * 1024 # 10 Mb
SUPPORT_HEADERS_TRAILERS = not sys.platform.startswith("linux") and \
not sys.platform.startswith("solaris")
@classmethod
def setUpClass(cls):
with open(support.TESTFN, "wb") as f:
f.write(cls.DATA)
@classmethod
def tearDownClass(cls):
support.unlink(support.TESTFN)
def setUp(self):
self.server = SendfileTestServer((support.HOST, 0))
self.server.start()
self.client = socket.socket()
self.client.connect((self.server.host, self.server.port))
self.client.settimeout(1)
# synchronize by waiting for "220 ready" response
self.client.recv(1024)
self.sockno = self.client.fileno()
self.file = open(support.TESTFN, 'rb')
self.fileno = self.file.fileno()
def tearDown(self):
self.file.close()
self.client.close()
if self.server.running:
self.server.stop()
def sendfile_wrapper(self, sock, file, offset, nbytes, headers=[], trailers=[]):
"""A higher level wrapper representing how an application is
supposed to use sendfile().
"""
while 1:
try:
if self.SUPPORT_HEADERS_TRAILERS:
return os.sendfile(sock, file, offset, nbytes, headers,
trailers)
else:
return os.sendfile(sock, file, offset, nbytes)
except OSError as err:
if err.errno == errno.ECONNRESET:
# disconnected
raise
elif err.errno in (errno.EAGAIN, errno.EBUSY):
# we have to retry send data
continue
else:
raise
def test_send_whole_file(self):
# normal send
total_sent = 0
offset = 0
nbytes = 4096
while 1:
sent = self.sendfile_wrapper(self.sockno, self.fileno, offset, nbytes)
if sent == 0:
break
offset += sent
total_sent += sent
self.assertTrue(sent <= nbytes)
self.assertEqual(offset, total_sent)
self.assertEqual(total_sent, len(self.DATA))
self.client.close()
self.server.wait()
data = self.server.handler_instance.get_data()
self.assertEqual(hash(data), hash(self.DATA))
def test_send_at_certain_offset(self):
# start sending a file at a certain offset
total_sent = 0
offset = len(self.DATA) / 2
nbytes = 4096
while 1:
sent = self.sendfile_wrapper(self.sockno, self.fileno, offset, nbytes)
if sent == 0:
break
offset += sent
total_sent += sent
self.assertTrue(sent <= nbytes)
self.client.close()
self.server.wait()
data = self.server.handler_instance.get_data()
expected = self.DATA[int(len(self.DATA) / 2):]
self.assertEqual(total_sent, len(expected))
self.assertEqual(hash(data), hash(expected))
def test_offset_overflow(self):
# specify an offset > file size
offset = len(self.DATA) + 4096
sent = os.sendfile(self.sockno, self.fileno, offset, 4096)
self.assertEqual(sent, 0)
self.client.close()
self.server.wait()
data = self.server.handler_instance.get_data()
self.assertEqual(data, b'')
def test_invalid_offset(self):
with self.assertRaises(OSError) as cm:
os.sendfile(self.sockno, self.fileno, -1, 4096)
self.assertEqual(cm.exception.errno, errno.EINVAL)
# --- headers / trailers tests
if SUPPORT_HEADERS_TRAILERS:
def test_headers(self):
total_sent = 0
sent = os.sendfile(self.sockno, self.fileno, 0, 4096,
headers=[b"x" * 512])
total_sent += sent
offset = 4096
nbytes = 4096
while 1:
sent = self.sendfile_wrapper(self.sockno, self.fileno,
offset, nbytes)
if sent == 0:
break
total_sent += sent
offset += sent
expected_data = b"x" * 512 + self.DATA
self.assertEqual(total_sent, len(expected_data))
self.client.close()
self.server.wait()
data = self.server.handler_instance.get_data()
self.assertEqual(hash(data), hash(expected_data))
def test_trailers(self):
TESTFN2 = support.TESTFN + "2"
f = open(TESTFN2, 'wb')
f.write(b"abcde")
f.close()
f = open(TESTFN2, 'rb')
try:
os.sendfile(self.sockno, f.fileno(), 0, 4096, trailers=[b"12345"])
self.client.close()
self.server.wait()
data = self.server.handler_instance.get_data()
self.assertEqual(data, b"abcde12345")
finally:
os.remove(TESTFN2)
if hasattr(os, "SF_NODISKIO"):
def test_flags(self):
try:
os.sendfile(self.sockno, self.fileno, 0, 4096,
flags=os.SF_NODISKIO)
except OSError as err:
if err.errno not in (errno.EBUSY, errno.EAGAIN):
raise
def test_main(): def test_main():
support.run_unittest( support.run_unittest(
FileTests, FileTests,
...@@ -1281,6 +1533,7 @@ def test_main(): ...@@ -1281,6 +1533,7 @@ def test_main():
PidTests, PidTests,
LoginTests, LoginTests,
LinkTests, LinkTests,
TestSendfile,
) )
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -35,6 +35,8 @@ Core and Builtins ...@@ -35,6 +35,8 @@ Core and Builtins
Library Library
------- -------
- Issue 10882: Add os.sendfile function.
- Issue #10868: Allow usage of the register method of an ABC as a class - Issue #10868: Allow usage of the register method of an ABC as a class
decorator. decorator.
......
...@@ -95,6 +95,20 @@ corresponding Unix manual entries for more information on calls."); ...@@ -95,6 +95,20 @@ corresponding Unix manual entries for more information on calls.");
#include <langinfo.h> #include <langinfo.h>
#endif #endif
#ifdef HAVE_SYS_SENDFILE_H
#include <sys/sendfile.h>
#endif
#if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__APPLE__)
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#endif
/* Various compilers have only certain posix functions */ /* Various compilers have only certain posix functions */
/* XXX Gosh I wish these were all moved into pyconfig.h */ /* XXX Gosh I wish these were all moved into pyconfig.h */
#if defined(PYCC_VACPP) && defined(PYOS_OS2) #if defined(PYCC_VACPP) && defined(PYOS_OS2)
...@@ -349,6 +363,20 @@ static int win32_can_symlink = 0; ...@@ -349,6 +363,20 @@ static int win32_can_symlink = 0;
#endif #endif
#endif #endif
int
PyParse_off_t(PyObject* arg, void* addr)
{
#if !defined(HAVE_LARGEFILE_SUPPORT)
*((off_t*)addr) = PyLong_AsLong(arg);
#else
*((off_t*)addr) = PyLong_Check(arg) ? PyLong_AsLongLong(arg)
: PyLong_AsLong(arg);
#endif
if (PyErr_Occurred())
return 0;
return 1;
}
#if defined _MSC_VER && _MSC_VER >= 1400 #if defined _MSC_VER && _MSC_VER >= 1400
/* Microsoft CRT in VS2005 and higher will verify that a filehandle is /* Microsoft CRT in VS2005 and higher will verify that a filehandle is
* valid and throw an assertion if it isn't. * valid and throw an assertion if it isn't.
...@@ -5723,6 +5751,179 @@ posix_write(PyObject *self, PyObject *args) ...@@ -5723,6 +5751,179 @@ posix_write(PyObject *self, PyObject *args)
return PyLong_FromSsize_t(size); return PyLong_FromSsize_t(size);
} }
#ifdef HAVE_SENDFILE
#if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__APPLE__)
static int
iov_setup(struct iovec **iov, Py_buffer **buf, PyObject *seq, int cnt, int type)
{
int i, j;
*iov = PyMem_New(struct iovec, cnt);
if (*iov == NULL) {
PyErr_NoMemory();
return 0;
}
*buf = PyMem_New(Py_buffer, cnt);
if (*buf == NULL) {
PyMem_Del(*iov);
PyErr_NoMemory();
return 0;
}
for (i = 0; i < cnt; i++) {
if (PyObject_GetBuffer(PySequence_GetItem(seq, i), &(*buf)[i],
type) == -1) {
PyMem_Del(*iov);
for (j = 0; j < i; j++) {
PyBuffer_Release(&(*buf)[j]);
}
PyMem_Del(*buf);
return 0;
}
(*iov)[i].iov_base = (*buf)[i].buf;
(*iov)[i].iov_len = (*buf)[i].len;
}
return 1;
}
static void
iov_cleanup(struct iovec *iov, Py_buffer *buf, int cnt)
{
int i;
PyMem_Del(iov);
for (i = 0; i < cnt; i++) {
PyBuffer_Release(&buf[i]);
}
PyMem_Del(buf);
}
#endif
PyDoc_STRVAR(posix_sendfile__doc__,
"sendfile(out, in, offset, nbytes) -> byteswritten\n\
sendfile(out, in, offset, nbytes, headers=None, trailers=None, flags=0)\n\
-> byteswritten\n\
Copy nbytes bytes from file descriptor in to file descriptor out.");
static PyObject *
posix_sendfile(PyObject *self, PyObject *args, PyObject *kwdict)
{
int in, out;
Py_ssize_t ret;
off_t offset;
#if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__APPLE__)
#ifndef __APPLE__
Py_ssize_t len;
#endif
PyObject *headers = NULL, *trailers = NULL;
Py_buffer *hbuf, *tbuf;
off_t sbytes;
struct sf_hdtr sf;
int flags = 0;
sf.headers = NULL;
sf.trailers = NULL;
static char *keywords[] = {"out", "in",
"offset", "count",
"headers", "trailers", "flags", NULL};
#ifdef __APPLE__
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO&O&|OOi:sendfile",
keywords, &out, &in, PyParse_off_t, &offset, PyParse_off_t, &sbytes,
#else
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO&n|OOi:sendfile",
keywords, &out, &in, PyParse_off_t, &offset, &len,
#endif
&headers, &trailers, &flags))
return NULL;
if (headers != NULL) {
if (!PySequence_Check(headers)) {
PyErr_SetString(PyExc_TypeError,
"sendfile() headers must be a sequence or None");
return NULL;
} else {
sf.hdr_cnt = PySequence_Size(headers);
if (sf.hdr_cnt > 0 && !iov_setup(&(sf.headers), &hbuf,
headers, sf.hdr_cnt, PyBUF_SIMPLE))
return NULL;
}
}
if (trailers != NULL) {
if (!PySequence_Check(trailers)) {
PyErr_SetString(PyExc_TypeError,
"sendfile() trailers must be a sequence or None");
return NULL;
} else {
sf.trl_cnt = PySequence_Size(trailers);
if (sf.trl_cnt > 0 && !iov_setup(&(sf.trailers), &tbuf,
trailers, sf.trl_cnt, PyBUF_SIMPLE))
return NULL;
}
}
Py_BEGIN_ALLOW_THREADS
#ifdef __APPLE__
ret = sendfile(in, out, offset, &sbytes, &sf, flags);
#else
ret = sendfile(in, out, offset, len, &sf, &sbytes, flags);
#endif
Py_END_ALLOW_THREADS
if (sf.headers != NULL)
iov_cleanup(sf.headers, hbuf, sf.hdr_cnt);
if (sf.trailers != NULL)
iov_cleanup(sf.trailers, tbuf, sf.trl_cnt);
if (ret < 0) {
if ((errno == EAGAIN) || (errno == EBUSY)) {
if (sbytes != 0) {
// some data has been sent
goto done;
}
else {
// no data has been sent; upper application is supposed
// to retry on EAGAIN or EBUSY
return posix_error();
}
}
return posix_error();
}
goto done;
done:
#if !defined(HAVE_LARGEFILE_SUPPORT)
return Py_BuildValue("l", sbytes);
#else
return Py_BuildValue("L", sbytes);
#endif
#else
Py_ssize_t count;
PyObject *offobj;
static char *keywords[] = {"out", "in",
"offset", "count", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiOn:sendfile",
keywords, &out, &in, &offobj, &count))
return NULL;
#ifdef linux
if (offobj == Py_None) {
Py_BEGIN_ALLOW_THREADS
ret = sendfile(out, in, NULL, count);
Py_END_ALLOW_THREADS
if (ret < 0)
return posix_error();
Py_INCREF(Py_None);
return Py_BuildValue("nO", ret, Py_None);
}
#endif
PyParse_off_t(offobj, &offset);
Py_BEGIN_ALLOW_THREADS
ret = sendfile(out, in, &offset, count);
Py_END_ALLOW_THREADS
if (ret < 0)
return posix_error();
return Py_BuildValue("n", ret);
#endif
}
#endif
PyDoc_STRVAR(posix_fstat__doc__, PyDoc_STRVAR(posix_fstat__doc__,
"fstat(fd) -> stat result\n\n\ "fstat(fd) -> stat result\n\n\
...@@ -7971,6 +8172,10 @@ static PyMethodDef posix_methods[] = { ...@@ -7971,6 +8172,10 @@ static PyMethodDef posix_methods[] = {
{"lseek", posix_lseek, METH_VARARGS, posix_lseek__doc__}, {"lseek", posix_lseek, METH_VARARGS, posix_lseek__doc__},
{"read", posix_read, METH_VARARGS, posix_read__doc__}, {"read", posix_read, METH_VARARGS, posix_read__doc__},
{"write", posix_write, METH_VARARGS, posix_write__doc__}, {"write", posix_write, METH_VARARGS, posix_write__doc__},
#ifdef HAVE_SENDFILE
{"sendfile", (PyCFunction)posix_sendfile, METH_VARARGS | METH_KEYWORDS,
posix_sendfile__doc__},
#endif
{"fstat", posix_fstat, METH_VARARGS, posix_fstat__doc__}, {"fstat", posix_fstat, METH_VARARGS, posix_fstat__doc__},
{"isatty", posix_isatty, METH_VARARGS, posix_isatty__doc__}, {"isatty", posix_isatty, METH_VARARGS, posix_isatty__doc__},
#ifdef HAVE_PIPE #ifdef HAVE_PIPE
...@@ -8362,6 +8567,17 @@ all_ins(PyObject *d) ...@@ -8362,6 +8567,17 @@ all_ins(PyObject *d)
if (ins(d, "ST_NOSUID", (long)ST_NOSUID)) return -1; if (ins(d, "ST_NOSUID", (long)ST_NOSUID)) return -1;
#endif /* ST_NOSUID */ #endif /* ST_NOSUID */
/* FreeBSD sendfile() constants */
#ifdef SF_NODISKIO
if (ins(d, "SF_NODISKIO", (long)SF_NODISKIO)) return -1;
#endif
#ifdef SF_MNOWAIT
if (ins(d, "SF_MNOWAIT", (long)SF_MNOWAIT)) return -1;
#endif
#ifdef SF_SYNC
if (ins(d, "SF_SYNC", (long)SF_SYNC)) return -1;
#endif
#ifdef HAVE_SPAWNV #ifdef HAVE_SPAWNV
#if defined(PYOS_OS2) && defined(PYCC_GCC) #if defined(PYOS_OS2) && defined(PYCC_GCC)
if (ins(d, "P_WAIT", (long)P_WAIT)) return -1; if (ins(d, "P_WAIT", (long)P_WAIT)) return -1;
......
...@@ -1283,10 +1283,10 @@ shadow.h signal.h stdint.h stropts.h termios.h thread.h \ ...@@ -1283,10 +1283,10 @@ shadow.h signal.h stdint.h stropts.h termios.h thread.h \
unistd.h utime.h \ unistd.h utime.h \
sys/audioio.h sys/bsdtty.h sys/epoll.h sys/event.h sys/file.h sys/loadavg.h \ sys/audioio.h sys/bsdtty.h sys/epoll.h sys/event.h sys/file.h sys/loadavg.h \
sys/lock.h sys/mkdev.h sys/modem.h \ sys/lock.h sys/mkdev.h sys/modem.h \
sys/param.h sys/poll.h sys/select.h sys/socket.h sys/statvfs.h sys/stat.h \ sys/param.h sys/poll.h sys/select.h sys/sendfile.h sys/socket.h sys/statvfs.h \
sys/termio.h sys/time.h \ sys/stat.h sys/termio.h sys/time.h \
sys/times.h sys/types.h sys/un.h sys/utsname.h sys/wait.h pty.h libutil.h \ sys/times.h sys/types.h sys/uio.h sys/un.h sys/utsname.h sys/wait.h pty.h \
sys/resource.h netpacket/packet.h sysexits.h bluetooth.h \ libutil.h sys/resource.h netpacket/packet.h sysexits.h bluetooth.h \
bluetooth/bluetooth.h linux/tipc.h spawn.h util.h) bluetooth/bluetooth.h linux/tipc.h spawn.h util.h)
AC_HEADER_DIRENT AC_HEADER_DIRENT
AC_HEADER_MAJOR AC_HEADER_MAJOR
...@@ -1891,6 +1891,7 @@ AC_MSG_RESULT($SHLIBS) ...@@ -1891,6 +1891,7 @@ AC_MSG_RESULT($SHLIBS)
# checks for libraries # checks for libraries
AC_CHECK_LIB(sendfile, sendfile)
AC_CHECK_LIB(dl, dlopen) # Dynamic linking for SunOS/Solaris and SYSV AC_CHECK_LIB(dl, dlopen) # Dynamic linking for SunOS/Solaris and SYSV
AC_CHECK_LIB(dld, shl_load) # Dynamic linking for HP-UX AC_CHECK_LIB(dld, shl_load) # Dynamic linking for HP-UX
...@@ -2539,7 +2540,7 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \ ...@@ -2539,7 +2540,7 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \
initgroups kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \ initgroups kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
mremap nice pathconf pause plock poll pthread_init \ mremap nice pathconf pause plock poll pthread_init \
putenv readlink realpath \ putenv readlink realpath \
select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \ select sem_open sem_timedwait sem_getvalue sem_unlink sendfile setegid seteuid \
setgid \ setgid \
setlocale setregid setreuid setresuid setresgid setsid setpgid setpgrp setuid setvbuf \ setlocale setregid setreuid setresuid setresgid setsid setpgid setpgrp setuid setvbuf \
sigaction siginterrupt sigrelse snprintf strftime strlcpy \ sigaction siginterrupt sigrelse snprintf strftime strlcpy \
......
...@@ -1024,6 +1024,9 @@ ...@@ -1024,6 +1024,9 @@
/* The size of `size_t', as computed by sizeof. */ /* The size of `size_t', as computed by sizeof. */
#undef SIZEOF_SIZE_T #undef SIZEOF_SIZE_T
/* Define to 1 if you have the `sendfile' function. */
#undef HAVE_SENDFILE
/* The size of `time_t', as computed by sizeof. */ /* The size of `time_t', as computed by sizeof. */
#undef SIZEOF_TIME_T #undef SIZEOF_TIME_T
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment