Commit 26fd8feb authored by Giampaolo Rodola's avatar Giampaolo Rodola

merge heads

parents e1266782 7ca29507
...@@ -32,7 +32,6 @@ Modules/Setup.local ...@@ -32,7 +32,6 @@ Modules/Setup.local
Modules/config.c Modules/config.c
Modules/ld_so_aix$ Modules/ld_so_aix$
Parser/pgen$ Parser/pgen$
PCbuild/amd64/
^core ^core
^python-gdb.py ^python-gdb.py
^python.exe-gdb.py ^python.exe-gdb.py
...@@ -56,6 +55,12 @@ PC/python_nt*.h ...@@ -56,6 +55,12 @@ PC/python_nt*.h
PC/pythonnt_rc*.h PC/pythonnt_rc*.h
PC/*.obj PC/*.obj
PC/*.exe PC/*.exe
PC/*/*.user
PC/*/*.ncb
PC/*/*.suo
PC/*/Win32-temp-*
PC/*/x64-temp-*
PC/*/amd64
PCbuild/*.exe PCbuild/*.exe
PCbuild/*.dll PCbuild/*.dll
PCbuild/*.pdb PCbuild/*.pdb
...@@ -69,6 +74,8 @@ PCbuild/*.suo ...@@ -69,6 +74,8 @@ PCbuild/*.suo
PCbuild/*.*sdf PCbuild/*.*sdf
PCbuild/Win32-temp-* PCbuild/Win32-temp-*
PCbuild/x64-temp-* PCbuild/x64-temp-*
PCbuild/amd64
BuildLog.htm
__pycache__ __pycache__
Modules/_testembed Modules/_testembed
.coverage .coverage
......
...@@ -17,10 +17,10 @@ yourself. However the bundled generator knows how to generate most email in a ...@@ -17,10 +17,10 @@ yourself. However the bundled generator knows how to generate most email in a
standards-compliant way, should handle MIME and non-MIME email messages just standards-compliant way, should handle MIME and non-MIME email messages just
fine, and is designed so that the transformation from flat text, to a message fine, and is designed so that the transformation from flat text, to a message
structure via the :class:`~email.parser.Parser` class, and back to flat text, structure via the :class:`~email.parser.Parser` class, and back to flat text,
is idempotent (the input is identical to the output). On the other hand, using is idempotent (the input is identical to the output) [#]_. On the other hand,
the Generator on a :class:`~email.message.Message` constructed by program may using the Generator on a :class:`~email.message.Message` constructed by program
result in changes to the :class:`~email.message.Message` object as defaults are may result in changes to the :class:`~email.message.Message` object as defaults
filled in. are filled in.
:class:`bytes` output can be generated using the :class:`BytesGenerator` class. :class:`bytes` output can be generated using the :class:`BytesGenerator` class.
If the message object structure contains non-ASCII bytes, this generator's If the message object structure contains non-ASCII bytes, this generator's
...@@ -223,3 +223,12 @@ representing the part. ...@@ -223,3 +223,12 @@ representing the part.
The default value for *fmt* is ``None``, meaning :: The default value for *fmt* is ``None``, meaning ::
[Non-text (%(type)s) part of message omitted, filename %(filename)s] [Non-text (%(type)s) part of message omitted, filename %(filename)s]
.. rubric:: Footnotes
.. [#] This statement assumes that you use the appropriate setting for the
``unixfrom`` argument, and that you set maxheaderlen=0 (which will
preserve whatever the input line lengths were). It is also not strictly
true, since in many cases runs of whitespace in headers are collapsed
into single blanks. The latter is a bug that will eventually be fixed.
...@@ -339,6 +339,15 @@ and also the following constants for integer status codes: ...@@ -339,6 +339,15 @@ and also the following constants for integer status codes:
| :const:`UPGRADE_REQUIRED` | ``426`` | HTTP Upgrade to TLS, | | :const:`UPGRADE_REQUIRED` | ``426`` | HTTP Upgrade to TLS, |
| | | :rfc:`2817`, Section 6 | | | | :rfc:`2817`, Section 6 |
+------------------------------------------+---------+-----------------------------------------------------------------------+ +------------------------------------------+---------+-----------------------------------------------------------------------+
| :const:`PRECONDITION_REQUIRED` | ``428`` | Additional HTTP Status Codes, |
| | | :rfc:`6585`, Section 3 |
+------------------------------------------+---------+-----------------------------------------------------------------------+
| :const:`TOO_MANY_REQUESTS` | ``429`` | Additional HTTP Status Codes, |
| | | :rfc:`6585`, Section 4 |
+------------------------------------------+---------+-----------------------------------------------------------------------+
| :const:`REQUEST_HEADER_FIELDS_TOO_LARGE` | ``431`` | Additional HTTP Status Codes, |
| | | :rfc:`6585`, Section 5 |
+------------------------------------------+---------+-----------------------------------------------------------------------+
| :const:`INTERNAL_SERVER_ERROR` | ``500`` | HTTP/1.1, `RFC 2616, Section | | :const:`INTERNAL_SERVER_ERROR` | ``500`` | HTTP/1.1, `RFC 2616, Section |
| | | 10.5.1 | | | | 10.5.1 |
| | | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_ | | | | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_ |
...@@ -369,6 +378,12 @@ and also the following constants for integer status codes: ...@@ -369,6 +378,12 @@ and also the following constants for integer status codes:
| :const:`NOT_EXTENDED` | ``510`` | An HTTP Extension Framework, | | :const:`NOT_EXTENDED` | ``510`` | An HTTP Extension Framework, |
| | | :rfc:`2774`, Section 7 | | | | :rfc:`2774`, Section 7 |
+------------------------------------------+---------+-----------------------------------------------------------------------+ +------------------------------------------+---------+-----------------------------------------------------------------------+
| :const:`NETWORK_AUTHENTICATION_REQUIRED` | ``511`` | Additional HTTP Status Codes, |
| | | :rfc:`6585`, Section 6 |
+------------------------------------------+---------+-----------------------------------------------------------------------+
.. versionchanged:: 3.3
Added codes ``428``, ``429``, ``431`` and ``511`` from :rfc:`6585`.
.. data:: responses .. data:: responses
......
...@@ -141,6 +141,9 @@ UNPROCESSABLE_ENTITY = 422 ...@@ -141,6 +141,9 @@ UNPROCESSABLE_ENTITY = 422
LOCKED = 423 LOCKED = 423
FAILED_DEPENDENCY = 424 FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426 UPGRADE_REQUIRED = 426
PRECONDITION_REQUIRED = 428
TOO_MANY_REQUESTS = 429
REQUEST_HEADER_FIELDS_TOO_LARGE = 431
# server error # server error
INTERNAL_SERVER_ERROR = 500 INTERNAL_SERVER_ERROR = 500
...@@ -151,6 +154,7 @@ GATEWAY_TIMEOUT = 504 ...@@ -151,6 +154,7 @@ GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505 HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507 INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510 NOT_EXTENDED = 510
NETWORK_AUTHENTICATION_REQUIRED = 511
# Mapping status codes to official W3C names # Mapping status codes to official W3C names
responses = { responses = {
...@@ -192,6 +196,9 @@ responses = { ...@@ -192,6 +196,9 @@ responses = {
415: 'Unsupported Media Type', 415: 'Unsupported Media Type',
416: 'Requested Range Not Satisfiable', 416: 'Requested Range Not Satisfiable',
417: 'Expectation Failed', 417: 'Expectation Failed',
428: 'Precondition Required',
429: 'Too Many Requests',
431: 'Request Header Fields Too Large',
500: 'Internal Server Error', 500: 'Internal Server Error',
501: 'Not Implemented', 501: 'Not Implemented',
...@@ -199,6 +206,7 @@ responses = { ...@@ -199,6 +206,7 @@ responses = {
503: 'Service Unavailable', 503: 'Service Unavailable',
504: 'Gateway Timeout', 504: 'Gateway Timeout',
505: 'HTTP Version Not Supported', 505: 'HTTP Version Not Supported',
511: 'Network Authentication Required',
} }
# maximal amount of data to read at one time in _safe_read # maximal amount of data to read at one time in _safe_read
......
...@@ -573,7 +573,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): ...@@ -573,7 +573,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
# Table mapping response codes to messages; entries have the # Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}. # form {code: (shortmessage, longmessage)}.
# See RFC 2616. # See RFC 2616 and 6585.
responses = { responses = {
100: ('Continue', 'Request received, please continue'), 100: ('Continue', 'Request received, please continue'),
101: ('Switching Protocols', 101: ('Switching Protocols',
...@@ -628,6 +628,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): ...@@ -628,6 +628,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
'Cannot satisfy request range.'), 'Cannot satisfy request range.'),
417: ('Expectation Failed', 417: ('Expectation Failed',
'Expect condition could not be satisfied.'), 'Expect condition could not be satisfied.'),
428: ('Precondition Required',
'The origin server requires the request to be conditional.'),
429: ('Too Many Requests', 'The user has sent too many requests '
'in a given amount of time ("rate limiting").'),
431: ('Request Header Fields Too Large', 'The server is unwilling to '
'process the request because its header fields are too large.'),
500: ('Internal Server Error', 'Server got itself in trouble'), 500: ('Internal Server Error', 'Server got itself in trouble'),
501: ('Not Implemented', 501: ('Not Implemented',
...@@ -638,6 +644,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): ...@@ -638,6 +644,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
504: ('Gateway Timeout', 504: ('Gateway Timeout',
'The gateway server did not receive a timely response'), 'The gateway server did not receive a timely response'),
505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
511: ('Network Authentication Required',
'The client needs to authenticate to gain network access.'),
} }
......
...@@ -23,6 +23,28 @@ del sys.modules['bisect'] ...@@ -23,6 +23,28 @@ del sys.modules['bisect']
import bisect as c_bisect import bisect as c_bisect
class Range(object):
    """A trivial range()-like object without any integer width limitations.

    Item ``i`` is ``start + i`` for ``0 <= i < stop - start``.  The
    sequence is never materialised, so arbitrarily large bounds are cheap.
    insert() does not modify the sequence; it only records its arguments
    so that a test can inspect what insort_*() asked for.
    """

    def __init__(self, start, stop):
        self.start = start
        self.stop = stop
        # (index, item) passed to the most recent insert() call, or None.
        self.last_insert = None

    def __len__(self):
        return self.stop - self.start

    def __getitem__(self, idx):
        """Return the item at *idx*; negative indices count from the end.

        Raises IndexError when *idx* falls outside the sequence on either
        side (an index below -len() used to slip through unchecked).
        """
        n = self.stop - self.start
        if idx < 0:
            idx += n
        if not 0 <= idx < n:
            raise IndexError(idx)
        return self.start + idx

    def insert(self, idx, item):
        """Remember the requested insertion instead of performing it."""
        self.last_insert = idx, item
class TestBisect(unittest.TestCase): class TestBisect(unittest.TestCase):
module = None module = None
...@@ -125,9 +147,28 @@ class TestBisect(unittest.TestCase): ...@@ -125,9 +147,28 @@ class TestBisect(unittest.TestCase):
def test_large_range(self): def test_large_range(self):
# Issue 13496 # Issue 13496
mod = self.module mod = self.module
data = range(sys.maxsize-1) n = sys.maxsize
self.assertEqual(mod.bisect_left(data, sys.maxsize-3), sys.maxsize-3) data = range(n-1)
self.assertEqual(mod.bisect_right(data, sys.maxsize-3), sys.maxsize-2) self.assertEqual(mod.bisect_left(data, n-3), n-3)
self.assertEqual(mod.bisect_right(data, n-3), n-2)
self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
def test_large_pyrange(self):
    # Counterpart of test_large_range that uses the pure-Python Range
    # helper, so the sequence is not subject to C-imposed limits on
    # range() parameters.
    bisect_mod = self.module
    limit = sys.maxsize
    seq = Range(0, limit - 1)
    target = limit - 3
    lo, hi = limit - 10, limit

    # Search the whole sequence, then repeat with explicit lo/hi bounds.
    self.assertEqual(bisect_mod.bisect_left(seq, target), target)
    self.assertEqual(bisect_mod.bisect_right(seq, target), target + 1)
    self.assertEqual(bisect_mod.bisect_left(seq, target, lo, hi), target)
    self.assertEqual(bisect_mod.bisect_right(seq, target, lo, hi), target + 1)

    # insort_*() must pass the huge index on to insert() unchanged; Range
    # records the (index, item) pair it receives.
    item = limit - 100
    bisect_mod.insort_left(seq, item, item - 50, item + 50)
    self.assertEqual(seq.last_insert, (item, item))

    item = limit - 200
    bisect_mod.insort_right(seq, item, item - 50, item + 50)
    self.assertEqual(seq.last_insert, (item + 1, item))
def test_random(self, n=25): def test_random(self, n=25):
from random import randrange from random import randrange
......
...@@ -137,8 +137,57 @@ class PkgutilPEP302Tests(unittest.TestCase): ...@@ -137,8 +137,57 @@ class PkgutilPEP302Tests(unittest.TestCase):
self.assertEqual(foo.loads, 1) self.assertEqual(foo.loads, 1)
del sys.modules['foo'] del sys.modules['foo']
class ExtendPathTests(unittest.TestCase):
    """Check that pkgutil.extend_path() merges same-named package dirs."""

    def create_init(self, pkgname):
        """Create a temporary sys.path entry holding package *pkgname*.

        The package's __init__.py extends __path__ via extend_path().
        Returns the temporary directory; both the directory and the
        sys.path entry are removed again by registered cleanups.
        """
        dirname = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, dirname)
        sys.path.insert(0, dirname)
        # Remove the entry by value rather than by position so the cleanup
        # stays correct even if setUp() fails half-way or another entry is
        # pushed in front of ours.
        self.addCleanup(sys.path.remove, dirname)

        pkgdir = os.path.join(dirname, pkgname)
        os.mkdir(pkgdir)
        with open(os.path.join(pkgdir, '__init__.py'), 'w') as fl:
            fl.write('from pkgutil import extend_path\n__path__ = extend_path(__path__, __name__)\n')

        return dirname

    def create_submodule(self, dirname, pkgname, submodule_name, value):
        """Write <dirname>/<pkgname>/<submodule_name>.py defining ``value``."""
        module_name = os.path.join(dirname, pkgname, submodule_name + '.py')
        with open(module_name, 'w') as fl:
            print('value={}'.format(value), file=fl)

    def setUp(self):
        # Create 2 directories on sys.path
        self.pkgname = 'foo'
        self.dirname_0 = self.create_init(self.pkgname)
        self.dirname_1 = self.create_init(self.pkgname)

    def tearDown(self):
        # sys.path is restored by the cleanups registered in create_init().
        # Use pop() so that a test which failed before (or during) its
        # imports is not additionally reported with a misleading KeyError.
        for name in ('foo', 'foo.bar', 'foo.baz'):
            sys.modules.pop(name, None)

    def test_simple(self):
        self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0)
        self.create_submodule(self.dirname_1, self.pkgname, 'baz', 1)
        import foo.bar
        import foo.baz
        # Ensure we read the expected values
        self.assertEqual(foo.bar.value, 0)
        self.assertEqual(foo.baz.value, 1)

        # Ensure the path is set up correctly
        self.assertEqual(sorted(foo.__path__),
                         sorted([os.path.join(self.dirname_0, self.pkgname),
                                 os.path.join(self.dirname_1, self.pkgname)]))

    # XXX: test .pkg files
def test_main(): def test_main():
run_unittest(PkgutilTests, PkgutilPEP302Tests) run_unittest(PkgutilTests, PkgutilPEP302Tests, ExtendPathTests)
# this is necessary if test is run repeated (like when finding leaks) # this is necessary if test is run repeated (like when finding leaks)
import zipimport import zipimport
zipimport._zip_directory_cache.clear() zipimport._zip_directory_cache.clear()
......
...@@ -540,12 +540,19 @@ class Misc: ...@@ -540,12 +540,19 @@ class Misc:
The type keyword specifies the form in which the data is The type keyword specifies the form in which the data is
to be returned and should be an atom name such as STRING to be returned and should be an atom name such as STRING
or FILE_NAME. Type defaults to STRING. or FILE_NAME. Type defaults to STRING, except on X11, where the default
is to try UTF8_STRING and fall back to STRING.
This command is equivalent to: This command is equivalent to:
selection_get(CLIPBOARD) selection_get(CLIPBOARD)
""" """
if 'type' not in kw and self._windowingsystem == 'x11':
try:
kw['type'] = 'UTF8_STRING'
return self.tk.call(('clipboard', 'get') + self._options(kw))
except TclError:
del kw['type']
return self.tk.call(('clipboard', 'get') + self._options(kw)) return self.tk.call(('clipboard', 'get') + self._options(kw))
def clipboard_clear(self, **kw): def clipboard_clear(self, **kw):
...@@ -627,8 +634,16 @@ class Misc: ...@@ -627,8 +634,16 @@ class Misc:
A keyword parameter selection specifies the name of A keyword parameter selection specifies the name of
the selection and defaults to PRIMARY. A keyword the selection and defaults to PRIMARY. A keyword
parameter displayof specifies a widget on the display parameter displayof specifies a widget on the display
to use.""" to use. A keyword parameter type specifies the form of data to be
fetched, defaulting to STRING except on X11, where UTF8_STRING is tried
before STRING."""
if 'displayof' not in kw: kw['displayof'] = self._w if 'displayof' not in kw: kw['displayof'] = self._w
if 'type' not in kw and self._windowingsystem == 'x11':
try:
kw['type'] = 'UTF8_STRING'
return self.tk.call(('selection', 'get') + self._options(kw))
except TclError:
del kw['type']
return self.tk.call(('selection', 'get') + self._options(kw)) return self.tk.call(('selection', 'get') + self._options(kw))
def selection_handle(self, command, **kw): def selection_handle(self, command, **kw):
"""Specify a function COMMAND to call if the X """Specify a function COMMAND to call if the X
...@@ -1043,6 +1058,15 @@ class Misc: ...@@ -1043,6 +1058,15 @@ class Misc:
if displayof is None: if displayof is None:
return ('-displayof', self._w) return ('-displayof', self._w)
return () return ()
@property
def _windowingsystem(self):
    """Internal function.

    Return the result of Tcl's ``tk windowingsystem`` command.  The value
    is stored on the root widget as ``_windowingsystem_cached`` after the
    first query, so later accesses do not call into Tcl again.
    """
    try:
        return self._root()._windowingsystem_cached
    except AttributeError:
        # Nothing cached yet: ask Tk first, then store the answer on the
        # root widget.
        system = self.tk.call('tk', 'windowingsystem')
        self._root()._windowingsystem_cached = system
        return system
def _options(self, cnf, kw = None): def _options(self, cnf, kw = None):
"""Internal function.""" """Internal function."""
if kw: if kw:
......
...@@ -919,6 +919,7 @@ Ralf Schmitt ...@@ -919,6 +919,7 @@ Ralf Schmitt
Michael Schneider Michael Schneider
Peter Schneider-Kamp Peter Schneider-Kamp
Arvin Schnell Arvin Schnell
Robin Schreiber
Chad J. Schroeder Chad J. Schroeder
Sam Schulenburg Sam Schulenburg
Stefan Schwarzer Stefan Schwarzer
...@@ -1129,6 +1130,7 @@ Florent Xicluna ...@@ -1129,6 +1130,7 @@ Florent Xicluna
Hirokazu Yamamoto Hirokazu Yamamoto
Ka-Ping Yee Ka-Ping Yee
Jason Yeo Jason Yeo
EungJun Yi
Bob Yodlowski Bob Yodlowski
Danny Yoo Danny Yoo
George Yoshida George Yoshida
......
...@@ -10,6 +10,9 @@ What's New in Python 3.3.0 Alpha 4? ...@@ -10,6 +10,9 @@ What's New in Python 3.3.0 Alpha 4?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
Patch by Serhiy Storchaka.
- asdl_seq and asdl_int_seq are now Py_ssize_t sized. - asdl_seq and asdl_int_seq are now Py_ssize_t sized.
- Issue #14133 (PEP 415): Implement suppression of __context__ display with an - Issue #14133 (PEP 415): Implement suppression of __context__ display with an
...@@ -31,6 +34,21 @@ Core and Builtins ...@@ -31,6 +34,21 @@ Core and Builtins
Library Library
------- -------
- Issue #14829: Fix bisect and range() indexing with large indices
(>= 2 ** 32) under 64-bit Windows.
- Issue #14732: The _csv module now uses PEP 3121 module initialization.
Patch by Robin Schreiber.
- Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server
and http.client. Patch by EungJun Yi.
- Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when
accessing the Tk clipboard. Modify clipboard_get() to first request type
UTF8_STRING when no specific type is requested in an X11 windowing
environment, falling back to the current default type STRING if that fails.
Original patch by Thomas Kluyver.
- Issue #14773: Fix os.fwalk() failing on dangling symlinks. - Issue #14773: Fix os.fwalk() failing on dangling symlinks.
- Issue #12541: Be lenient with quotes around Realm field of HTTP Basic - Issue #12541: Be lenient with quotes around Realm field of HTTP Basic
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru). Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru).
*/ */
#define PY_SSIZE_T_CLEAN
#include "Python.h" #include "Python.h"
static Py_ssize_t static Py_ssize_t
...@@ -195,8 +196,7 @@ insort_left(PyObject *self, PyObject *args, PyObject *kw) ...@@ -195,8 +196,7 @@ insort_left(PyObject *self, PyObject *args, PyObject *kw)
return NULL; return NULL;
} else { } else {
_Py_IDENTIFIER(insert); _Py_IDENTIFIER(insert);
result = _PyObject_CallMethodId(list, &PyId_insert, "nO", index, item);
result = _PyObject_CallMethodId(list, &PyId_insert, "iO", index, item);
if (result == NULL) if (result == NULL)
return NULL; return NULL;
Py_DECREF(result); Py_DECREF(result);
......
...@@ -16,9 +16,39 @@ module instead. ...@@ -16,9 +16,39 @@ module instead.
#define IS_BASESTRING(o) \ #define IS_BASESTRING(o) \
PyUnicode_Check(o) PyUnicode_Check(o)
static PyObject *error_obj; /* CSV exception */ typedef struct {
static PyObject *dialects; /* Dialect registry */ PyObject *error_obj; /* CSV exception */
static long field_limit = 128 * 1024; /* max parsed field size */ PyObject *dialects; /* Dialect registry */
long field_limit; /* max parsed field size */
} _csvstate;
#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
/* Clear hook of the _csv module (see the _csvmodule definition): release
   the object references kept in the per-module state.  Always returns 0. */
static int
_csv_clear(PyObject *m)
{
    _csvstate *state = _csvstate(m);

    Py_CLEAR(state->error_obj);
    Py_CLEAR(state->dialects);
    return 0;
}
/* Traverse hook of the _csv module (see the _csvmodule definition): hand
   every object stored in the per-module state to the visitor.  Py_VISIT
   returns early with the visitor's result if that result is non-zero. */
static int
_csv_traverse(PyObject *m, visitproc visit, void *arg)
{
    _csvstate *state = _csvstate(m);

    Py_VISIT(state->error_obj);
    Py_VISIT(state->dialects);
    return 0;
}
static void
_csv_free(void *m)
{
_csv_clear((PyObject *)m);
}
static struct PyModuleDef _csvmodule;
#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
typedef enum { typedef enum {
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
...@@ -103,10 +133,10 @@ get_dialect_from_registry(PyObject * name_obj) ...@@ -103,10 +133,10 @@ get_dialect_from_registry(PyObject * name_obj)
{ {
PyObject *dialect_obj; PyObject *dialect_obj;
dialect_obj = PyDict_GetItem(dialects, name_obj); dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
if (dialect_obj == NULL) { if (dialect_obj == NULL) {
if (!PyErr_Occurred()) if (!PyErr_Occurred())
PyErr_Format(error_obj, "unknown dialect"); PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
} }
else else
Py_INCREF(dialect_obj); Py_INCREF(dialect_obj);
...@@ -544,9 +574,9 @@ parse_grow_buff(ReaderObj *self) ...@@ -544,9 +574,9 @@ parse_grow_buff(ReaderObj *self)
static int static int
parse_add_char(ReaderObj *self, Py_UCS4 c) parse_add_char(ReaderObj *self, Py_UCS4 c)
{ {
if (self->field_len >= field_limit) { if (self->field_len >= _csvstate_global->field_limit) {
PyErr_Format(error_obj, "field larger than field limit (%ld)", PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
field_limit); _csvstate_global->field_limit);
return -1; return -1;
} }
if (self->field_len == self->field_size && !parse_grow_buff(self)) if (self->field_len == self->field_size && !parse_grow_buff(self))
...@@ -703,7 +733,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) ...@@ -703,7 +733,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
} }
else { else {
/* illegal */ /* illegal */
PyErr_Format(error_obj, "'%c' expected after '%c'", PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
dialect->delimiter, dialect->delimiter,
dialect->quotechar); dialect->quotechar);
return -1; return -1;
...@@ -716,7 +746,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) ...@@ -716,7 +746,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
else if (c == '\0') else if (c == '\0')
self->state = START_RECORD; self->state = START_RECORD;
else { else {
PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
return -1; return -1;
} }
break; break;
...@@ -755,12 +785,12 @@ Reader_iternext(ReaderObj *self) ...@@ -755,12 +785,12 @@ Reader_iternext(ReaderObj *self)
if (lineobj == NULL) { if (lineobj == NULL) {
/* End of input OR exception */ /* End of input OR exception */
if (!PyErr_Occurred() && self->field_len != 0) if (!PyErr_Occurred() && self->field_len != 0)
PyErr_Format(error_obj, PyErr_Format(_csvstate_global->error_obj,
"newline inside string"); "newline inside string");
return NULL; return NULL;
} }
if (!PyUnicode_Check(lineobj)) { if (!PyUnicode_Check(lineobj)) {
PyErr_Format(error_obj, PyErr_Format(_csvstate_global->error_obj,
"iterator should return strings, " "iterator should return strings, "
"not %.200s " "not %.200s "
"(did you open the file in text mode?)", "(did you open the file in text mode?)",
...@@ -778,7 +808,7 @@ Reader_iternext(ReaderObj *self) ...@@ -778,7 +808,7 @@ Reader_iternext(ReaderObj *self)
c = PyUnicode_READ(kind, data, pos); c = PyUnicode_READ(kind, data, pos);
if (c == '\0') { if (c == '\0') {
Py_DECREF(lineobj); Py_DECREF(lineobj);
PyErr_Format(error_obj, PyErr_Format(_csvstate_global->error_obj,
"line contains NULL byte"); "line contains NULL byte");
goto err; goto err;
} }
...@@ -994,7 +1024,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data, ...@@ -994,7 +1024,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
} }
if (want_escape) { if (want_escape) {
if (!dialect->escapechar) { if (!dialect->escapechar) {
PyErr_Format(error_obj, PyErr_Format(_csvstate_global->error_obj,
"need to escape, but no escapechar set"); "need to escape, but no escapechar set");
return -1; return -1;
} }
...@@ -1010,7 +1040,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data, ...@@ -1010,7 +1040,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
*/ */
if (i == 0 && quote_empty) { if (i == 0 && quote_empty) {
if (dialect->quoting == QUOTE_NONE) { if (dialect->quoting == QUOTE_NONE) {
PyErr_Format(error_obj, PyErr_Format(_csvstate_global->error_obj,
"single empty field record must be quoted"); "single empty field record must be quoted");
return -1; return -1;
} }
...@@ -1127,7 +1157,7 @@ csv_writerow(WriterObj *self, PyObject *seq) ...@@ -1127,7 +1157,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
PyObject *line, *result; PyObject *line, *result;
if (!PySequence_Check(seq)) if (!PySequence_Check(seq))
return PyErr_Format(error_obj, "sequence expected"); return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
len = PySequence_Length(seq); len = PySequence_Length(seq);
if (len < 0) if (len < 0)
...@@ -1353,7 +1383,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) ...@@ -1353,7 +1383,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
static PyObject * static PyObject *
csv_list_dialects(PyObject *module, PyObject *args) csv_list_dialects(PyObject *module, PyObject *args)
{ {
return PyDict_Keys(dialects); return PyDict_Keys(_csvstate_global->dialects);
} }
static PyObject * static PyObject *
...@@ -1372,7 +1402,7 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) ...@@ -1372,7 +1402,7 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
dialect = _call_dialect(dialect_obj, kwargs); dialect = _call_dialect(dialect_obj, kwargs);
if (dialect == NULL) if (dialect == NULL)
return NULL; return NULL;
if (PyDict_SetItem(dialects, name_obj, dialect) < 0) { if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Py_DECREF(dialect); Py_DECREF(dialect);
return NULL; return NULL;
} }
...@@ -1384,8 +1414,8 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) ...@@ -1384,8 +1414,8 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
static PyObject * static PyObject *
csv_unregister_dialect(PyObject *module, PyObject *name_obj) csv_unregister_dialect(PyObject *module, PyObject *name_obj)
{ {
if (PyDict_DelItem(dialects, name_obj) < 0) if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
return PyErr_Format(error_obj, "unknown dialect"); return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
...@@ -1400,7 +1430,7 @@ static PyObject * ...@@ -1400,7 +1430,7 @@ static PyObject *
csv_field_size_limit(PyObject *module, PyObject *args) csv_field_size_limit(PyObject *module, PyObject *args)
{ {
PyObject *new_limit = NULL; PyObject *new_limit = NULL;
long old_limit = field_limit; long old_limit = _csvstate_global->field_limit;
if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
return NULL; return NULL;
...@@ -1410,9 +1440,9 @@ csv_field_size_limit(PyObject *module, PyObject *args) ...@@ -1410,9 +1440,9 @@ csv_field_size_limit(PyObject *module, PyObject *args)
"limit must be an integer"); "limit must be an integer");
return NULL; return NULL;
} }
field_limit = PyLong_AsLong(new_limit); _csvstate_global->field_limit = PyLong_AsLong(new_limit);
if (field_limit == -1 && PyErr_Occurred()) { if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
field_limit = old_limit; _csvstate_global->field_limit = old_limit;
return NULL; return NULL;
} }
} }
...@@ -1551,17 +1581,16 @@ static struct PyMethodDef csv_methods[] = { ...@@ -1551,17 +1581,16 @@ static struct PyMethodDef csv_methods[] = {
{ NULL, NULL } { NULL, NULL }
}; };
static struct PyModuleDef _csvmodule = { static struct PyModuleDef _csvmodule = {
PyModuleDef_HEAD_INIT, PyModuleDef_HEAD_INIT,
"_csv", "_csv",
csv_module_doc, csv_module_doc,
-1, sizeof(_csvstate),
csv_methods, csv_methods,
NULL, NULL,
NULL, _csv_traverse,
NULL, _csv_clear,
NULL _csv_free
}; };
PyMODINIT_FUNC PyMODINIT_FUNC
...@@ -1589,11 +1618,16 @@ PyInit__csv(void) ...@@ -1589,11 +1618,16 @@ PyInit__csv(void)
MODULE_VERSION) == -1) MODULE_VERSION) == -1)
return NULL; return NULL;
/* Set the field limit */
_csvstate(module)->field_limit = 128 * 1024;
/* Do I still need to add this var to the Module Dict? */
/* Add _dialects dictionary */ /* Add _dialects dictionary */
dialects = PyDict_New(); _csvstate(module)->dialects = PyDict_New();
if (dialects == NULL) if (_csvstate(module)->dialects == NULL)
return NULL; return NULL;
if (PyModule_AddObject(module, "_dialects", dialects)) Py_INCREF(_csvstate(module)->dialects);
if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
return NULL; return NULL;
/* Add quote styles into dictionary */ /* Add quote styles into dictionary */
...@@ -1609,9 +1643,10 @@ PyInit__csv(void) ...@@ -1609,9 +1643,10 @@ PyInit__csv(void)
return NULL; return NULL;
/* Add the CSV exception object to the module. */ /* Add the CSV exception object to the module. */
error_obj = PyErr_NewException("_csv.Error", NULL, NULL); _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
if (error_obj == NULL) if (_csvstate(module)->error_obj == NULL)
return NULL; return NULL;
PyModule_AddObject(module, "Error", error_obj); Py_INCREF(_csvstate(module)->error_obj);
PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
return module; return module;
} }
...@@ -349,7 +349,8 @@ PyException_SetContext(PyObject *self, PyObject *context) { ...@@ -349,7 +349,8 @@ PyException_SetContext(PyObject *self, PyObject *context) {
static struct PyMemberDef BaseException_members[] = { static struct PyMemberDef BaseException_members[] = {
{"__suppress_context__", T_BOOL, {"__suppress_context__", T_BOOL,
offsetof(PyBaseExceptionObject, suppress_context)} offsetof(PyBaseExceptionObject, suppress_context)},
{NULL}
}; };
......
...@@ -308,7 +308,7 @@ compute_range_item(rangeobject *r, PyObject *arg) ...@@ -308,7 +308,7 @@ compute_range_item(rangeobject *r, PyObject *arg)
static PyObject * static PyObject *
range_item(rangeobject *r, Py_ssize_t i) range_item(rangeobject *r, Py_ssize_t i)
{ {
PyObject *res, *arg = PyLong_FromLong(i); PyObject *res, *arg = PyLong_FromSsize_t(i);
if (!arg) { if (!arg) {
return NULL; return NULL;
} }
......
...@@ -215,7 +215,6 @@ InvalidContinuation: ...@@ -215,7 +215,6 @@ InvalidContinuation:
goto Return; goto Return;
} }
#undef LONG_PTR_MASK
#undef ASCII_CHAR_MASK #undef ASCII_CHAR_MASK
...@@ -415,4 +414,152 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, ...@@ -415,4 +414,152 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
#undef MAX_SHORT_UNICHARS #undef MAX_SHORT_UNICHARS
} }
/* The pattern for constructing UCS2-repeated masks. */
#if SIZEOF_LONG == 8
# define UCS2_REPEAT_MASK 0x0001000100010001ul
#elif SIZEOF_LONG == 4
# define UCS2_REPEAT_MASK 0x00010001ul
#else
# error C 'long' size should be either 4 or 8!
#endif
/* The mask for fast checking. */
#if STRINGLIB_SIZEOF_CHAR == 1
/* The mask for fast checking of whether a C 'long' contains any
non-ASCII or non-Latin1 UTF16-encoded characters. */
# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
#else
/* The mask for fast checking of whether a C 'long' may contain
UTF16-encoded surrogate characters. This is an efficient heuristic,
assuming that non-surrogate characters with a code point >= 0x8000 are
rare in most input.
*/
# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * 0x8000u)
#endif
/* The mask for fast byte-swapping. */
#define STRIPPED_MASK (UCS2_REPEAT_MASK * 0x00FFu)
/* Swap bytes. */
#define SWAB(value) ((((value) >> 8) & STRIPPED_MASK) | \
(((value) & STRIPPED_MASK) << 8))
/* Decode UTF-16 data from the bytes [*inptr, e) into dest, starting at
   index *outpos.

   native_ordering selects which byte of each pair is treated as the high
   byte (see ihi/ilo below).  On every exit path *inptr and *outpos are
   updated to the positions that were reached.

   Return value:
     0      the loop ended without error; fewer than two input bytes remain
     1      a surrogate was read but fewer than two bytes follow it
            (UnexpectedEnd)
     2      a surrogate that is not a high surrogate was found where a pair
            has to start (IllegalEncoding)
     3      a high surrogate was not followed by a low surrogate
            (IllegalSurrogate)
     other  the decoded code point that does not fit into STRINGLIB_CHAR;
            it has not been stored, but *inptr already points past it
*/
Py_LOCAL_INLINE(Py_UCS4)
STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
int native_ordering)
{
Py_UCS4 ch;
/* Upper bound for the block-wise fast path.  LONG_PTR_MASK is defined
   outside this function; presumably this rounds e down to a C 'long'
   boundary -- TODO confirm. */
const unsigned char *aligned_end =
(const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
const unsigned char *q = *inptr;
STRINGLIB_CHAR *p = dest + *outpos;
/* Offsets from q for retrieving byte pairs in the right order. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
int ihi = !!native_ordering, ilo = !native_ordering;
#else
int ihi = !native_ordering, ilo = !!native_ordering;
#endif
/* After this decrement, q < e guarantees that both q[0] and q[1] lie
   inside the input. */
--e;
while (q < e) {
Py_UCS4 ch2;
/* First check for possible aligned read of a C 'long'. Unaligned
reads are more expensive, better to defer to another iteration. */
if (!((size_t) q & LONG_PTR_MASK)) {
/* Fast path for runs of in-range non-surrogate chars. */
register const unsigned char *_q = q;
while (_q < aligned_end) {
unsigned long block = * (unsigned long *) _q;
if (native_ordering) {
/* Can use buffer directly */
if (block & FAST_CHAR_MASK)
break;
}
else {
/* Need to byte-swap */
if (block & SWAB(FAST_CHAR_MASK))
break;
/* NOTE(review): for 1-byte output a plain shift replaces the full swap;
   presumably only the low byte of each 16-bit unit survives the
   STRINGLIB_CHAR casts below -- confirm. */
#if STRINGLIB_SIZEOF_CHAR == 1
block >>= 8;
#else
block = SWAB(block);
#endif
}
/* Store the 16-bit units of the block in the order they appear in
   memory: lowest bits first on little-endian, highest bits first
   otherwise. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
# if SIZEOF_LONG == 4
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
p[1] = (STRINGLIB_CHAR)(block >> 16);
# elif SIZEOF_LONG == 8
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
p[3] = (STRINGLIB_CHAR)(block >> 48);
# endif
#else
# if SIZEOF_LONG == 4
p[0] = (STRINGLIB_CHAR)(block >> 16);
p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
# elif SIZEOF_LONG == 8
p[0] = (STRINGLIB_CHAR)(block >> 48);
p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
# endif
#endif
_q += SIZEOF_LONG;
p += SIZEOF_LONG / 2;
}
q = _q;
/* The fast path may have consumed everything up to the end. */
if (q >= e)
break;
}
/* Slow path: decode a single 16-bit unit byte by byte. */
ch = (q[ihi] << 8) | q[ilo];
q += 2;
if (!Py_UNICODE_IS_SURROGATE(ch)) {
#if STRINGLIB_SIZEOF_CHAR < 2
if (ch > STRINGLIB_MAX_CHAR)
/* Out-of-range */
goto Return;
#endif
*p++ = (STRINGLIB_CHAR)ch;
continue;
}
/* UTF-16 code pair: */
if (q >= e)
goto UnexpectedEnd;
if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
goto IllegalEncoding;
ch2 = (q[ihi] << 8) | q[ilo];
q += 2;
if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
goto IllegalSurrogate;
ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
/* When STRINGLIB_CHAR is narrower than 4 bytes the joined code point is
   returned to the caller instead of being stored. */
#if STRINGLIB_SIZEOF_CHAR < 4
/* Out-of-range */
goto Return;
#else
*p++ = (STRINGLIB_CHAR)ch;
#endif
}
/* Normal termination: report success. */
ch = 0;
Return:
/* Common exit: publish how far input and output have advanced. */
*inptr = q;
*outpos = p - dest;
return ch;
UnexpectedEnd:
ch = 1;
goto Return;
IllegalEncoding:
ch = 2;
goto Return;
IllegalSurrogate:
ch = 3;
goto Return;
}
#undef UCS2_REPEAT_MASK
#undef FAST_CHAR_MASK
#undef STRIPPED_MASK
#undef SWAB
#undef LONG_PTR_MASK
#endif /* STRINGLIB_IS_UNICODE */ #endif /* STRINGLIB_IS_UNICODE */
This diff is collapsed.
...@@ -25,6 +25,8 @@ def main(input_path, output_path): ...@@ -25,6 +25,8 @@ def main(input_path, output_path):
with open(output_path, 'w', encoding='utf-8') as output_file: with open(output_path, 'w', encoding='utf-8') as output_file:
output_file.write('\n'.join(lines)) output_file.write('\n'.join(lines))
output_file.write('/* Mercurial binary marker: \x00 */') output_file.write('/* Mercurial binary marker: \x00 */')
# Avoid a compiler warning for lack of EOL
output_file.write('\n')
if __name__ == '__main__': if __name__ == '__main__':
......
No preview for this file type
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment