Commit 981babc8 authored by Guido van Rossum

Add an errors parameter to open() and TextIOWrapper() to specify error handling.

parent eb44d891
...@@ -9,7 +9,7 @@ extern "C" { ...@@ -9,7 +9,7 @@ extern "C" {
#define PY_STDIOTEXTMODE "b" #define PY_STDIOTEXTMODE "b"
PyAPI_FUNC(PyObject *) PyFile_FromFd(int, char *, char *, int, char *, char *, PyAPI_FUNC(PyObject *) PyFile_FromFd(int, char *, char *, int, char *, char *,
int); char *, int);
PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int); PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int);
PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int); PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int);
PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *); PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *);
......
...@@ -49,8 +49,8 @@ class BlockingIOError(IOError): ...@@ -49,8 +49,8 @@ class BlockingIOError(IOError):
self.characters_written = characters_written self.characters_written = characters_written
def open(file, mode="r", buffering=None, encoding=None, newline=None, def open(file, mode="r", buffering=None, encoding=None, errors=None,
closefd=True): newline=None, closefd=True):
r"""Replacement for the built-in open function. r"""Replacement for the built-in open function.
Args: Args:
...@@ -61,6 +61,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, ...@@ -61,6 +61,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
can be: 0 = unbuffered, 1 = line buffered, can be: 0 = unbuffered, 1 = line buffered,
larger = fully buffered. larger = fully buffered.
encoding: optional string giving the text encoding. encoding: optional string giving the text encoding.
errors: optional string giving the encoding error handling.
newline: optional newlines specifier; must be None, '', '\n', '\r' newline: optional newlines specifier; must be None, '', '\n', '\r'
or '\r\n'; all other values are illegal. It controls the or '\r\n'; all other values are illegal. It controls the
handling of line endings. It works as follows: handling of line endings. It works as follows:
...@@ -99,7 +100,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, ...@@ -99,7 +100,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
'U': universal newline mode (for backwards compatibility) 'U': universal newline mode (for backwards compatibility)
Constraints: Constraints:
- encoding must not be given when a binary mode is given - encoding or errors must not be given when a binary mode is given
- buffering must not be zero when a text mode is given - buffering must not be zero when a text mode is given
Returns: Returns:
...@@ -115,6 +116,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, ...@@ -115,6 +116,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
raise TypeError("invalid buffering: %r" % buffering) raise TypeError("invalid buffering: %r" % buffering)
if encoding is not None and not isinstance(encoding, str): if encoding is not None and not isinstance(encoding, str):
raise TypeError("invalid encoding: %r" % encoding) raise TypeError("invalid encoding: %r" % encoding)
if errors is not None and not isinstance(errors, str):
raise TypeError("invalid errors: %r" % errors)
modes = set(mode) modes = set(mode)
if modes - set("arwb+tU") or len(mode) > len(modes): if modes - set("arwb+tU") or len(mode) > len(modes):
raise ValueError("invalid mode: %r" % mode) raise ValueError("invalid mode: %r" % mode)
...@@ -136,6 +139,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, ...@@ -136,6 +139,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
raise ValueError("must have exactly one of read/write/append mode") raise ValueError("must have exactly one of read/write/append mode")
if binary and encoding is not None: if binary and encoding is not None:
raise ValueError("binary mode doesn't take an encoding argument") raise ValueError("binary mode doesn't take an encoding argument")
if binary and errors is not None:
raise ValueError("binary mode doesn't take an errors argument")
if binary and newline is not None: if binary and newline is not None:
raise ValueError("binary mode doesn't take a newline argument") raise ValueError("binary mode doesn't take a newline argument")
raw = FileIO(file, raw = FileIO(file,
...@@ -177,7 +182,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, ...@@ -177,7 +182,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
buffer.name = file buffer.name = file
buffer.mode = mode buffer.mode = mode
return buffer return buffer
text = TextIOWrapper(buffer, encoding, newline) text = TextIOWrapper(buffer, encoding, errors, newline)
text.name = file text.name = file
text.mode = mode text.mode = mode
return text return text
...@@ -1128,7 +1133,7 @@ class TextIOWrapper(TextIOBase): ...@@ -1128,7 +1133,7 @@ class TextIOWrapper(TextIOBase):
_CHUNK_SIZE = 128 _CHUNK_SIZE = 128
def __init__(self, buffer, encoding=None, newline=None): def __init__(self, buffer, encoding=None, errors=None, newline=None):
if newline not in (None, "", "\n", "\r", "\r\n"): if newline not in (None, "", "\n", "\r", "\r\n"):
raise ValueError("illegal newline value: %r" % (newline,)) raise ValueError("illegal newline value: %r" % (newline,))
if encoding is None: if encoding is None:
...@@ -1148,8 +1153,15 @@ class TextIOWrapper(TextIOBase): ...@@ -1148,8 +1153,15 @@ class TextIOWrapper(TextIOBase):
if not isinstance(encoding, str): if not isinstance(encoding, str):
raise ValueError("invalid encoding: %r" % encoding) raise ValueError("invalid encoding: %r" % encoding)
if errors is None:
errors = "strict"
else:
if not isinstance(errors, str):
raise ValueError("invalid errors: %r" % errors)
self.buffer = buffer self.buffer = buffer
self._encoding = encoding self._encoding = encoding
self._errors = errors
self._readuniversal = not newline self._readuniversal = not newline
self._readtranslate = newline is None self._readtranslate = newline is None
self._readnl = newline self._readnl = newline
...@@ -1164,6 +1176,10 @@ class TextIOWrapper(TextIOBase): ...@@ -1164,6 +1176,10 @@ class TextIOWrapper(TextIOBase):
def encoding(self): def encoding(self):
return self._encoding return self._encoding
@property
def errors(self):
return self._errors
# A word about _snapshot. This attribute is either None, or a # A word about _snapshot. This attribute is either None, or a
# tuple (decoder_state, readahead, pending) where decoder_state is # tuple (decoder_state, readahead, pending) where decoder_state is
# the second (integer) item of the decoder state, readahead is the # the second (integer) item of the decoder state, readahead is the
...@@ -1206,7 +1222,7 @@ class TextIOWrapper(TextIOBase): ...@@ -1206,7 +1222,7 @@ class TextIOWrapper(TextIOBase):
if haslf and self._writetranslate and self._writenl != "\n": if haslf and self._writetranslate and self._writenl != "\n":
s = s.replace("\n", self._writenl) s = s.replace("\n", self._writenl)
# XXX What if we were just reading? # XXX What if we were just reading?
b = s.encode(self._encoding) b = s.encode(self._encoding, self._errors)
self.buffer.write(b) self.buffer.write(b)
if haslf and self.isatty(): if haslf and self.isatty():
self.flush() self.flush()
...@@ -1220,7 +1236,7 @@ class TextIOWrapper(TextIOBase): ...@@ -1220,7 +1236,7 @@ class TextIOWrapper(TextIOBase):
if make_decoder is None: if make_decoder is None:
raise IOError("Can't find an incremental decoder for encoding %s" % raise IOError("Can't find an incremental decoder for encoding %s" %
self._encoding) self._encoding)
decoder = make_decoder() # XXX: errors decoder = make_decoder(self._errors)
if self._readuniversal: if self._readuniversal:
decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
self._decoder = decoder self._decoder = decoder
...@@ -1447,9 +1463,11 @@ class StringIO(TextIOWrapper): ...@@ -1447,9 +1463,11 @@ class StringIO(TextIOWrapper):
# XXX This is really slow, but fully functional # XXX This is really slow, but fully functional
def __init__(self, initial_value="", encoding="utf-8", newline="\n"): def __init__(self, initial_value="", encoding="utf-8",
errors="strict", newline="\n"):
super(StringIO, self).__init__(BytesIO(), super(StringIO, self).__init__(BytesIO(),
encoding=encoding, encoding=encoding,
errors=errors,
newline=newline) newline=newline)
if initial_value: if initial_value:
if not isinstance(initial_value, str): if not isinstance(initial_value, str):
...@@ -1459,4 +1477,4 @@ class StringIO(TextIOWrapper): ...@@ -1459,4 +1477,4 @@ class StringIO(TextIOWrapper):
def getvalue(self): def getvalue(self):
self.flush() self.flush()
return self.buffer.getvalue().decode(self._encoding) return self.buffer.getvalue().decode(self._encoding, self._errors)
...@@ -496,6 +496,46 @@ class TextIOWrapperTest(unittest.TestCase): ...@@ -496,6 +496,46 @@ class TextIOWrapperTest(unittest.TestCase):
def tearDown(self): def tearDown(self):
test_support.unlink(test_support.TESTFN) test_support.unlink(test_support.TESTFN)
def testEncodingErrorsReading(self):
# (1) default
b = io.BytesIO(b"abc\n\xff\n")
t = io.TextIOWrapper(b, encoding="ascii")
self.assertRaises(UnicodeError, t.read)
# (2) explicit strict
b = io.BytesIO(b"abc\n\xff\n")
t = io.TextIOWrapper(b, encoding="ascii", errors="strict")
self.assertRaises(UnicodeError, t.read)
# (3) ignore
b = io.BytesIO(b"abc\n\xff\n")
t = io.TextIOWrapper(b, encoding="ascii", errors="ignore")
self.assertEquals(t.read(), "abc\n\n")
# (4) replace
b = io.BytesIO(b"abc\n\xff\n")
t = io.TextIOWrapper(b, encoding="ascii", errors="replace")
self.assertEquals(t.read(), "abc\n\ufffd\n")
def testEncodingErrorsWriting(self):
# (1) default
b = io.BytesIO()
t = io.TextIOWrapper(b, encoding="ascii")
self.assertRaises(UnicodeError, t.write, "\xff")
# (2) explicit strict
b = io.BytesIO()
t = io.TextIOWrapper(b, encoding="ascii", errors="strict")
self.assertRaises(UnicodeError, t.write, "\xff")
# (3) ignore
b = io.BytesIO()
t = io.TextIOWrapper(b, encoding="ascii", errors="ignore")
t.write("abc\xffdef\n")
t.flush()
self.assertEquals(b.getvalue(), b"abcdef\n")
# (4) replace
b = io.BytesIO()
t = io.TextIOWrapper(b, encoding="ascii", errors="replace")
t.write("abc\xffdef\n")
t.flush()
self.assertEquals(b.getvalue(), b"abc?def\n")
def testNewlinesInput(self): def testNewlinesInput(self):
testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n") normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
......
...@@ -915,6 +915,7 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds) ...@@ -915,6 +915,7 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL; return NULL;
} }
cr.real = PyFloat_AsDouble(tmp); cr.real = PyFloat_AsDouble(tmp);
cr.imag = 0.0; /* Shut up compiler warning */
Py_DECREF(tmp); Py_DECREF(tmp);
} }
if (i == NULL) { if (i == NULL) {
......
...@@ -27,15 +27,16 @@ extern "C" { ...@@ -27,15 +27,16 @@ extern "C" {
PyObject * PyObject *
PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding, PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding,
char *newline, int closefd) char *errors, char *newline, int closefd)
{ {
PyObject *io, *stream, *nameobj = NULL; PyObject *io, *stream, *nameobj = NULL;
io = PyImport_ImportModule("io"); io = PyImport_ImportModule("io");
if (io == NULL) if (io == NULL)
return NULL; return NULL;
stream = PyObject_CallMethod(io, "open", "isissi", fd, mode, stream = PyObject_CallMethod(io, "open", "isisssi", fd, mode,
buffering, encoding, newline, closefd); buffering, encoding, errors,
newline, closefd);
Py_DECREF(io); Py_DECREF(io);
if (stream == NULL) if (stream == NULL)
return NULL; return NULL;
......
...@@ -2602,7 +2602,7 @@ call_find_module(char *name, PyObject *path) ...@@ -2602,7 +2602,7 @@ call_find_module(char *name, PyObject *path)
(char*)PyUnicode_GetDefaultEncoding(); (char*)PyUnicode_GetDefaultEncoding();
} }
fob = PyFile_FromFd(fd, pathname, fdp->mode, -1, fob = PyFile_FromFd(fd, pathname, fdp->mode, -1,
(char*)encoding, NULL, 1); (char*)encoding, NULL, NULL, 1);
if (fob == NULL) { if (fob == NULL) {
close(fd); close(fd);
PyMem_FREE(found_encoding); PyMem_FREE(found_encoding);
......
...@@ -770,7 +770,7 @@ initstdio(void) ...@@ -770,7 +770,7 @@ initstdio(void)
#endif #endif
} }
else { else {
if (!(std = PyFile_FromFd(fd, "<stdin>", "r", -1, NULL, if (!(std = PyFile_FromFd(fd, "<stdin>", "r", -1, NULL, NULL,
"\n", 0))) { "\n", 0))) {
goto error; goto error;
} }
...@@ -790,7 +790,7 @@ initstdio(void) ...@@ -790,7 +790,7 @@ initstdio(void)
#endif #endif
} }
else { else {
if (!(std = PyFile_FromFd(fd, "<stdout>", "w", -1, NULL, if (!(std = PyFile_FromFd(fd, "<stdout>", "w", -1, NULL, NULL,
"\n", 0))) { "\n", 0))) {
goto error; goto error;
} }
...@@ -811,7 +811,7 @@ initstdio(void) ...@@ -811,7 +811,7 @@ initstdio(void)
#endif #endif
} }
else { else {
if (!(std = PyFile_FromFd(fd, "<stderr>", "w", -1, NULL, if (!(std = PyFile_FromFd(fd, "<stderr>", "w", -1, NULL, NULL,
"\n", 0))) { "\n", 0))) {
goto error; goto error;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment