Commit 85e3ee74 authored by Antoine Pitrou

Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.

parent 20d31b51
...@@ -1865,6 +1865,19 @@ class TextIOWrapper(TextIOBase): ...@@ -1865,6 +1865,19 @@ class TextIOWrapper(TextIOBase):
return buffer return buffer
def seek(self, cookie, whence=0): def seek(self, cookie, whence=0):
def _reset_encoder(position):
    """Re-synchronise the text encoder after a seek.

    This only matters for proper BOM handling.  *position* is the
    stream position that was just sought to: at position 0 the encoder
    is fully reset, anywhere else its state is set to 0.

    ``self`` is taken from the enclosing method's scope.
    """
    try:
        enc = self._encoder or self._get_encoder()
    except LookupError:
        # Sometimes no encoder exists for the stream; nothing to reset.
        return
    if position == 0:
        enc.reset()
    else:
        enc.setstate(0)
if self.closed: if self.closed:
raise ValueError("tell on closed file") raise ValueError("tell on closed file")
if not self._seekable: if not self._seekable:
...@@ -1885,6 +1898,7 @@ class TextIOWrapper(TextIOBase): ...@@ -1885,6 +1898,7 @@ class TextIOWrapper(TextIOBase):
self._snapshot = None self._snapshot = None
if self._decoder: if self._decoder:
self._decoder.reset() self._decoder.reset()
_reset_encoder(position)
return position return position
if whence != 0: if whence != 0:
raise ValueError("unsupported whence (%r)" % (whence,)) raise ValueError("unsupported whence (%r)" % (whence,))
...@@ -1922,17 +1936,7 @@ class TextIOWrapper(TextIOBase): ...@@ -1922,17 +1936,7 @@ class TextIOWrapper(TextIOBase):
raise OSError("can't restore logical file position") raise OSError("can't restore logical file position")
self._decoded_chars_used = chars_to_skip self._decoded_chars_used = chars_to_skip
# Finally, reset the encoder (merely useful for proper BOM handling) _reset_encoder(cookie)
try:
encoder = self._encoder or self._get_encoder()
except LookupError:
# Sometimes the encoder doesn't exist
pass
else:
if cookie != 0:
encoder.setstate(0)
else:
encoder.reset()
return cookie return cookie
def read(self, size=None): def read(self, size=None):
......
...@@ -2669,6 +2669,19 @@ class TextIOWrapperTest(unittest.TestCase): ...@@ -2669,6 +2669,19 @@ class TextIOWrapperTest(unittest.TestCase):
with self.open(filename, 'rb') as f: with self.open(filename, 'rb') as f:
self.assertEqual(f.read(), 'bbbzzz'.encode(charset)) self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
def test_seek_append_bom(self):
    # Open an existing file for appending, seek to the start and then
    # to the end before writing.  The file contents must equal the
    # whole text encoded in one go.
    path = support.TESTFN
    for encoding in ('utf-8-sig', 'utf-16', 'utf-32'):
        # Create the file with some initial content.
        with self.open(path, 'w', encoding=encoding) as writer:
            writer.write('aaa')
        # Reopen in append mode and move around before writing.
        with self.open(path, 'a', encoding=encoding) as appender:
            appender.seek(0)
            appender.seek(0, self.SEEK_END)
            appender.write('xxx')
        # Compare the raw bytes against a single-shot encode.
        with self.open(path, 'rb') as raw:
            self.assertEqual(raw.read(), 'aaaxxx'.encode(encoding))
def test_errors_property(self): def test_errors_property(self):
with self.open(support.TESTFN, "w") as f: with self.open(support.TESTFN, "w") as f:
self.assertEqual(f.errors, "strict") self.assertEqual(f.errors, "strict")
......
...@@ -29,6 +29,9 @@ Core and Builtins ...@@ -29,6 +29,9 @@ Core and Builtins
Library Library
------- -------
- Issue #22982: Improve BOM handling when seeking to multiple positions of
a writable text file.
- Issue #23865: close() methods in multiple modules are now idempotent and more - Issue #23865: close() methods in multiple modules are now idempotent and more
robust at shutdown. If multiple resources need to be released, they are released robust at shutdown. If multiple resources need to be released, they are released
even if errors occur. even if errors occur.
......
...@@ -2042,11 +2042,10 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) ...@@ -2042,11 +2042,10 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
} }
static int static int
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) _textiowrapper_encoder_reset(textio *self, int start_of_stream)
{ {
PyObject *res; PyObject *res;
/* Same as _textiowrapper_decoder_setstate() above. */ if (start_of_stream) {
if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL); res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
self->encoding_start_of_stream = 1; self->encoding_start_of_stream = 1;
} }
...@@ -2061,6 +2060,14 @@ _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) ...@@ -2061,6 +2060,14 @@ _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
return 0; return 0;
} }
/* Reset the encoder according to a seek cookie.
 *
 * Uses the same start-of-stream test as _textiowrapper_decoder_setstate():
 * the cookie denotes the start of the stream when both its start_pos and
 * dec_flags are zero.  Returns whatever _textiowrapper_encoder_reset()
 * returns.
 */
static int
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
{
    int at_start_of_stream =
        (cookie->start_pos == 0 && cookie->dec_flags == 0);

    return _textiowrapper_encoder_reset(self, at_start_of_stream);
}
static PyObject * static PyObject *
textiowrapper_seek(textio *self, PyObject *args) textiowrapper_seek(textio *self, PyObject *args)
{ {
...@@ -2128,7 +2135,17 @@ textiowrapper_seek(textio *self, PyObject *args) ...@@ -2128,7 +2135,17 @@ textiowrapper_seek(textio *self, PyObject *args)
} }
res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2); res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Py_XDECREF(cookieObj); Py_CLEAR(cookieObj);
if (res == NULL)
goto fail;
if (self->encoder) {
/* If seek() == 0, we are at the start of stream, otherwise not */
cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
Py_DECREF(res);
goto fail;
}
}
return res; return res;
} }
else if (whence != 0) { else if (whence != 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment