Commit a38f73b1 authored by Alexandre Vassalotti

Fix issue1753: TextIOWrapper.write writes utf BOM for every string.

Patch by Erick Tryzelaar, with slight modifications by me.
parent 52d168a9
...@@ -1182,6 +1182,7 @@ class TextIOWrapper(TextIOBase): ...@@ -1182,6 +1182,7 @@ class TextIOWrapper(TextIOBase):
self._readnl = newline self._readnl = newline
self._writetranslate = newline != '' self._writetranslate = newline != ''
self._writenl = newline or os.linesep self._writenl = newline or os.linesep
self._encoder = None
self._decoder = None self._decoder = None
self._pending = "" self._pending = ""
self._snapshot = None self._snapshot = None
...@@ -1240,8 +1241,9 @@ class TextIOWrapper(TextIOBase): ...@@ -1240,8 +1241,9 @@ class TextIOWrapper(TextIOBase):
haslf = (self._writetranslate or self._line_buffering) and "\n" in s haslf = (self._writetranslate or self._line_buffering) and "\n" in s
if haslf and self._writetranslate and self._writenl != "\n": if haslf and self._writetranslate and self._writenl != "\n":
s = s.replace("\n", self._writenl) s = s.replace("\n", self._writenl)
encoder = self._encoder or self._get_encoder()
# XXX What if we were just reading? # XXX What if we were just reading?
b = s.encode(self._encoding, self._errors) b = encoder.encode(s)
self.buffer.write(b) self.buffer.write(b)
if self._line_buffering and (haslf or "\r" in s): if self._line_buffering and (haslf or "\r" in s):
self.flush() self.flush()
...@@ -1250,11 +1252,13 @@ class TextIOWrapper(TextIOBase): ...@@ -1250,11 +1252,13 @@ class TextIOWrapper(TextIOBase):
self._decoder.reset() self._decoder.reset()
return length return length
def _get_encoder(self):
    """Build, cache and return an incremental encoder for this stream.

    The encoder is looked up from ``self._encoding`` and constructed with
    ``self._errors`` as its error-handling scheme.  A new encoder is
    created on every call and stored on ``self._encoder``.
    """
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    encoder = encoder_factory(self._errors)
    self._encoder = encoder
    return encoder
def _get_decoder(self): def _get_decoder(self):
make_decoder = codecs.getincrementaldecoder(self._encoding) make_decoder = codecs.getincrementaldecoder(self._encoding)
if make_decoder is None:
raise IOError("Can't find an incremental decoder for encoding %s" %
self._encoding)
decoder = make_decoder(self._errors) decoder = make_decoder(self._errors)
if self._readuniversal: if self._readuniversal:
decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
......
...@@ -765,6 +765,24 @@ class TextIOWrapperTest(unittest.TestCase): ...@@ -765,6 +765,24 @@ class TextIOWrapperTest(unittest.TestCase):
f.readline() f.readline()
f.tell() f.tell()
def testEncodedWrites(self):
data = "1234567890"
tests = ("utf-16",
"utf-16-le",
"utf-16-be",
"utf-32",
"utf-32-le",
"utf-32-be")
for encoding in tests:
buf = io.BytesIO()
f = io.TextIOWrapper(buf, encoding=encoding)
# Check if the BOM is written only once (see issue1753).
f.write(data)
f.write(data)
f.seek(0)
self.assertEquals(f.read(), data * 2)
self.assertEquals(buf.getvalue(), (data * 2).encode(encoding))
def timingTest(self): def timingTest(self):
timer = time.time timer = time.time
enc = "utf8" enc = "utf8"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment