Commit bc8e642c authored by Walter Dörwald

If the data read from the bytestream in readline() ends in a '\r', read one more
byte, even if the user has passed a size parameter. This extra byte shouldn't
cause a buffer overflow in the tokenizer. The original plan was to return a line
ending in '\r', which might be recognizable as a complete line and skip any '\n'
that was read afterwards. Unfortunately this didn't work, as the tokenizer only
recognizes '\n' as line ends, which in turn led to joined lines and
SyntaxErrors, so this special treatment of a split '\r\n' has been dropped. (It
can only happen with a temporarily exhausted bytestream now anyway.)
Fixes parts of SF bugs #1163244 and #1175396.
parent 49ab700c
...@@ -230,7 +230,6 @@ class StreamReader(Codec): ...@@ -230,7 +230,6 @@ class StreamReader(Codec):
self.errors = errors self.errors = errors
self.bytebuffer = "" self.bytebuffer = ""
self.charbuffer = u"" self.charbuffer = u""
self.atcr = False
def decode(self, input, errors='strict'): def decode(self, input, errors='strict'):
raise NotImplementedError raise NotImplementedError
...@@ -306,18 +305,12 @@ class StreamReader(Codec): ...@@ -306,18 +305,12 @@ class StreamReader(Codec):
# If size is given, we call read() only once # If size is given, we call read() only once
while True: while True:
data = self.read(readsize) data = self.read(readsize)
if self.atcr and data.startswith(u"\n"):
data = data[1:]
if data: if data:
self.atcr = data.endswith(u"\r") # If we're at a "\r" read one # extra character # (which might
# If we're at a "\r" (and are allowed to read more), read one # be a "\n") to get a proper # line ending. If the stream is
# extra character (which might be a "\n") to get a proper # temporarily exhausted we return the wrong line ending.
# line ending. (If the stream is temporarily exhausted we return if data.endswith(u"\r"):
# the wrong line ending, but at least we won't generate a bogus
# second line.)
if self.atcr and size is None:
data += self.read(size=1, chars=1) data += self.read(size=1, chars=1)
self.atcr = data.endswith(u"\r")
line += data line += data
lines = line.splitlines(True) lines = line.splitlines(True)
...@@ -367,7 +360,6 @@ class StreamReader(Codec): ...@@ -367,7 +360,6 @@ class StreamReader(Codec):
""" """
self.bytebuffer = "" self.bytebuffer = ""
self.charbuffer = u"" self.charbuffer = u""
self.atcr = False
def seek(self, offset, whence=0): def seek(self, offset, whence=0):
""" Set the input stream's current position. """ Set the input stream's current position.
......
...@@ -266,6 +266,12 @@ Library ...@@ -266,6 +266,12 @@ Library
- Bug #1149508: ``textwrap`` now handles hyphenated numbers (eg. "2004-03-05") - Bug #1149508: ``textwrap`` now handles hyphenated numbers (eg. "2004-03-05")
correctly. correctly.
- Partial fixes for SF bugs #1163244 and #1175396: If a chunk read by
``codecs.StreamReader.readline()`` has a trailing "\r", read one more
character to get a proper line ending, even if the user has passed a
size parameter. Remove the special handling of a "\r\n" that has been
split between two lines.
Build Build
----- -----
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment