Commit cba608ca authored by Guido van Rossum

More efficient implementation of tell(); _read_chunk() doesn't have to
call self.buffer.tell().
parent 0dd32e24
...@@ -897,11 +897,11 @@ class TextIOWrapper(TextIOBase): ...@@ -897,11 +897,11 @@ class TextIOWrapper(TextIOBase):
self._seekable = self.buffer.seekable() self._seekable = self.buffer.seekable()
# A word about _snapshot. This attribute is either None, or a # A word about _snapshot. This attribute is either None, or a
# tuple (position, decoder_pickle, readahead) where position is a # tuple (decoder_pickle, readahead, pending) where decoder_pickle
# position of the underlying buffer, decoder_pickle is a pickled # is a pickled decoder state, readahead is the chunk of bytes that
# decoder state, and readahead is the chunk of bytes that was read # was read, and pending is the characters that were rendered by
# from that position. We use this to reconstruct intermediate # the decoder after feeding it those bytes. We use this to
# decoder states in tell(). # reconstruct intermediate decoder states in tell().
def _seekable(self): def _seekable(self):
return self._seekable return self._seekable
...@@ -944,14 +944,16 @@ class TextIOWrapper(TextIOBase): ...@@ -944,14 +944,16 @@ class TextIOWrapper(TextIOBase):
return decoder return decoder
def _read_chunk(self): def _read_chunk(self):
if not self._seekable:
return self.buffer.read(self._CHUNK_SIZE)
assert self._decoder is not None assert self._decoder is not None
position = self.buffer.tell() if not self._seekable:
readahead = self.buffer.read(self._CHUNK_SIZE)
pending = self._decoder.decode(readahead, not readahead)
return readahead, pending
decoder_state = pickle.dumps(self._decoder, 2) decoder_state = pickle.dumps(self._decoder, 2)
readahead = self.buffer.read(self._CHUNK_SIZE) readahead = self.buffer.read(self._CHUNK_SIZE)
self._snapshot = (position, decoder_state, readahead) pending = self._decoder.decode(readahead, not readahead)
return readahead self._snapshot = (decoder_state, readahead, pending)
return readahead, pending
def _encode_decoder_state(self, ds, pos): def _encode_decoder_state(self, ds, pos):
if ds == self._decoder_in_rest_pickle: if ds == self._decoder_in_rest_pickle:
...@@ -975,21 +977,22 @@ class TextIOWrapper(TextIOBase): ...@@ -975,21 +977,22 @@ class TextIOWrapper(TextIOBase):
if not self._seekable: if not self._seekable:
raise IOError("Underlying stream is not seekable") raise IOError("Underlying stream is not seekable")
self.flush() self.flush()
position = self.buffer.tell()
if self._decoder is None or self._snapshot is None: if self._decoder is None or self._snapshot is None:
assert self._pending == "" assert self._pending == ""
return self.buffer.tell() return position
position, decoder_state, readahead = self._snapshot decoder_state, readahead, pending = self._snapshot
position -= len(readahead)
needed = len(pending) - len(self._pending)
if not needed:
return self._encode_decoder_state(decoder_state, position)
decoder = pickle.loads(decoder_state) decoder = pickle.loads(decoder_state)
characters = "" n = 0
sequence = []
for i, b in enumerate(readahead): for i, b in enumerate(readahead):
c = decoder.decode(bytes([b])) n += len(decoder.decode(bytes([b])))
if c: if n >= needed:
characters += c decoder_state = pickle.dumps(decoder, 2)
sequence.append((characters, i+1, pickle.dumps(decoder, 2))) return self._encode_decoder_state(decoder_state, position+i+1)
for ch, i, st in sequence:
if ch + self._pending == characters:
return self._encode_decoder_state(st, position + i)
raise IOError("Can't reconstruct logical file position") raise IOError("Can't reconstruct logical file position")
def seek(self, pos, whence=0): def seek(self, pos, whence=0):
...@@ -1023,9 +1026,11 @@ class TextIOWrapper(TextIOBase): ...@@ -1023,9 +1026,11 @@ class TextIOWrapper(TextIOBase):
return pos return pos
decoder = pickle.loads(ds) decoder = pickle.loads(ds)
self.buffer.seek(pos) self.buffer.seek(pos)
self._snapshot = (pos, ds, "") self._snapshot = (ds, b"", "")
self._pending = "" self._pending = ""
self._decoder = None if not self._decoder_in_rest_pickle:
self._get_decoder() # For its side effect
self._decoder = decoder
return orig_pos return orig_pos
def read(self, n: int = -1): def read(self, n: int = -1):
...@@ -1038,9 +1043,9 @@ class TextIOWrapper(TextIOBase): ...@@ -1038,9 +1043,9 @@ class TextIOWrapper(TextIOBase):
return res return res
else: else:
while len(res) < n: while len(res) < n:
data = self._read_chunk() readahead, pending = self._read_chunk()
res += decoder.decode(data, not data) res += pending
if not data: if not readahead:
break break
self._pending = res[n:] self._pending = res[n:]
return res[:n] return res[:n]
...@@ -1087,9 +1092,9 @@ class TextIOWrapper(TextIOBase): ...@@ -1087,9 +1092,9 @@ class TextIOWrapper(TextIOBase):
# No line ending seen yet - get more data # No line ending seen yet - get more data
while True: while True:
data = self._read_chunk() readahead, pending = self._read_chunk()
more_line = decoder.decode(data, not data) more_line = pending
if more_line or not data: if more_line or not readahead:
break break
if not more_line: if not more_line:
......
...@@ -532,25 +532,21 @@ class TextIOWrapperTest(unittest.TestCase): ...@@ -532,25 +532,21 @@ class TextIOWrapperTest(unittest.TestCase):
f.truncate() f.truncate()
sample = u"s\xff\u0fff\uffff" sample = u"s\xff\u0fff\uffff"
wlines = [] wlines = []
for size in (0, 1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65, for size in (0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 62, 63, 64, 65, 1000):
100, 200, 300, 400, 500, 1000):
chars = [] chars = []
for i in xrange(size): for i in xrange(size):
chars.append(sample[i % len(sample)]) chars.append(sample[i % len(sample)])
line = u"".join(chars) + "\n" line = u"".join(chars) + "\n"
wlines.append((f.tell(), line)) wlines.append((f.tell(), line))
f.write(line) f.write(line)
wendpos = f.tell()
f.seek(0) f.seek(0)
rlines = [] rlines = []
while True: while True:
pos = f.tell() pos = f.tell()
line = f.readline() line = f.readline()
if not line: if not line:
rendpos = pos
break break
rlines.append((pos, line)) rlines.append((pos, line))
self.assertEquals(rendpos, wendpos)
self.assertEquals(rlines, wlines) self.assertEquals(rlines, wlines)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment