Commit ad983e79 authored by Raymond Hettinger

Improve the implementation of itertools.tee().

Formerly, the underlying queue was implemented in terms of two lists.  The
new queue is a series of singly-linked, fixed-length lists.

The new implementation runs much faster, supports multi-way tees, and
allows tees of tees without additional memory costs.

The root ideas for this structure were contributed by Andrew Koenig
and Guido van Rossum.
parent 767126d7
...@@ -281,9 +281,9 @@ by functions or loops that truncate the stream. ...@@ -281,9 +281,9 @@ by functions or loops that truncate the stream.
\end{verbatim} \end{verbatim}
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{tee}{iterable} \begin{funcdesc}{tee}{iterable\optional{, n=2}}
Return two independent iterators from a single iterable. Return \var{n} independent iterators from a single iterable.
Equivalent to: The case where \var{n} is two is equivalent to:
\begin{verbatim} \begin{verbatim}
def tee(iterable): def tee(iterable):
...@@ -299,6 +299,10 @@ by functions or loops that truncate the stream. ...@@ -299,6 +299,10 @@ by functions or loops that truncate the stream.
return (gen(it.next), gen(it.next)) return (gen(it.next), gen(it.next))
\end{verbatim} \end{verbatim}
Note, once \function{tee()} has made a split, the original \var{iterable}
should not be used anywhere else; otherwise, the \var{iterable} could get
advanced without the tee objects being informed.
Note, this member of the toolkit may require significant auxiliary Note, this member of the toolkit may require significant auxiliary
storage (depending on how much temporary data needs to be stored). storage (depending on how much temporary data needs to be stored).
In general, if one iterator is going use most or all of the data before In general, if one iterator is going use most or all of the data before
...@@ -408,6 +412,10 @@ def repeatfunc(func, times=None, *args): ...@@ -408,6 +412,10 @@ def repeatfunc(func, times=None, *args):
def pairwise(iterable): def pairwise(iterable):
"s -> (s0,s1), (s1,s2), (s2, s3), ..." "s -> (s0,s1), (s1,s2), (s2, s3), ..."
a, b = tee(iterable) a, b = tee(iterable)
return izip(a, islice(b, 1, None)) try:
b.next()
except StopIteration:
pass
return izip(a, b)
\end{verbatim} \end{verbatim}
...@@ -200,7 +200,7 @@ class TestBasicOps(unittest.TestCase): ...@@ -200,7 +200,7 @@ class TestBasicOps(unittest.TestCase):
self.assertRaises(ValueError, dropwhile(errfunc, [(4,5)]).next) self.assertRaises(ValueError, dropwhile(errfunc, [(4,5)]).next)
def test_tee(self): def test_tee(self):
n = 100 n = 200
def irange(n): def irange(n):
for i in xrange(n): for i in xrange(n):
yield i yield i
...@@ -217,16 +217,16 @@ class TestBasicOps(unittest.TestCase): ...@@ -217,16 +217,16 @@ class TestBasicOps(unittest.TestCase):
self.assertEqual(list(b), range(n)) self.assertEqual(list(b), range(n))
a, b = tee(irange(n)) # test dealloc of leading iterator a, b = tee(irange(n)) # test dealloc of leading iterator
self.assertEqual(a.next(), 0) for i in xrange(100):
self.assertEqual(a.next(), 1) self.assertEqual(a.next(), i)
del a del a
self.assertEqual(list(b), range(n)) self.assertEqual(list(b), range(n))
a, b = tee(irange(n)) # test dealloc of trailing iterator a, b = tee(irange(n)) # test dealloc of trailing iterator
self.assertEqual(a.next(), 0) for i in xrange(100):
self.assertEqual(a.next(), 1) self.assertEqual(a.next(), i)
del b del b
self.assertEqual(list(a), range(2, n)) self.assertEqual(list(a), range(100, n))
for j in xrange(5): # test randomly interleaved for j in xrange(5): # test randomly interleaved
order = [0]*n + [1]*n order = [0]*n + [1]*n
...@@ -239,21 +239,31 @@ class TestBasicOps(unittest.TestCase): ...@@ -239,21 +239,31 @@ class TestBasicOps(unittest.TestCase):
self.assertEqual(lists[0], range(n)) self.assertEqual(lists[0], range(n))
self.assertEqual(lists[1], range(n)) self.assertEqual(lists[1], range(n))
# test argument format checking
self.assertRaises(TypeError, tee) self.assertRaises(TypeError, tee)
self.assertRaises(TypeError, tee, 3) self.assertRaises(TypeError, tee, 3)
self.assertRaises(TypeError, tee, [1,2], 'x') self.assertRaises(TypeError, tee, [1,2], 'x')
self.assertRaises(TypeError, tee, [1,2], 3, 'x')
try: # tee object should be instantiable
class A(tee): pass a, b = tee('abc')
except TypeError: c = type(a)('def')
pass self.assertEqual(list(c), list('def'))
else:
self.fail("tee constructor should not be subclassable") # test long-lagged and multi-way split
a, b, c = tee(xrange(2000), 3)
for i in xrange(100):
self.assertEqual(a.next(), i)
self.assertEqual(list(b), range(2000))
self.assertEqual([c.next(), c.next()], range(2))
self.assertEqual(list(a), range(100,2000))
self.assertEqual(list(c), range(2,2000))
# tee pass-through to copyable iterator
a, b = tee('abc')
c, d = tee(a)
self.assert_(a is c)
# tee_iterator should not be instantiable
a, b = tee(xrange(10))
self.assertRaises(TypeError, type(a))
self.assert_(a is iter(a)) # tee_iterator should support __iter__
def test_StopIteration(self): def test_StopIteration(self):
self.assertRaises(StopIteration, izip().next) self.assertRaises(StopIteration, izip().next)
...@@ -317,13 +327,6 @@ class TestGC(unittest.TestCase): ...@@ -317,13 +327,6 @@ class TestGC(unittest.TestCase):
a = [] a = []
self.makecycle(starmap(lambda *t: t, [(a,a)]*2), a) self.makecycle(starmap(lambda *t: t, [(a,a)]*2), a)
def test_tee(self):
a = []
p, q = t = tee([a]*2)
a += [a, p, q, t]
p.next()
del a, p, q, t
def R(seqn): def R(seqn):
'Regular generator' 'Regular generator'
for i in seqn: for i in seqn:
...@@ -626,7 +629,11 @@ Samuele ...@@ -626,7 +629,11 @@ Samuele
>>> def pairwise(iterable): >>> def pairwise(iterable):
... "s -> (s0,s1), (s1,s2), (s2, s3), ..." ... "s -> (s0,s1), (s1,s2), (s2, s3), ..."
... a, b = tee(iterable) ... a, b = tee(iterable)
... return izip(a, islice(b, 1, None)) ... try:
... b.next()
... except StopIteration:
... pass
... return izip(a, b)
This is not part of the examples but it tests to make sure the definitions This is not part of the examples but it tests to make sure the definitions
perform as purported. perform as purported.
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment