Commit d36862cf authored by Raymond Hettinger

Add itertools.izip_longest().

parent 15cade05
......@@ -302,6 +302,33 @@ by functions or loops that truncate the stream.
don't care about trailing, unmatched values from the longer iterables.
\end{funcdesc}
\begin{funcdesc}{izip_longest}{*iterables\optional{, fillvalue}}
Make an iterator that aggregates elements from each of the iterables.
If the iterables are of uneven length, missing values are filled in
with \var{fillvalue}, which defaults to \code{None}. Iteration continues
until the longest iterable is exhausted. Equivalent to:
\begin{verbatim}
def izip_longest(*args, **kwds):
    # izip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
    fillvalue = kwds.get('fillvalue')
    # The default argument is evaluated once, so every sentinel() generator
    # shares the pop method of one list holding len(args)-1 entries.
    def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
        yield counter()         # yields the fillvalue, or raises IndexError
    fillers = repeat(fillvalue)
    # Each input is followed by one sentinel value and then endless fillers.
    iters = [chain(it, sentinel(), fillers) for it in args]
    try:
        for tup in izip(*iters):
            yield tup
    except IndexError:
        # Raised by the sentinel of the last input to run out: the shared
        # list is empty by then, which ends the iteration.
        pass
\end{verbatim}
If one of the iterables is potentially infinite, then the
\function{izip_longest()} function should be wrapped with something
that limits the number of calls (for example \function{islice()} or
\function{take()}).
\versionadded{2.6}
\end{funcdesc}
\begin{funcdesc}{repeat}{object\optional{, times}}
Make an iterator that returns \var{object} over and over again.
Runs indefinitely unless the \var{times} argument is specified.
......
......@@ -198,6 +198,51 @@ class TestBasicOps(unittest.TestCase):
ids = map(id, list(izip('abc', 'def')))
self.assertEqual(len(dict.fromkeys(ids)), len(ids))
def test_iziplongest(self):
    # Basic behaviour of izip_longest(): padding, fillvalue keyword,
    # argument validation and result-tuple reuse.
    for args in [
            ['abc', range(6)],
            [range(6), 'abc'],
            [range(1000), range(2000,2100), range(3000,3050)],
            [range(1000), range(0), range(3000,3050), range(1200), range(1500)],
            [range(1000), range(0), range(3000,3050), range(1200), range(1500), range(0)],
        ]:
        # map(None, ...) pads the shorter inputs with None, which is the
        # expected result for the default fillvalue.
        target = map(None, *args)
        self.assertEqual(list(izip_longest(*args)), target)
        self.assertEqual(list(izip_longest(*args, **{})), target)
        target = [tuple((e is None and 'X' or e) for e in t) for t in target]   # Replace None fills with 'X'
        self.assertEqual(list(izip_longest(*args, **dict(fillvalue='X'))), target)

    self.assertEqual(take(3,izip_longest('abcdef', count())), zip('abcdef', range(3))) # take 3 from infinite input

    # Degenerate argument lists
    self.assertEqual(list(izip_longest()), zip())
    self.assertEqual(list(izip_longest([])), zip([]))
    self.assertEqual(list(izip_longest('abcdef')), zip('abcdef'))

    self.assertEqual(list(izip_longest('abc', 'defg', **{})), map(None, 'abc', 'defg')) # empty keyword dict
    # Non-iterable arguments are rejected
    self.assertRaises(TypeError, izip_longest, 3)
    self.assertRaises(TypeError, izip_longest, range(3), 3)

    # Only the 'fillvalue' keyword is accepted
    for stmt in [
        "izip_longest('abc', fv=1)",
        "izip_longest('abc', fillvalue=1, bogus_keyword=None)",
        ]:
        try:
            eval(stmt, globals(), locals())
        except TypeError:
            pass
        else:
            self.fail('Did not raise TypeError in: ' + stmt)

    # Check tuple re-use (implementation detail)
    self.assertEqual([tuple(list(pair)) for pair in izip_longest('abc', 'def')],
                     zip('abc', 'def'))
    self.assertEqual([pair for pair in izip_longest('abc', 'def')],
                     zip('abc', 'def'))
    # When nothing else holds the result, every step hands back the same object
    ids = map(id, izip_longest('abc', 'def'))
    self.assertEqual(min(ids), max(ids))
    # When the results are kept alive, each step must produce a distinct tuple
    ids = map(id, list(izip_longest('abc', 'def')))
    self.assertEqual(len(dict.fromkeys(ids)), len(ids))
def test_repeat(self):
self.assertEqual(zip(xrange(3),repeat('a')),
[(0, 'a'), (1, 'a'), (2, 'a')])
......@@ -611,6 +656,15 @@ class TestVariousIteratorArgs(unittest.TestCase):
self.assertRaises(TypeError, list, izip(N(s)))
self.assertRaises(ZeroDivisionError, list, izip(E(s)))
def test_iziplongest(self):
    # Feed izip_longest() a variety of iterator flavours.
    # NOTE(review): G, I, Ig, S, L, R, X, N and E are wrapper helpers defined
    # elsewhere in this test module; their behaviour is not visible here.
    samples = ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5))
    wrappers = (G, I, Ig, S, L, R)
    for seq in samples:
        for wrap in wrappers:
            single = list(izip_longest(wrap(seq)))
            self.assertEqual(single, zip(wrap(seq)))
            paired = list(izip_longest(wrap(seq), wrap(seq)))
            self.assertEqual(paired, zip(wrap(seq), wrap(seq)))
        self.assertRaises(TypeError, izip_longest, X(seq))
        self.assertRaises(TypeError, list, izip_longest(N(seq)))
        self.assertRaises(ZeroDivisionError, list, izip_longest(E(seq)))
def test_imap(self):
for s in (range(10), range(0), range(100), (7,11), xrange(20,50,5)):
for g in (G, I, Ig, S, L, R):
......
......@@ -127,6 +127,8 @@ Library
- Added heapq.merge() for merging sorted input streams.
- Added itertools.izip_longest().
- Have the encoding package's search function dynamically import using absolute
import semantics.
......
......@@ -2472,6 +2472,238 @@ static PyTypeObject repeat_type = {
PyObject_GC_Del, /* tp_free */
};
/* iziplongest object ************************************************************/
#include "Python.h"
/* Per-iterator state for izip_longest. */
typedef struct {
	PyObject_HEAD
	Py_ssize_t tuplesize;	/* number of inputs; length of ittuple and result */
	Py_ssize_t numactive;	/* inputs not yet exhausted; starts at tuplesize */
	PyObject *ittuple;		/* tuple of iterators */
	PyObject *result;		/* result tuple, recycled by next() when its refcount is 1 */
	PyObject *fillvalue;	/* value substituted once an input runs out */
	PyObject *filler;		/* repeat(fillvalue) */
} iziplongestobject;

/* Forward declaration; the type object is defined after its methods. */
static PyTypeObject iziplongest_type;
/* tp_new for izip_longest.
 *
 * args: tuple of iterables to be zipped together.
 * kwds: NULL, empty, or a dict holding only 'fillvalue' (default None).
 *
 * Returns a new izip_longest iterator, or NULL with an exception set:
 * TypeError for an unexpected keyword or a non-iterable argument, or
 * whatever error the underlying allocations raise.
 */
static PyObject *
izip_longest_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    iziplongestobject *lz;
    Py_ssize_t i;
    PyObject *ittuple;  /* tuple of iterators */
    PyObject *result;
    PyObject *fillvalue = Py_None;
    PyObject *filler;
    Py_ssize_t tuplesize = PySequence_Length(args);

    /* 'fillvalue' is the only keyword accepted */
    if (kwds != NULL && PyDict_CheckExact(kwds) && PyDict_Size(kwds) > 0) {
        fillvalue = PyDict_GetItemString(kwds, "fillvalue");
        if (fillvalue == NULL || PyDict_Size(kwds) > 1) {
            PyErr_SetString(PyExc_TypeError,
                "izip_longest() got an unexpected keyword argument");
            return NULL;
        }
    }

    /* args must be a tuple */
    assert(PyTuple_Check(args));

    /* obtain iterators */
    ittuple = PyTuple_New(tuplesize);
    if (ittuple == NULL)
        return NULL;
    for (i=0; i < tuplesize; ++i) {
        PyObject *item = PyTuple_GET_ITEM(args, i);
        PyObject *it = PyObject_GetIter(item);
        if (it == NULL) {
            /* Replace the generic TypeError with one naming the argument */
            if (PyErr_ExceptionMatches(PyExc_TypeError))
                PyErr_Format(PyExc_TypeError,
                    "izip_longest argument #%zd must support iteration",
                    i+1);
            Py_DECREF(ittuple);
            return NULL;
        }
        PyTuple_SET_ITEM(ittuple, i, it);
    }

    /* endless source of fill values; swapped in for exhausted inputs */
    filler = PyObject_CallFunctionObjArgs((PyObject *)(&repeat_type), fillvalue, NULL);
    if (filler == NULL) {
        Py_DECREF(ittuple);
        return NULL;
    }

    /* create a result holder */
    result = PyTuple_New(tuplesize);
    if (result == NULL) {
        Py_DECREF(ittuple);
        Py_DECREF(filler);
        return NULL;
    }
    for (i=0 ; i < tuplesize ; i++) {
        Py_INCREF(Py_None);
        PyTuple_SET_ITEM(result, i, Py_None);
    }

    /* create iziplongestobject structure */
    lz = (iziplongestobject *)type->tp_alloc(type, 0);
    if (lz == NULL) {
        Py_DECREF(ittuple);
        Py_DECREF(filler);
        Py_DECREF(result);
        return NULL;
    }
    lz->ittuple = ittuple;
    lz->tuplesize = tuplesize;
    lz->numactive = tuplesize;
    lz->result = result;
    /* fillvalue is borrowed (from kwds or Py_None), so take a reference */
    Py_INCREF(fillvalue);
    lz->fillvalue = fillvalue;
    /* filler is already an owned reference from the call above; the object
       takes it over as-is.  An extra INCREF here would never be balanced by
       the single DECREF in dealloc and would leak the repeat object. */
    lz->filler = filler;
    return (PyObject *)lz;
}
/* Destructor: remove the object from GC tracking, drop the references it
   holds, then release its memory through the type's tp_free slot. */
static void
izip_longest_dealloc(iziplongestobject *lz)
{
	/* untrack first so the collector never sees a half-torn-down object */
	PyObject_GC_UnTrack(lz);
	Py_XDECREF(lz->ittuple);
	Py_XDECREF(lz->result);
	Py_XDECREF(lz->fillvalue);
	Py_XDECREF(lz->filler);
	lz->ob_type->tp_free(lz);
}
/* GC traversal: hand each object reference held by the iterator to the
   visit callback.  Returns 0 unless a Py_VISIT call returns early. */
static int
izip_longest_traverse(iziplongestobject *lz, visitproc visit, void *arg)
{
	Py_VISIT(lz->ittuple);
	Py_VISIT(lz->result);
	Py_VISIT(lz->fillvalue);
	Py_VISIT(lz->filler);
	return 0;
}
static PyObject *
izip_longest_next(iziplongestobject *lz)
{
Py_ssize_t i;
Py_ssize_t tuplesize = lz->tuplesize;
PyObject *result = lz->result;
PyObject *it;
PyObject *item;
PyObject *olditem;
if (tuplesize == 0)
return NULL;
if (result->ob_refcnt == 1) {
Py_INCREF(result);
for (i=0 ; i < tuplesize ; i++) {
it = PyTuple_GET_ITEM(lz->ittuple, i);
assert(PyIter_Check(it));
item = (*it->ob_type->tp_iternext)(it);
if (item == NULL) {
if (lz->numactive <= 1) {
Py_DECREF(result);
return NULL;
} else {
Py_INCREF(lz->filler);
PyTuple_SET_ITEM(lz->ittuple, i, lz->filler);
Py_INCREF(lz->fillvalue);
item = lz->fillvalue;
Py_DECREF(it);
lz->numactive -= 1;
}
}
olditem = PyTuple_GET_ITEM(result, i);
PyTuple_SET_ITEM(result, i, item);
Py_DECREF(olditem);
}
} else {
result = PyTuple_New(tuplesize);
if (result == NULL)
return NULL;
for (i=0 ; i < tuplesize ; i++) {
it = PyTuple_GET_ITEM(lz->ittuple, i);
assert(PyIter_Check(it));
item = (*it->ob_type->tp_iternext)(it);
if (item == NULL) {
if (lz->numactive <= 1) {
Py_DECREF(result);
return NULL;
} else {
Py_INCREF(lz->filler);
PyTuple_SET_ITEM(lz->ittuple, i, lz->filler);
Py_INCREF(lz->fillvalue);
item = lz->fillvalue;
Py_DECREF(it);
lz->numactive -= 1;
}
}
PyTuple_SET_ITEM(result, i, item);
}
}
return result;
}
/* Docstring installed as tp_doc of the izip_longest type. */
PyDoc_STRVAR(izip_longest_doc,
"izip_longest(iter1 [,iter2 [...]], [fillvalue=None]) --> izip_longest object\n\
\n\
Return an izip_longest object whose .next() method returns a tuple where\n\
the i-th element comes from the i-th iterable argument. The .next()\n\
method continues until the longest iterable in the argument sequence\n\
is exhausted and then it raises StopIteration. When the shorter iterables\n\
are exhausted, the fillvalue is substituted in their place. The fillvalue\n\
defaults to None or can be specified by a keyword argument.\n\
");
/* Type object for itertools.izip_longest: a GC-aware, subclassable type
   that is its own iterator (tp_iter returns self). */
static PyTypeObject iziplongest_type = {
	PyObject_HEAD_INIT(NULL)
	0, /* ob_size */
	"itertools.izip_longest", /* tp_name */
	sizeof(iziplongestobject), /* tp_basicsize */
	0, /* tp_itemsize */
	/* methods */
	(destructor)izip_longest_dealloc, /* tp_dealloc */
	0, /* tp_print */
	0, /* tp_getattr */
	0, /* tp_setattr */
	0, /* tp_compare */
	0, /* tp_repr */
	0, /* tp_as_number */
	0, /* tp_as_sequence */
	0, /* tp_as_mapping */
	0, /* tp_hash */
	0, /* tp_call */
	0, /* tp_str */
	PyObject_GenericGetAttr, /* tp_getattro */
	0, /* tp_setattro */
	0, /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
	Py_TPFLAGS_BASETYPE, /* tp_flags */
	izip_longest_doc, /* tp_doc */
	(traverseproc)izip_longest_traverse, /* tp_traverse */
	0, /* tp_clear */
	0, /* tp_richcompare */
	0, /* tp_weaklistoffset */
	PyObject_SelfIter, /* tp_iter */
	(iternextfunc)izip_longest_next, /* tp_iternext */
	0, /* tp_methods */
	0, /* tp_members */
	0, /* tp_getset */
	0, /* tp_base */
	0, /* tp_dict */
	0, /* tp_descr_get */
	0, /* tp_descr_set */
	0, /* tp_dictoffset */
	0, /* tp_init */
	0, /* tp_alloc */
	izip_longest_new, /* tp_new */
	PyObject_GC_Del, /* tp_free */
};
/* module level code ********************************************************/
......@@ -2485,6 +2717,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\
\n\
Iterators terminating on the shortest input sequence:\n\
izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
izip_longest(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
ifilter(pred, seq) --> elements of seq where pred(elem) is True\n\
ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\
islice(seq, [start,] stop [, step]) --> elements from\n\
......@@ -2522,6 +2755,7 @@ inititertools(void)
&ifilterfalse_type,
&count_type,
&izip_type,
&iziplongest_type,
&repeat_type,
&groupby_type,
NULL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment