Commit 35db4395 authored by Raymond Hettinger

Issue #13742: Add key and reverse parameters to heapq.merge()

parent e7bfe136
......@@ -444,12 +444,13 @@ Glossary
A number of tools in Python accept key functions to control how elements
are ordered or grouped. They include :func:`min`, :func:`max`,
:func:`sorted`, :meth:`list.sort`, :func:`heapq.nsmallest`,
:func:`heapq.nlargest`, and :func:`itertools.groupby`.
:func:`sorted`, :meth:`list.sort`, :func:`heapq.merge`,
:func:`heapq.nsmallest`, :func:`heapq.nlargest`, and :func:`itertools.groupby`.
There are several ways to create a key function. For example, the
:meth:`str.lower` method can serve as a key function for case insensitive
sorts. Alternatively, an ad-hoc key function can be built from a
sorts. Alternatively, a key function can be built from a
:keyword:`lambda` expression such as ``lambda r: (r[0], r[2])``. Also,
the :mod:`operator` module provides three key function constructors:
:func:`~operator.attrgetter`, :func:`~operator.itemgetter`, and
......@@ -81,7 +81,7 @@ The following functions are provided:
The module also offers three general purpose functions based on heaps.
.. function:: merge(*iterables)
.. function:: merge(*iterables, key=None, reverse=False)
Merge multiple sorted inputs into a single sorted output (for example, merge
timestamped entries from multiple log files). Returns an :term:`iterator`
......@@ -91,6 +91,18 @@ The module also offers three general purpose functions based on heaps.
not pull the data into memory all at once, and assumes that each of the input
streams is already sorted (smallest to largest).
It has two optional arguments, which must be specified as keyword arguments.
*key* specifies a :term:`key function` of one argument that is used to
extract a comparison key from each input element. The default value is
``None`` (compare the elements directly).
*reverse* is a boolean value. If set to ``True``, then the input elements
are merged as if each comparison were reversed.
.. versionchanged:: 3.5
Added the optional *key* and *reverse* parameters.
.. function:: nlargest(n, iterable, key=None)
......@@ -176,6 +176,16 @@ def heapify(x):
for i in reversed(range(n//2)):
_siftup(x, i)
def _heappop_max(heap):
    """Maxheap version of a heappop.

    Remove and return the item stored at ``heap[0]``.  When *heap* holds a
    single item, that item is returned and the list is left empty.

    Raises IndexError if *heap* is empty.
    """
    lastelt = heap.pop()    # raises appropriate IndexError if heap is empty
    if heap:
        # Items remain: hand back the old root, drop the former last element
        # into the root slot, and let _siftup_max (defined elsewhere in this
        # module) move it to its proper position.
        returnitem = heap[0]
        heap[0] = lastelt
        _siftup_max(heap, 0)
        return returnitem
    # The heap contained exactly one item, which is the one just popped.
    return lastelt
def _heapreplace_max(heap, item):
"""Maxheap version of a heappop followed by a heappush."""
returnitem = heap[0] # raises appropriate IndexError if heap is empty
......@@ -311,7 +321,7 @@ try:
except ImportError:
def merge(*iterables):
def merge(*iterables, key=None, reverse=False):
'''Merge multiple sorted inputs into a single sorted output.
Similar to sorted(itertools.chain(*iterables)) but returns a generator,
......@@ -321,31 +331,73 @@ def merge(*iterables):
>>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
[0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
If *key* is not None, applies a key function to each element to determine
its sort order.
>>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len))
['dog', 'cat', 'fish', 'horse', 'kangaroo']
h = []
h_append = h.append
if reverse:
_heapify = _heapify_max
_heappop = _heappop_max
_heapreplace = _heapreplace_max
direction = -1
_heapify = heapify
_heappop = heappop
_heapreplace = heapreplace
direction = 1
if key is None:
for order, it in enumerate(map(iter, iterables)):
next = it.__next__
h_append([next(), order * direction, next])
except StopIteration:
while len(h) > 1:
while True:
value, order, next = s = h[0]
yield value
s[0] = next() # raises StopIteration when exhausted
_heapreplace(h, s) # restore heap condition
except StopIteration:
_heappop(h) # remove empty iterator
if h:
# fast case when only a single iterator remains
value, order, next = h[0]
yield value
yield from next.__self__
for order, it in enumerate(map(iter, iterables)):
next = it.__next__
h_append([next(), order, next])
value = next()
h_append([key(value), order * direction, value, next])
except StopIteration:
_heapreplace = heapreplace
while len(h) > 1:
while True:
value, order, next = s = h[0]
key_value, order, value, next = s = h[0]
yield value
s[0] = next() # raises StopIteration when exhausted
_heapreplace(h, s) # restore heap condition
value = next()
s[0] = key(value)
s[2] = value
_heapreplace(h, s)
except StopIteration:
heappop(h) # remove empty iterator
if h:
# fast case when only a single iterator remains
value, order, next = h[0]
key_value, order, value, next = h[0]
yield value
yield from next.__self__
......@@ -6,6 +6,7 @@ import unittest
from test import support
from unittest import TestCase, skipUnless
from operator import itemgetter
py_heapq = support.import_fresh_module('heapq', blocked=['_heapq'])
c_heapq = support.import_fresh_module('heapq', fresh=['_heapq'])
......@@ -152,11 +153,21 @@ class TestHeap:
def test_merge(self):
inputs = []
for i in range(random.randrange(5)):
row = sorted(random.randrange(1000) for j in range(random.randrange(10)))
for i in range(random.randrange(25)):
row = []
for j in range(random.randrange(100)):
tup = random.choice('ABC'), random.randrange(-500, 500)
self.assertEqual(sorted(chain(*inputs)), list(self.module.merge(*inputs)))
self.assertEqual(list(self.module.merge()), [])
for key in [None, itemgetter(0), itemgetter(1), itemgetter(1, 0)]:
for reverse in [False, True]:
seqs = []
for seq in inputs:
seqs.append(sorted(seq, key=key, reverse=reverse))
self.assertEqual(sorted(chain(*inputs), key=key, reverse=reverse),
list(self.module.merge(*seqs, key=key, reverse=reverse)))
self.assertEqual(list(self.module.merge()), [])
def test_merge_does_not_suppress_index_error(self):
# Issue 19018: Heapq.merge suppresses IndexError from user generator
......@@ -94,6 +94,9 @@ Library
error bubble up as this "bad data" appears in many real world zip files in
the wild and is ignored by other zip tools.
- Issue #13742: Added "key" and "reverse" parameters to heapq.merge().
(First draft of patch contributed by Simon Sapin.)
- Issue #21402: tkinter.ttk now works when default root window is not set.
- Issue #3015: _tkinter.create() now creates tkapp object with wantobject=1 by
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment