Commit 3e6aafe2 authored by Raymond Hettinger

Issue #16098: Update heapq.nsmallest to use the same algorithm as nlargest.

This removes the dependency on bisect and brings the pure Python code
in sync with the C code.
parent dce969d2
...@@ -129,9 +129,8 @@ From all times, sorting has always been a Great Art! :-) ...@@ -129,9 +129,8 @@ From all times, sorting has always been a Great Art! :-)
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge', __all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
'nlargest', 'nsmallest', 'heappushpop'] 'nlargest', 'nsmallest', 'heappushpop']
from itertools import islice, repeat, count, imap, izip, tee, chain from itertools import islice, count, imap, izip, tee, chain
from operator import itemgetter from operator import itemgetter
import bisect
def cmp_lt(x, y): def cmp_lt(x, y):
# Use __lt__ if available; otherwise, try __le__. # Use __lt__ if available; otherwise, try __le__.
...@@ -188,6 +187,19 @@ def heapify(x): ...@@ -188,6 +187,19 @@ def heapify(x):
for i in reversed(xrange(n//2)): for i in reversed(xrange(n//2)):
_siftup(x, i) _siftup(x, i)
def _heappushpop_max(heap, item):
    """Push item onto a maxheap, then pop and return the largest element.

    When the heap is empty, or item is not smaller than the current root,
    item itself is handed straight back and the heap is left untouched.
    Otherwise item takes the root's place, is sifted into position, and the
    previous root is returned.
    """
    if not heap or not cmp_lt(item, heap[0]):
        return item
    largest = heap[0]
    heap[0] = item
    # The new root may be smaller than its children; restore the invariant.
    _siftup_max(heap, 0)
    return largest
def _heapify_max(x):
    """Rearrange list x into a maxheap, in-place, in O(len(x)) time."""
    # Indices at or beyond len(x)//2 are leaves, which need no fixing.
    # Walk the remaining (internal) nodes from the last one back to the root.
    last_parent = len(x) // 2 - 1
    for index in range(last_parent, -1, -1):
        _siftup_max(x, index)
def nlargest(n, iterable): def nlargest(n, iterable):
"""Find the n largest elements in a dataset. """Find the n largest elements in a dataset.
...@@ -213,30 +225,16 @@ def nsmallest(n, iterable): ...@@ -213,30 +225,16 @@ def nsmallest(n, iterable):
""" """
if n < 0: if n < 0:
return [] return []
if hasattr(iterable, '__len__') and n * 10 <= len(iterable): it = iter(iterable)
# For smaller values of n, the bisect method is faster than a minheap. result = list(islice(it, n))
# It is also memory efficient, consuming only n elements of space. if not result:
it = iter(iterable)
result = sorted(islice(it, 0, n))
if not result:
return result
insort = bisect.insort
pop = result.pop
los = result[-1] # los --> Largest of the nsmallest
for elem in it:
if cmp_lt(elem, los):
insort(result, elem)
pop()
los = result[-1]
return result return result
# An alternative approach manifests the whole iterable in memory but _heapify_max(result)
# saves comparisons by heapifying all at once. Also, saves time _heappushpop = _heappushpop_max
# over bisect.insort() which has O(n) data movement time for every for elem in it:
# insertion. Finding the n smallest of an m length iterable requires _heappushpop(result, elem)
# O(m) + O(n log m) comparisons. result.sort()
h = list(iterable) return result
heapify(h)
return map(heappop, repeat(h, min(n, len(h))))
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
# is the index of a leaf with a possibly out-of-order value. Restore the # is the index of a leaf with a possibly out-of-order value. Restore the
...@@ -314,6 +312,42 @@ def _siftup(heap, pos): ...@@ -314,6 +312,42 @@ def _siftup(heap, pos):
heap[pos] = newitem heap[pos] = newitem
_siftdown(heap, startpos, pos) _siftdown(heap, startpos, pos)
def _siftdown_max(heap, startpos, pos):
    """Maxheap variant of _siftdown.

    Moves the value at index pos toward index startpos, shifting each smaller
    parent one level down, until a parent that is not smaller is met or
    startpos is reached.
    """
    item = heap[pos]
    while pos > startpos:
        parent_index = (pos - 1) >> 1
        parent_value = heap[parent_index]
        if not cmp_lt(parent_value, item):
            # Parent is at least as large as item: item belongs at pos.
            break
        # Parent is smaller, so it drops into the slot item is vacating.
        heap[pos] = parent_value
        pos = parent_index
    heap[pos] = item
def _siftup_max(heap, pos):
    """Maxheap variant of _siftup.

    Pulls the larger child up into the hole at pos, level by level, until the
    hole reaches a leaf; then drops the original value into that leaf and
    lets _siftdown_max bubble it back toward the starting index.
    """
    size = len(heap)
    origin = pos
    item = heap[pos]
    child = 2 * pos + 1  # left child of the current hole
    while child < size:
        # Prefer the right child unless it is strictly smaller than the left.
        sibling = child + 1
        if sibling < size and not cmp_lt(heap[sibling], heap[child]):
            child = sibling
        # Promote the larger child into the hole and descend after it.
        heap[pos] = heap[child]
        pos = child
        child = 2 * pos + 1
    # pos is now an empty leaf slot: park the saved value there, then move it
    # up to its final place by sifting its parents down.
    heap[pos] = item
    _siftdown_max(heap, origin, pos)
# If available, use C implementation # If available, use C implementation
try: try:
from _heapq import * from _heapq import *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment