Commit 7ab3d157 authored by Tim Peters's avatar Tim Peters Committed by Raymond Hettinger

Rework tuple hash tests. (GH-10161)

Add tooling that will useful in future updates, paying particular attention to difficult cases where only the upper bits on the input vary.
parent 5741c45a
......@@ -2944,3 +2944,44 @@ class FakePath:
def maybe_get_event_loop_policy():
"""Return the global event loop policy if one is set, else return None."""
return asyncio.events._event_loop_policy
# Helpers for testing hashing.
NHASHBITS = sys.hash_info.width # number of bits in hash() result
assert NHASHBITS in (32, 64)
# Return mean and sdev of number of collisions when tossing nballs balls
# uniformly at random into nbins bins. By definition, the number of
# collisions is the number of balls minus the number of occupied bins at
# the end.
def collision_stats(nbins, nballs):
n, k = nbins, nballs
# prob a bin empty after k trials = (1 - 1/n)**k
# mean # empty is then n * (1 - 1/n)**k
# so mean # occupied is n - n * (1 - 1/n)**k
# so collisions = k - (n - n*(1 - 1/n)**k)
#
# For the variance:
# n*(n-1)*(1-2/n)**k + meanempty - meanempty**2 =
# n*(n-1)*(1-2/n)**k + meanempty * (1 - meanempty)
#
# Massive cancellation occurs, and, e.g., for a 64-bit hash code
# 1-1/2**64 rounds uselessly to 1.0. Rather than make heroic (and
# error-prone) efforts to rework the naive formulas to avoid those,
# we use the `decimal` module to get plenty of extra precision.
#
# Note: the exact values are straightforward to compute with
# rationals, but in context that's unbearably slow, requiring
# multi-million bit arithmetic.
import decimal
with decimal.localcontext() as ctx:
bits = n.bit_length() * 2 # bits in n**2
# At least that many bits will likely cancel out.
# Use that many decimal digits instead.
ctx.prec = max(bits, 30)
dn = decimal.Decimal(n)
p1empty = ((dn - 1) / dn) ** k
meanempty = n * p1empty
occupied = n - meanempty
collisions = k - occupied
var = dn*(dn-1)*((dn-2)/dn)**k + meanempty * (1 - meanempty)
return float(collisions), float(var.sqrt())
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment