Commit 3b06e243 authored by Steven D'Aprano's avatar Steven D'Aprano

Issue 26002 and 25974

patches by Upendra Kumar and Stefan Krah
speed up median by using bisect, and general speedup for Decimals using as_integer_ratio
parent ad039f75
......@@ -105,6 +105,7 @@ import math
from fractions import Fraction
from decimal import Decimal
from itertools import groupby
from bisect import bisect_left, bisect_right
......@@ -223,56 +224,26 @@ def _exact_ratio(x):
# Optimise the common case of floats. We expect that the most often
# used numeric type will be builtin floats, so try to make this as
# fast as possible.
if type(x) is float:
if type(x) is float or type(x) is Decimal:
return x.as_integer_ratio()
try:
# x may be an int, Fraction, or Integral ABC.
return (x.numerator, x.denominator)
except AttributeError:
try:
# x may be a float subclass.
# x may be a float or Decimal subclass.
return x.as_integer_ratio()
except AttributeError:
try:
# x may be a Decimal.
return _decimal_to_ratio(x)
except AttributeError:
# Just give up?
pass
except (OverflowError, ValueError):
# float NAN or INF.
assert not math.isfinite(x)
assert not _isfinite(x)
return (x, None)
msg = "can't convert type '{}' to numerator/denominator"
raise TypeError(msg.format(type(x).__name__))
# FIXME This is faster than Fraction.from_decimal, but still too slow.
def _decimal_to_ratio(d):
"""Convert Decimal d to exact integer ratio (numerator, denominator).
>>> from decimal import Decimal
>>> _decimal_to_ratio(Decimal("2.6"))
(26, 10)
"""
sign, digits, exp = d.as_tuple()
if exp in ('F', 'n', 'N'): # INF, NAN, sNAN
assert not d.is_finite()
return (d, None)
num = 0
for digit in digits:
num = num*10 + digit
if exp < 0:
den = 10**-exp
else:
num *= 10**exp
den = 1
if sign:
num = -num
return (num, den)
def _convert(value, T):
"""Convert value to given numeric type T."""
if type(value) is T:
......@@ -305,6 +276,21 @@ def _counts(data):
return table
def _find_lteq(a, x):
'Locate the leftmost value exactly equal to x'
i = bisect_left(a, x)
if i != len(a) and a[i] == x:
return i
raise ValueError
def _find_rteq(a, l, x):
'Locate the rightmost value exactly equal to x'
i = bisect_right(a, x, lo=l)
if i != (len(a)+1) and a[i-1] == x:
return i-1
raise ValueError
# === Measures of central tendency (averages) ===
def mean(data):
......@@ -442,9 +428,15 @@ def median_grouped(data, interval=1):
except TypeError:
# Mixed type. For now we just coerce to float.
L = float(x) - float(interval)/2
cf = data.index(x) # Number of values below the median interval.
# FIXME The following line could be more efficient for big lists.
f = data.count(x) # Number of data points in the median interval.
# Uses bisection search to search for x in data with log(n) time complexity
# Find the position of leftmost occurence of x in data
l1 = _find_lteq(data, x)
# Find the position of rightmost occurence of x in data[l1...len(data)]
# Assuming always l1 <= l2
l2 = _find_rteq(data, l1, x)
cf = l1
f = l2 - l1 + 1
return L + interval*(n/2 - cf)/f
......
......@@ -699,13 +699,12 @@ class ExactRatioTest(unittest.TestCase):
num, den = statistics._exact_ratio(x)
self.assertEqual(x, num/den)
@unittest.skipIf(True, "temporarily disabled: see #25928")
def test_decimal(self):
D = Decimal
_exact_ratio = statistics._exact_ratio
self.assertEqual(_exact_ratio(D("0.125")), (125, 1000))
self.assertEqual(_exact_ratio(D("12.345")), (12345, 1000))
self.assertEqual(_exact_ratio(D("-1.98")), (-198, 100))
self.assertEqual(_exact_ratio(D("0.125")), (1, 8))
self.assertEqual(_exact_ratio(D("12.345")), (2469, 200))
self.assertEqual(_exact_ratio(D("-1.98")), (-99, 50))
def test_inf(self):
INF = float("INF")
......@@ -731,7 +730,6 @@ class ExactRatioTest(unittest.TestCase):
self.assertIs(ratio[1], None)
self.assertEqual(type(ratio[0]), type(nan))
@unittest.skipIf(True, "temporarily disabled: see #25928")
def test_decimal_nan(self):
NAN = Decimal("NAN")
sNAN = Decimal("sNAN")
......@@ -745,18 +743,18 @@ class ExactRatioTest(unittest.TestCase):
class DecimalToRatioTest(unittest.TestCase):
# Test _decimal_to_ratio private function.
# Test _exact_ratio private function.
def test_infinity(self):
# Test that INFs are handled correctly.
inf = Decimal('INF')
self.assertEqual(statistics._decimal_to_ratio(inf), (inf, None))
self.assertEqual(statistics._decimal_to_ratio(-inf), (-inf, None))
self.assertEqual(statistics._exact_ratio(inf), (inf, None))
self.assertEqual(statistics._exact_ratio(-inf), (-inf, None))
def test_nan(self):
# Test that NANs are handled correctly.
for nan in (Decimal('NAN'), Decimal('sNAN')):
num, den = statistics._decimal_to_ratio(nan)
num, den = statistics._exact_ratio(nan)
# Because NANs always compare non-equal, we cannot use assertEqual.
# Nor can we use an identity test, as we don't guarantee anything
# about the object identity.
......@@ -769,30 +767,30 @@ class DecimalToRatioTest(unittest.TestCase):
for d in numbers:
# First test positive decimals.
assert d > 0
num, den = statistics._decimal_to_ratio(d)
num, den = statistics._exact_ratio(d)
self.assertGreaterEqual(num, 0)
self.assertGreater(den, 0)
# Then test negative decimals.
num, den = statistics._decimal_to_ratio(-d)
num, den = statistics._exact_ratio(-d)
self.assertLessEqual(num, 0)
self.assertGreater(den, 0)
def test_negative_exponent(self):
# Test result when the exponent is negative.
t = statistics._decimal_to_ratio(Decimal("0.1234"))
self.assertEqual(t, (1234, 10000))
t = statistics._exact_ratio(Decimal("0.1234"))
self.assertEqual(t, (617, 5000))
def test_positive_exponent(self):
# Test results when the exponent is positive.
t = statistics._decimal_to_ratio(Decimal("1.234e7"))
t = statistics._exact_ratio(Decimal("1.234e7"))
self.assertEqual(t, (12340000, 1))
def test_regression_20536(self):
# Regression test for issue 20536.
# See http://bugs.python.org/issue20536
t = statistics._decimal_to_ratio(Decimal("1e2"))
t = statistics._exact_ratio(Decimal("1e2"))
self.assertEqual(t, (100, 1))
t = statistics._decimal_to_ratio(Decimal("1.47e5"))
t = statistics._exact_ratio(Decimal("1.47e5"))
self.assertEqual(t, (147000, 1))
......@@ -1260,7 +1258,6 @@ class SumSpecialValues(NumericTestCase):
with decimal.localcontext(decimal.BasicContext):
self.assertRaises(decimal.InvalidOperation, statistics._sum, data)
@unittest.skipIf(True, "temporarily disabled: see #25928")
def test_decimal_snan_raises(self):
# Adding sNAN should raise InvalidOperation.
sNAN = Decimal('sNAN')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment