Commit 3b06e243 authored by Steven D'Aprano's avatar Steven D'Aprano

Issue 26002 and 25974

patches by Upendra Kumar and Stefan Krah
speed up median by using bisect, and general speedup for Decimals using as_integer_ratio
parent ad039f75
...@@ -105,6 +105,7 @@ import math ...@@ -105,6 +105,7 @@ import math
from fractions import Fraction from fractions import Fraction
from decimal import Decimal from decimal import Decimal
from itertools import groupby from itertools import groupby
from bisect import bisect_left, bisect_right
...@@ -223,56 +224,26 @@ def _exact_ratio(x): ...@@ -223,56 +224,26 @@ def _exact_ratio(x):
# Optimise the common case of floats. We expect that the most often # Optimise the common case of floats. We expect that the most often
# used numeric type will be builtin floats, so try to make this as # used numeric type will be builtin floats, so try to make this as
# fast as possible. # fast as possible.
if type(x) is float: if type(x) is float or type(x) is Decimal:
return x.as_integer_ratio() return x.as_integer_ratio()
try: try:
# x may be an int, Fraction, or Integral ABC. # x may be an int, Fraction, or Integral ABC.
return (x.numerator, x.denominator) return (x.numerator, x.denominator)
except AttributeError: except AttributeError:
try: try:
# x may be a float subclass. # x may be a float or Decimal subclass.
return x.as_integer_ratio() return x.as_integer_ratio()
except AttributeError: except AttributeError:
try: # Just give up?
# x may be a Decimal. pass
return _decimal_to_ratio(x)
except AttributeError:
# Just give up?
pass
except (OverflowError, ValueError): except (OverflowError, ValueError):
# float NAN or INF. # float NAN or INF.
assert not math.isfinite(x) assert not _isfinite(x)
return (x, None) return (x, None)
msg = "can't convert type '{}' to numerator/denominator" msg = "can't convert type '{}' to numerator/denominator"
raise TypeError(msg.format(type(x).__name__)) raise TypeError(msg.format(type(x).__name__))
# FIXME This is faster than Fraction.from_decimal, but still too slow.
def _decimal_to_ratio(d):
"""Convert Decimal d to exact integer ratio (numerator, denominator).
>>> from decimal import Decimal
>>> _decimal_to_ratio(Decimal("2.6"))
(26, 10)
"""
sign, digits, exp = d.as_tuple()
if exp in ('F', 'n', 'N'): # INF, NAN, sNAN
assert not d.is_finite()
return (d, None)
num = 0
for digit in digits:
num = num*10 + digit
if exp < 0:
den = 10**-exp
else:
num *= 10**exp
den = 1
if sign:
num = -num
return (num, den)
def _convert(value, T): def _convert(value, T):
"""Convert value to given numeric type T.""" """Convert value to given numeric type T."""
if type(value) is T: if type(value) is T:
...@@ -305,6 +276,21 @@ def _counts(data): ...@@ -305,6 +276,21 @@ def _counts(data):
return table return table
def _find_lteq(a, x):
'Locate the leftmost value exactly equal to x'
i = bisect_left(a, x)
if i != len(a) and a[i] == x:
return i
raise ValueError
def _find_rteq(a, l, x):
'Locate the rightmost value exactly equal to x'
i = bisect_right(a, x, lo=l)
if i != (len(a)+1) and a[i-1] == x:
return i-1
raise ValueError
# === Measures of central tendency (averages) === # === Measures of central tendency (averages) ===
def mean(data): def mean(data):
...@@ -442,9 +428,15 @@ def median_grouped(data, interval=1): ...@@ -442,9 +428,15 @@ def median_grouped(data, interval=1):
except TypeError: except TypeError:
# Mixed type. For now we just coerce to float. # Mixed type. For now we just coerce to float.
L = float(x) - float(interval)/2 L = float(x) - float(interval)/2
cf = data.index(x) # Number of values below the median interval.
# FIXME The following line could be more efficient for big lists. # Uses bisection search to search for x in data with log(n) time complexity
f = data.count(x) # Number of data points in the median interval. # Find the position of leftmost occurence of x in data
l1 = _find_lteq(data, x)
# Find the position of rightmost occurence of x in data[l1...len(data)]
# Assuming always l1 <= l2
l2 = _find_rteq(data, l1, x)
cf = l1
f = l2 - l1 + 1
return L + interval*(n/2 - cf)/f return L + interval*(n/2 - cf)/f
......
...@@ -699,13 +699,12 @@ class ExactRatioTest(unittest.TestCase): ...@@ -699,13 +699,12 @@ class ExactRatioTest(unittest.TestCase):
num, den = statistics._exact_ratio(x) num, den = statistics._exact_ratio(x)
self.assertEqual(x, num/den) self.assertEqual(x, num/den)
@unittest.skipIf(True, "temporarily disabled: see #25928")
def test_decimal(self): def test_decimal(self):
D = Decimal D = Decimal
_exact_ratio = statistics._exact_ratio _exact_ratio = statistics._exact_ratio
self.assertEqual(_exact_ratio(D("0.125")), (125, 1000)) self.assertEqual(_exact_ratio(D("0.125")), (1, 8))
self.assertEqual(_exact_ratio(D("12.345")), (12345, 1000)) self.assertEqual(_exact_ratio(D("12.345")), (2469, 200))
self.assertEqual(_exact_ratio(D("-1.98")), (-198, 100)) self.assertEqual(_exact_ratio(D("-1.98")), (-99, 50))
def test_inf(self): def test_inf(self):
INF = float("INF") INF = float("INF")
...@@ -731,7 +730,6 @@ class ExactRatioTest(unittest.TestCase): ...@@ -731,7 +730,6 @@ class ExactRatioTest(unittest.TestCase):
self.assertIs(ratio[1], None) self.assertIs(ratio[1], None)
self.assertEqual(type(ratio[0]), type(nan)) self.assertEqual(type(ratio[0]), type(nan))
@unittest.skipIf(True, "temporarily disabled: see #25928")
def test_decimal_nan(self): def test_decimal_nan(self):
NAN = Decimal("NAN") NAN = Decimal("NAN")
sNAN = Decimal("sNAN") sNAN = Decimal("sNAN")
...@@ -745,18 +743,18 @@ class ExactRatioTest(unittest.TestCase): ...@@ -745,18 +743,18 @@ class ExactRatioTest(unittest.TestCase):
class DecimalToRatioTest(unittest.TestCase): class DecimalToRatioTest(unittest.TestCase):
# Test _decimal_to_ratio private function. # Test _exact_ratio private function.
def test_infinity(self): def test_infinity(self):
# Test that INFs are handled correctly. # Test that INFs are handled correctly.
inf = Decimal('INF') inf = Decimal('INF')
self.assertEqual(statistics._decimal_to_ratio(inf), (inf, None)) self.assertEqual(statistics._exact_ratio(inf), (inf, None))
self.assertEqual(statistics._decimal_to_ratio(-inf), (-inf, None)) self.assertEqual(statistics._exact_ratio(-inf), (-inf, None))
def test_nan(self): def test_nan(self):
# Test that NANs are handled correctly. # Test that NANs are handled correctly.
for nan in (Decimal('NAN'), Decimal('sNAN')): for nan in (Decimal('NAN'), Decimal('sNAN')):
num, den = statistics._decimal_to_ratio(nan) num, den = statistics._exact_ratio(nan)
# Because NANs always compare non-equal, we cannot use assertEqual. # Because NANs always compare non-equal, we cannot use assertEqual.
# Nor can we use an identity test, as we don't guarantee anything # Nor can we use an identity test, as we don't guarantee anything
# about the object identity. # about the object identity.
...@@ -769,30 +767,30 @@ class DecimalToRatioTest(unittest.TestCase): ...@@ -769,30 +767,30 @@ class DecimalToRatioTest(unittest.TestCase):
for d in numbers: for d in numbers:
# First test positive decimals. # First test positive decimals.
assert d > 0 assert d > 0
num, den = statistics._decimal_to_ratio(d) num, den = statistics._exact_ratio(d)
self.assertGreaterEqual(num, 0) self.assertGreaterEqual(num, 0)
self.assertGreater(den, 0) self.assertGreater(den, 0)
# Then test negative decimals. # Then test negative decimals.
num, den = statistics._decimal_to_ratio(-d) num, den = statistics._exact_ratio(-d)
self.assertLessEqual(num, 0) self.assertLessEqual(num, 0)
self.assertGreater(den, 0) self.assertGreater(den, 0)
def test_negative_exponent(self): def test_negative_exponent(self):
# Test result when the exponent is negative. # Test result when the exponent is negative.
t = statistics._decimal_to_ratio(Decimal("0.1234")) t = statistics._exact_ratio(Decimal("0.1234"))
self.assertEqual(t, (1234, 10000)) self.assertEqual(t, (617, 5000))
def test_positive_exponent(self): def test_positive_exponent(self):
# Test results when the exponent is positive. # Test results when the exponent is positive.
t = statistics._decimal_to_ratio(Decimal("1.234e7")) t = statistics._exact_ratio(Decimal("1.234e7"))
self.assertEqual(t, (12340000, 1)) self.assertEqual(t, (12340000, 1))
def test_regression_20536(self): def test_regression_20536(self):
# Regression test for issue 20536. # Regression test for issue 20536.
# See http://bugs.python.org/issue20536 # See http://bugs.python.org/issue20536
t = statistics._decimal_to_ratio(Decimal("1e2")) t = statistics._exact_ratio(Decimal("1e2"))
self.assertEqual(t, (100, 1)) self.assertEqual(t, (100, 1))
t = statistics._decimal_to_ratio(Decimal("1.47e5")) t = statistics._exact_ratio(Decimal("1.47e5"))
self.assertEqual(t, (147000, 1)) self.assertEqual(t, (147000, 1))
...@@ -1260,7 +1258,6 @@ class SumSpecialValues(NumericTestCase): ...@@ -1260,7 +1258,6 @@ class SumSpecialValues(NumericTestCase):
with decimal.localcontext(decimal.BasicContext): with decimal.localcontext(decimal.BasicContext):
self.assertRaises(decimal.InvalidOperation, statistics._sum, data) self.assertRaises(decimal.InvalidOperation, statistics._sum, data)
@unittest.skipIf(True, "temporarily disabled: see #25928")
def test_decimal_snan_raises(self): def test_decimal_snan_raises(self):
# Adding sNAN should raise InvalidOperation. # Adding sNAN should raise InvalidOperation.
sNAN = Decimal('sNAN') sNAN = Decimal('sNAN')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment