Commit 2b66c664 authored by Nick Coghlan

Close #20481: Disallow mixed type input in statistics

The most appropriate coercion rules are not yet clear, so simply
disallowing mixed type input for 3.4.

(Committed on Steven's behalf)
parent b3ce9673
......@@ -20,6 +20,16 @@
This module provides functions for calculating mathematical statistics of
numeric (:class:`Real`-valued) data.
.. note::
Unless explicitly noted otherwise, these functions support :class:`int`,
:class:`float`, :class:`decimal.Decimal` and :class:`fractions.Fraction`.
Behaviour with other types (whether in the numeric tower or not) is
currently unsupported. Mixed types are also undefined and
implementation-dependent. If your input data consists of mixed types,
you may be able to use :func:`map` to ensure a consistent result, e.g.
``map(float, input_data)``.
Averages and measures of central location
-----------------------------------------
......
......@@ -144,19 +144,31 @@ def _sum(data, start=0):
>>> _sum(data)
Decimal('0.6963')
Mixed types are currently treated as an error, except that int is
allowed.
"""
# We fail as soon as we reach a value that is not an int or the type of
# the first value which is not an int. E.g. _sum([int, int, float, int])
# is okay, but _sum([int, int, float, Fraction]) is not.
allowed_types = set([int, type(start)])
n, d = _exact_ratio(start)
T = type(start)
partials = {d: n} # map {denominator: sum of numerators}
# Micro-optimizations.
coerce_types = _coerce_types
exact_ratio = _exact_ratio
partials_get = partials.get
# Add numerators for each denominator, and track the "current" type.
# Add numerators for each denominator.
for x in data:
T = _coerce_types(T, type(x))
_check_type(type(x), allowed_types)
n, d = exact_ratio(x)
partials[d] = partials_get(d, 0) + n
# Find the expected result type. If allowed_types has only one item, it
# will be int; if it has two, use the one which isn't int.
assert len(allowed_types) in (1, 2)
if len(allowed_types) == 1:
assert allowed_types.pop() is int
T = int
else:
T = (allowed_types - set([int])).pop()
if None in partials:
assert issubclass(T, (float, Decimal))
assert not math.isfinite(partials[None])
......@@ -172,6 +184,15 @@ def _sum(data, start=0):
return T(total)
def _check_type(T, allowed):
    """Validate type T against the set of types seen so far.

    ``allowed`` is a set of types and may be mutated in place:

    * If T is already a member, nothing happens.
    * If T is new and ``allowed`` holds exactly one type, T is added to
      the set.
    * Otherwise TypeError is raised, naming every allowed type followed
      by the offending type T.
    """
    if T in allowed:
        return
    if len(allowed) != 1:
        # No room for another type: report the allowed types, then T last.
        names = [kind.__name__ for kind in allowed]
        names.append(T.__name__)
        raise TypeError("unsupported mixed types: %s" % ', '.join(names))
    # Only one type recorded so far, so T becomes the second permitted type.
    allowed.add(T)
def _exact_ratio(x):
"""Convert Real number x exactly to (numerator, denominator) pair.
......@@ -228,44 +249,6 @@ def _decimal_to_ratio(d):
return (num, den)
def _coerce_types(T1, T2):
    """Return the common type that T1 and T2 coerce to.

    >>> _coerce_types(int, float)
    <class 'float'>

    The result follows this table; "N/A" marks combinations that raise
    TypeError.

    +----------+-----------+-----------+-----------+----------+
    |          | int       | Fraction  | Decimal   | float    |
    +----------+-----------+-----------+-----------+----------+
    | int      | int       | Fraction  | Decimal   | float    |
    | Fraction | Fraction  | Fraction  | N/A       | float    |
    | Decimal  | Decimal   | N/A       | Decimal   | float    |
    | float    | float     | float     | float     | float    |
    +----------+-----------+-----------+-----------+----------+

    A subclass wins over its parent class.  When both types are direct
    subclasses of the same base, the second argument wins.
    """
    # Fast paths: identical types, or one side is exactly int.
    if T1 is T2 or T2 is int:
        return T1
    if T1 is int:
        return T2
    # A subclass beats its parent; T2 is tested first.
    for candidate, other in ((T2, T1), (T1, T2)):
        if issubclass(candidate, other):
            return candidate
    # Any float (or float subclass) beats the remaining types.
    for candidate in (T2, T1):
        if issubclass(candidate, float):
            return candidate
    # Siblings sharing an immediate base class: prefer the second type.
    if T1.__base__ is T2.__base__:
        return T2
    # No rule applies.
    raise TypeError('cannot coerce types %r and %r' % (T1, T2))
def _counts(data):
# Generate a table of sorted (value, frequency) pairs.
table = collections.Counter(iter(data)).most_common()
......
......@@ -687,6 +687,26 @@ class DecimalToRatioTest(unittest.TestCase):
self.assertRaises(ValueError, statistics._decimal_to_ratio, d)
class CheckTypeTest(unittest.TestCase):
    # Tests for the private helper statistics._check_type.

    def test_allowed(self):
        # Types already present in the allowed set pass without raising.
        permitted = {int, float}
        for kind in (int, float):
            statistics._check_type(kind, permitted)

    def test_not_allowed(self):
        # A type outside a two-member allowed set is rejected.
        permitted = {int, float}
        with self.assertRaises(TypeError):
            statistics._check_type(Decimal, permitted)

    def test_add_to_allowed(self):
        # With one allowed type, a new type is admitted into the set.
        permitted = {int}
        statistics._check_type(float, permitted)
        self.assertEqual(permitted, {int, float})
# === Tests for public functions ===
......@@ -881,40 +901,11 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
self.assertRaises(TypeError, self.func, [1, 2, 3, b'999'])
def test_mixed_sum(self):
# Mixed sums are allowed.
# Careful here: order matters. Can't mix Fraction and Decimal directly,
# only after they're converted to float.
data = [1, 2, Fraction(1, 2), 3.0, Decimal("0.25")]
self.assertEqual(self.func(data), 6.75)
class SumInternalsTest(NumericTestCase):
# Test internals of the sum function.
def test_ignore_instance_float_method(self):
# Test that __float__ methods on data instances are ignored.
# Python typically calls __dunder__ methods on the class, not the
# instance. The ``sum`` implementation calls __float__ directly. To
# better match the behaviour of Python, we call it only on the class,
# not the instance. This test will fail if somebody "fixes" that code.
# Create a fake __float__ method.
def __float__(self):
raise AssertionError('test fails')
# Inject it into an instance.
class MyNumber(Fraction):
pass
x = MyNumber(3)
x.__float__ = types.MethodType(__float__, x)
# Check it works as expected.
self.assertRaises(AssertionError, x.__float__)
self.assertEqual(float(x), 3.0)
# And now test the function.
self.assertEqual(statistics._sum([1.0, 2.0, x, 4.0]), 10.0)
# Mixed input types are not (currently) allowed.
# Check that mixed data types fail.
self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)])
# And so does mixed start argument.
self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
class SumTortureTest(NumericTestCase):
......
......@@ -24,6 +24,12 @@ Core and Builtins
Library
-------
- Issue #20481: For at least Python 3.4, the statistics module will require
that all inputs for a single operation be of a single consistent type, or
else a mix of ints and a single other consistent type. This avoids
some interoperability issues that arose with the previous approach of
coercing to a suitable common type.
- Issue #20478: the statistics module now treats collections.Counter inputs
like any other iterable.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment