Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
40a841bc
Commit
40a841bc
authored
Dec 01, 2015
by
Steven D'Aprano
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed issue #25177, problems with the mean of very small and very large numbers.
parent
ee1a0e4b
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
433 additions
and
119 deletions
+433
-119
Lib/statistics.py
Lib/statistics.py
+114
-71
Lib/test/test_statistics.py
Lib/test/test_statistics.py
+315
-48
Misc/NEWS
Misc/NEWS
+4
-0
No files found.
Lib/statistics.py
View file @
40a841bc
...
@@ -104,6 +104,8 @@ import math
...
@@ -104,6 +104,8 @@ import math
from
fractions
import
Fraction
from
fractions
import
Fraction
from
decimal
import
Decimal
from
decimal
import
Decimal
from
itertools
import
groupby
# === Exceptions ===
# === Exceptions ===
...
@@ -115,86 +117,102 @@ class StatisticsError(ValueError):
...
@@ -115,86 +117,102 @@ class StatisticsError(ValueError):
# === Private utilities ===
# === Private utilities ===
def
_sum
(
data
,
start
=
0
):
def
_sum
(
data
,
start
=
0
):
"""_sum(data [, start]) -> value
"""_sum(data [, start]) -> (type, sum, count)
Return a high-precision sum of the given numeric data as a fraction,
together with the type to be converted to and the count of items.
Return a high-precision sum of the given numeric data. If optional
If optional argument ``start`` is given, it is added to the total.
argument ``start`` is given, it is added to the total. If ``data`` is
If ``data`` is empty, ``start`` (defaulting to 0) is returned.
empty, ``start`` (defaulting to 0) is returned.
Examples
Examples
--------
--------
>>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
>>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
11.0
(<class 'float'>, Fraction(11, 1), 5)
Some sources of round-off error will be avoided:
Some sources of round-off error will be avoided:
>>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero.
>>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero.
1000.0
(<class 'float'>, Fraction(1000, 1), 3000)
Fractions and Decimals are also supported:
Fractions and Decimals are also supported:
>>> from fractions import Fraction as F
>>> from fractions import Fraction as F
>>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
>>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
Fraction(63, 20
)
(<class 'fractions.Fraction'>, Fraction(63, 20), 4
)
>>> from decimal import Decimal as D
>>> from decimal import Decimal as D
>>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
>>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
>>> _sum(data)
>>> _sum(data)
Decimal('0.6963'
)
(<class 'decimal.Decimal'>, Fraction(6963, 10000), 4
)
Mixed types are currently treated as an error, except that int is
Mixed types are currently treated as an error, except that int is
allowed.
allowed.
"""
"""
# We fail as soon as we reach a value that is not an int or the type of
count
=
0
# the first value which is not an int. E.g. _sum([int, int, float, int])
# is okay, but sum([int, int, float, Fraction]) is not.
allowed_types
=
{
int
,
type
(
start
)}
n
,
d
=
_exact_ratio
(
start
)
n
,
d
=
_exact_ratio
(
start
)
partials
=
{
d
:
n
}
# map {denominator: sum of numerators}
partials
=
{
d
:
n
}
# Micro-optimizations.
exact_ratio
=
_exact_ratio
partials_get
=
partials
.
get
partials_get
=
partials
.
get
# Add numerators for each denominator.
T
=
_coerce
(
int
,
type
(
start
))
for
x
in
data
:
for
typ
,
values
in
groupby
(
data
,
type
):
_check_type
(
type
(
x
),
allowed_types
)
T
=
_coerce
(
T
,
typ
)
# or raise TypeError
n
,
d
=
exact_ratio
(
x
)
for
n
,
d
in
map
(
_exact_ratio
,
values
):
count
+=
1
partials
[
d
]
=
partials_get
(
d
,
0
)
+
n
partials
[
d
]
=
partials_get
(
d
,
0
)
+
n
# Find the expected result type. If allowed_types has only one item, it
# will be int; if it has two, use the one which isn't int.
assert
len
(
allowed_types
)
in
(
1
,
2
)
if
len
(
allowed_types
)
==
1
:
assert
allowed_types
.
pop
()
is
int
T
=
int
else
:
T
=
(
allowed_types
-
{
int
}).
pop
()
if
None
in
partials
:
if
None
in
partials
:
assert
issubclass
(
T
,
(
float
,
Decimal
))
# The sum will be a NAN or INF. We can ignore all the finite
assert
not
math
.
isfinite
(
partials
[
None
])
# partials, and just look at this special one.
return
T
(
partials
[
None
])
total
=
partials
[
None
]
total
=
Fraction
()
assert
not
_isfinite
(
total
)
for
d
,
n
in
sorted
(
partials
.
items
()):
else
:
total
+=
Fraction
(
n
,
d
)
# Sum all the partial sums using builtin sum.
if
issubclass
(
T
,
int
):
# FIXME is this faster if we sum them in order of the denominator?
assert
total
.
denominator
==
1
total
=
sum
(
Fraction
(
n
,
d
)
for
d
,
n
in
sorted
(
partials
.
items
()))
return
T
(
total
.
numerator
)
return
(
T
,
total
,
count
)
if
issubclass
(
T
,
Decimal
):
return
T
(
total
.
numerator
)
/
total
.
denominator
return
T
(
total
)
def
_check_type
(
T
,
allowed
):
def
_isfinite
(
x
):
if
T
not
in
allowed
:
try
:
if
len
(
allowed
)
==
1
:
return
x
.
is_finite
()
# Likely a Decimal.
allowed
.
add
(
T
)
except
AttributeError
:
else
:
return
math
.
isfinite
(
x
)
# Coerces to float first.
types
=
', '
.
join
([
t
.
__name__
for
t
in
allowed
]
+
[
T
.
__name__
])
raise
TypeError
(
"unsupported mixed types: %s"
%
types
)
def
_coerce
(
T
,
S
):
"""Coerce types T and S to a common type, or raise TypeError.
Coercion rules are currently an implementation detail. See the CoerceTest
test class in test_statistics for details.
"""
# See http://bugs.python.org/issue24068.
assert
T
is
not
bool
,
"initial type T is bool"
# If the types are the same, no need to coerce anything. Put this
# first, so that the usual case (no coercion needed) happens as soon
# as possible.
if
T
is
S
:
return
T
# Mixed int & other coerce to the other type.
if
S
is
int
or
S
is
bool
:
return
T
if
T
is
int
:
return
S
# If one is a (strict) subclass of the other, coerce to the subclass.
if
issubclass
(
S
,
T
):
return
S
if
issubclass
(
T
,
S
):
return
T
# Ints coerce to the other type.
if
issubclass
(
T
,
int
):
return
S
if
issubclass
(
S
,
int
):
return
T
# Mixed fraction & float coerces to float (or float subclass).
if
issubclass
(
T
,
Fraction
)
and
issubclass
(
S
,
float
):
return
S
if
issubclass
(
T
,
float
)
and
issubclass
(
S
,
Fraction
):
return
T
# Any other combination is disallowed.
msg
=
"don't know how to coerce %s and %s"
raise
TypeError
(
msg
%
(
T
.
__name__
,
S
.
__name__
))
def
_exact_ratio
(
x
):
def
_exact_ratio
(
x
):
"""
Convert Real number x exactly to
(numerator, denominator) pair.
"""
Return Real number x to exact
(numerator, denominator) pair.
>>> _exact_ratio(0.25)
>>> _exact_ratio(0.25)
(1, 4)
(1, 4)
...
@@ -202,29 +220,31 @@ def _exact_ratio(x):
...
@@ -202,29 +220,31 @@ def _exact_ratio(x):
x is expected to be an int, Fraction, Decimal or float.
x is expected to be an int, Fraction, Decimal or float.
"""
"""
try
:
try
:
# Optimise the common case of floats. We expect that the most often
# used numeric type will be builtin floats, so try to make this as
# fast as possible.
if
type
(
x
)
is
float
:
return
x
.
as_integer_ratio
()
try
:
try
:
#
int, Fraction
#
x may be an int, Fraction, or Integral ABC.
return
(
x
.
numerator
,
x
.
denominator
)
return
(
x
.
numerator
,
x
.
denominator
)
except
AttributeError
:
except
AttributeError
:
# float
try
:
try
:
# x may be a float subclass.
return
x
.
as_integer_ratio
()
return
x
.
as_integer_ratio
()
except
AttributeError
:
except
AttributeError
:
# Decimal
try
:
try
:
# x may be a Decimal.
return
_decimal_to_ratio
(
x
)
return
_decimal_to_ratio
(
x
)
except
AttributeError
:
except
AttributeError
:
msg
=
"can't convert type '{}' to numerator/denominator"
# Just give up?
raise
TypeError
(
msg
.
format
(
type
(
x
).
__name__
))
from
None
pass
except
(
OverflowError
,
ValueError
):
except
(
OverflowError
,
ValueError
):
# INF or NAN
# float NAN or INF.
if
__debug__
:
# Decimal signalling NANs cannot be converted to float :-(
if
isinstance
(
x
,
Decimal
):
assert
not
x
.
is_finite
()
else
:
assert
not
math
.
isfinite
(
x
)
assert
not
math
.
isfinite
(
x
)
return
(
x
,
None
)
return
(
x
,
None
)
msg
=
"can't convert type '{}' to numerator/denominator"
raise
TypeError
(
msg
.
format
(
type
(
x
).
__name__
))
# FIXME This is faster than Fraction.from_decimal, but still too slow.
# FIXME This is faster than Fraction.from_decimal, but still too slow.
...
@@ -239,7 +259,7 @@ def _decimal_to_ratio(d):
...
@@ -239,7 +259,7 @@ def _decimal_to_ratio(d):
sign
,
digits
,
exp
=
d
.
as_tuple
()
sign
,
digits
,
exp
=
d
.
as_tuple
()
if
exp
in
(
'F'
,
'n'
,
'N'
):
# INF, NAN, sNAN
if
exp
in
(
'F'
,
'n'
,
'N'
):
# INF, NAN, sNAN
assert
not
d
.
is_finite
()
assert
not
d
.
is_finite
()
r
aise
ValueError
r
eturn
(
d
,
None
)
num
=
0
num
=
0
for
digit
in
digits
:
for
digit
in
digits
:
num
=
num
*
10
+
digit
num
=
num
*
10
+
digit
...
@@ -253,6 +273,24 @@ def _decimal_to_ratio(d):
...
@@ -253,6 +273,24 @@ def _decimal_to_ratio(d):
return
(
num
,
den
)
return
(
num
,
den
)
def
_convert
(
value
,
T
):
"""Convert value to given numeric type T."""
if
type
(
value
)
is
T
:
# This covers the cases where T is Fraction, or where value is
# a NAN or INF (Decimal or float).
return
value
if
issubclass
(
T
,
int
)
and
value
.
denominator
!=
1
:
T
=
float
try
:
# FIXME: what do we do if this overflows?
return
T
(
value
)
except
TypeError
:
if
issubclass
(
T
,
Decimal
):
return
T
(
value
.
numerator
)
/
T
(
value
.
denominator
)
else
:
raise
def
_counts
(
data
):
def
_counts
(
data
):
# Generate a table of sorted (value, frequency) pairs.
# Generate a table of sorted (value, frequency) pairs.
table
=
collections
.
Counter
(
iter
(
data
)).
most_common
()
table
=
collections
.
Counter
(
iter
(
data
)).
most_common
()
...
@@ -290,7 +328,9 @@ def mean(data):
...
@@ -290,7 +328,9 @@ def mean(data):
n
=
len
(
data
)
n
=
len
(
data
)
if
n
<
1
:
if
n
<
1
:
raise
StatisticsError
(
'mean requires at least one data point'
)
raise
StatisticsError
(
'mean requires at least one data point'
)
return
_sum
(
data
)
/
n
T
,
total
,
count
=
_sum
(
data
)
assert
count
==
n
return
_convert
(
total
/
n
,
T
)
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
...
@@ -460,12 +500,14 @@ def _ss(data, c=None):
...
@@ -460,12 +500,14 @@ def _ss(data, c=None):
"""
"""
if
c
is
None
:
if
c
is
None
:
c
=
mean
(
data
)
c
=
mean
(
data
)
ss
=
_sum
((
x
-
c
)
**
2
for
x
in
data
)
T
,
total
,
count
=
_sum
((
x
-
c
)
**
2
for
x
in
data
)
# The following sum should mathematically equal zero, but due to rounding
# The following sum should mathematically equal zero, but due to rounding
# error may not.
# error may not.
ss
-=
_sum
((
x
-
c
)
for
x
in
data
)
**
2
/
len
(
data
)
U
,
total2
,
count2
=
_sum
((
x
-
c
)
for
x
in
data
)
assert
not
ss
<
0
,
'negative sum of square deviations: %f'
%
ss
assert
T
==
U
and
count
==
count2
return
ss
total
-=
total2
**
2
/
len
(
data
)
assert
not
total
<
0
,
'negative sum of square deviations: %f'
%
total
return
(
T
,
total
)
def
variance
(
data
,
xbar
=
None
):
def
variance
(
data
,
xbar
=
None
):
...
@@ -511,8 +553,8 @@ def variance(data, xbar=None):
...
@@ -511,8 +553,8 @@ def variance(data, xbar=None):
n
=
len
(
data
)
n
=
len
(
data
)
if
n
<
2
:
if
n
<
2
:
raise
StatisticsError
(
'variance requires at least two data points'
)
raise
StatisticsError
(
'variance requires at least two data points'
)
ss
=
_ss
(
data
,
xbar
)
T
,
ss
=
_ss
(
data
,
xbar
)
return
ss
/
(
n
-
1
)
return
_convert
(
ss
/
(
n
-
1
),
T
)
def
pvariance
(
data
,
mu
=
None
):
def
pvariance
(
data
,
mu
=
None
):
...
@@ -560,7 +602,8 @@ def pvariance(data, mu=None):
...
@@ -560,7 +602,8 @@ def pvariance(data, mu=None):
if
n
<
1
:
if
n
<
1
:
raise
StatisticsError
(
'pvariance requires at least one data point'
)
raise
StatisticsError
(
'pvariance requires at least one data point'
)
ss
=
_ss
(
data
,
mu
)
ss
=
_ss
(
data
,
mu
)
return
ss
/
n
T
,
ss
=
_ss
(
data
,
mu
)
return
_convert
(
ss
/
n
,
T
)
def
stdev
(
data
,
xbar
=
None
):
def
stdev
(
data
,
xbar
=
None
):
...
...
Lib/test/test_statistics.py
View file @
40a841bc
...
@@ -21,6 +21,37 @@ import statistics
...
@@ -21,6 +21,37 @@ import statistics
# === Helper functions and class ===
# === Helper functions and class ===
def
_nan_equal
(
a
,
b
):
"""Return True if a and b are both the same kind of NAN.
>>> _nan_equal(Decimal('NAN'), Decimal('NAN'))
True
>>> _nan_equal(Decimal('sNAN'), Decimal('sNAN'))
True
>>> _nan_equal(Decimal('NAN'), Decimal('sNAN'))
False
>>> _nan_equal(Decimal(42), Decimal('NAN'))
False
>>> _nan_equal(float('NAN'), float('NAN'))
True
>>> _nan_equal(float('NAN'), 0.5)
False
>>> _nan_equal(float('NAN'), Decimal('NAN'))
False
NAN payloads are not compared.
"""
if
type
(
a
)
is
not
type
(
b
):
return
False
if
isinstance
(
a
,
float
):
return
math
.
isnan
(
a
)
and
math
.
isnan
(
b
)
aexp
=
a
.
as_tuple
()[
2
]
bexp
=
b
.
as_tuple
()[
2
]
return
(
aexp
==
bexp
)
and
(
aexp
in
(
'n'
,
'N'
))
# Both NAN or both sNAN.
def
_calc_errors
(
actual
,
expected
):
def
_calc_errors
(
actual
,
expected
):
"""Return the absolute and relative errors between two numbers.
"""Return the absolute and relative errors between two numbers.
...
@@ -675,15 +706,60 @@ class ExactRatioTest(unittest.TestCase):
...
@@ -675,15 +706,60 @@ class ExactRatioTest(unittest.TestCase):
self
.
assertEqual
(
_exact_ratio
(
D
(
"12.345"
)),
(
12345
,
1000
))
self
.
assertEqual
(
_exact_ratio
(
D
(
"12.345"
)),
(
12345
,
1000
))
self
.
assertEqual
(
_exact_ratio
(
D
(
"-1.98"
)),
(
-
198
,
100
))
self
.
assertEqual
(
_exact_ratio
(
D
(
"-1.98"
)),
(
-
198
,
100
))
def
test_inf
(
self
):
INF
=
float
(
"INF"
)
class
MyFloat
(
float
):
pass
class
MyDecimal
(
Decimal
):
pass
for
inf
in
(
INF
,
-
INF
):
for
type_
in
(
float
,
MyFloat
,
Decimal
,
MyDecimal
):
x
=
type_
(
inf
)
ratio
=
statistics
.
_exact_ratio
(
x
)
self
.
assertEqual
(
ratio
,
(
x
,
None
))
self
.
assertEqual
(
type
(
ratio
[
0
]),
type_
)
self
.
assertTrue
(
math
.
isinf
(
ratio
[
0
]))
def
test_float_nan
(
self
):
NAN
=
float
(
"NAN"
)
class
MyFloat
(
float
):
pass
for
nan
in
(
NAN
,
MyFloat
(
NAN
)):
ratio
=
statistics
.
_exact_ratio
(
nan
)
self
.
assertTrue
(
math
.
isnan
(
ratio
[
0
]))
self
.
assertIs
(
ratio
[
1
],
None
)
self
.
assertEqual
(
type
(
ratio
[
0
]),
type
(
nan
))
def
test_decimal_nan
(
self
):
NAN
=
Decimal
(
"NAN"
)
sNAN
=
Decimal
(
"sNAN"
)
class
MyDecimal
(
Decimal
):
pass
for
nan
in
(
NAN
,
MyDecimal
(
NAN
),
sNAN
,
MyDecimal
(
sNAN
)):
ratio
=
statistics
.
_exact_ratio
(
nan
)
self
.
assertTrue
(
_nan_equal
(
ratio
[
0
],
nan
))
self
.
assertIs
(
ratio
[
1
],
None
)
self
.
assertEqual
(
type
(
ratio
[
0
]),
type
(
nan
))
class
DecimalToRatioTest
(
unittest
.
TestCase
):
class
DecimalToRatioTest
(
unittest
.
TestCase
):
# Test _decimal_to_ratio private function.
# Test _decimal_to_ratio private function.
def
testSpecialsRaise
(
self
):
def
test_infinity
(
self
):
# Test that NANs and INFs raise ValueError.
# Test that INFs are handled correctly.
# Non-special values are covered by _exact_ratio above.
inf
=
Decimal
(
'INF'
)
for
d
in
(
Decimal
(
'NAN'
),
Decimal
(
'sNAN'
),
Decimal
(
'INF'
)):
self
.
assertEqual
(
statistics
.
_decimal_to_ratio
(
inf
),
(
inf
,
None
))
self
.
assertRaises
(
ValueError
,
statistics
.
_decimal_to_ratio
,
d
)
self
.
assertEqual
(
statistics
.
_decimal_to_ratio
(
-
inf
),
(
-
inf
,
None
))
def
test_nan
(
self
):
# Test that NANs are handled correctly.
for
nan
in
(
Decimal
(
'NAN'
),
Decimal
(
'sNAN'
)):
num
,
den
=
statistics
.
_decimal_to_ratio
(
nan
)
# Because NANs always compare non-equal, we cannot use assertEqual.
# Nor can we use an identity test, as we don't guarantee anything
# about the object identity.
self
.
assertTrue
(
_nan_equal
(
num
,
nan
))
self
.
assertIs
(
den
,
None
)
def
test_sign
(
self
):
def
test_sign
(
self
):
# Test sign is calculated correctly.
# Test sign is calculated correctly.
...
@@ -718,25 +794,181 @@ class DecimalToRatioTest(unittest.TestCase):
...
@@ -718,25 +794,181 @@ class DecimalToRatioTest(unittest.TestCase):
self
.
assertEqual
(
t
,
(
147000
,
1
))
self
.
assertEqual
(
t
,
(
147000
,
1
))
class
CheckTypeTest
(
unittest
.
TestCase
):
class
IsFiniteTest
(
unittest
.
TestCase
):
# Test _check_type private function.
# Test _isfinite private function.
def
test_finite
(
self
):
# Test that finite numbers are recognised as finite.
for
x
in
(
5
,
Fraction
(
1
,
3
),
2.5
,
Decimal
(
"5.5"
)):
self
.
assertTrue
(
statistics
.
_isfinite
(
x
))
def
test_infinity
(
self
):
# Test that INFs are not recognised as finite.
for
x
in
(
float
(
"inf"
),
Decimal
(
"inf"
)):
self
.
assertFalse
(
statistics
.
_isfinite
(
x
))
def
test_nan
(
self
):
# Test that NANs are not recognised as finite.
for
x
in
(
float
(
"nan"
),
Decimal
(
"NAN"
),
Decimal
(
"sNAN"
)):
self
.
assertFalse
(
statistics
.
_isfinite
(
x
))
def
test_allowed
(
self
):
# Test that a type which should be allowed is allowed.
allowed
=
set
([
int
,
float
])
statistics
.
_check_type
(
int
,
allowed
)
statistics
.
_check_type
(
float
,
allowed
)
def
test_not_allowed
(
self
):
class
CoerceTest
(
unittest
.
TestCase
):
# Test that a type which should not be allowed raises.
# Test that private function _coerce correctly deals with types.
allowed
=
set
([
int
,
float
])
self
.
assertRaises
(
TypeError
,
statistics
.
_check_type
,
Decimal
,
allowed
)
def
test_add_to_allowed
(
self
):
# The coercion rules are currently an implementation detail, although at
# Test that a second type will be added to the allowed set.
# some point that should change. The tests and comments here define the
allowed
=
set
([
int
])
# correct implementation.
statistics
.
_check_type
(
float
,
allowed
)
self
.
assertEqual
(
allowed
,
set
([
int
,
float
]))
# Pre-conditions of _coerce:
#
# - The first time _sum calls _coerce, the
# - coerce(T, S) will never be called with bool as the first argument;
# this is a pre-condition, guarded with an assertion.
#
# - coerce(T, T) will always return T; we assume T is a valid numeric
# type. Violate this assumption at your own risk.
#
# - Apart from as above, bool is treated as if it were actually int.
#
# - coerce(int, X) and coerce(X, int) return X.
# -
def
test_bool
(
self
):
# bool is somewhat special, due to the pre-condition that it is
# never given as the first argument to _coerce, and that it cannot
# be subclassed. So we test it specially.
for
T
in
(
int
,
float
,
Fraction
,
Decimal
):
self
.
assertIs
(
statistics
.
_coerce
(
T
,
bool
),
T
)
class
MyClass
(
T
):
pass
self
.
assertIs
(
statistics
.
_coerce
(
MyClass
,
bool
),
MyClass
)
def
assertCoerceTo
(
self
,
A
,
B
):
"""Assert that type A coerces to B."""
self
.
assertIs
(
statistics
.
_coerce
(
A
,
B
),
B
)
self
.
assertIs
(
statistics
.
_coerce
(
B
,
A
),
B
)
def
check_coerce_to
(
self
,
A
,
B
):
"""Checks that type A coerces to B, including subclasses."""
# Assert that type A is coerced to B.
self
.
assertCoerceTo
(
A
,
B
)
# Subclasses of A are also coerced to B.
class
SubclassOfA
(
A
):
pass
self
.
assertCoerceTo
(
SubclassOfA
,
B
)
# A, and subclasses of A, are coerced to subclasses of B.
class
SubclassOfB
(
B
):
pass
self
.
assertCoerceTo
(
A
,
SubclassOfB
)
self
.
assertCoerceTo
(
SubclassOfA
,
SubclassOfB
)
def
assertCoerceRaises
(
self
,
A
,
B
):
"""Assert that coercing A to B, or vice versa, raises TypeError."""
self
.
assertRaises
(
TypeError
,
statistics
.
_coerce
,
(
A
,
B
))
self
.
assertRaises
(
TypeError
,
statistics
.
_coerce
,
(
B
,
A
))
def
check_type_coercions
(
self
,
T
):
"""Check that type T coerces correctly with subclasses of itself."""
assert
T
is
not
bool
# Coercing a type with itself returns the same type.
self
.
assertIs
(
statistics
.
_coerce
(
T
,
T
),
T
)
# Coercing a type with a subclass of itself returns the subclass.
class
U
(
T
):
pass
class
V
(
T
):
pass
class
W
(
U
):
pass
for
typ
in
(
U
,
V
,
W
):
self
.
assertCoerceTo
(
T
,
typ
)
self
.
assertCoerceTo
(
U
,
W
)
# Coercing two subclasses that aren't parent/child is an error.
self
.
assertCoerceRaises
(
U
,
V
)
self
.
assertCoerceRaises
(
V
,
W
)
def
test_int
(
self
):
# Check that int coerces correctly.
self
.
check_type_coercions
(
int
)
for
typ
in
(
float
,
Fraction
,
Decimal
):
self
.
check_coerce_to
(
int
,
typ
)
def
test_fraction
(
self
):
# Check that Fraction coerces correctly.
self
.
check_type_coercions
(
Fraction
)
self
.
check_coerce_to
(
Fraction
,
float
)
def
test_decimal
(
self
):
# Check that Decimal coerces correctly.
self
.
check_type_coercions
(
Decimal
)
def
test_float
(
self
):
# Check that float coerces correctly.
self
.
check_type_coercions
(
float
)
def
test_non_numeric_types
(
self
):
for
bad_type
in
(
str
,
list
,
type
(
None
),
tuple
,
dict
):
for
good_type
in
(
int
,
float
,
Fraction
,
Decimal
):
self
.
assertCoerceRaises
(
good_type
,
bad_type
)
def
test_incompatible_types
(
self
):
# Test that incompatible types raise.
for
T
in
(
float
,
Fraction
):
class
MySubclass
(
T
):
pass
self
.
assertCoerceRaises
(
T
,
Decimal
)
self
.
assertCoerceRaises
(
MySubclass
,
Decimal
)
class
ConvertTest
(
unittest
.
TestCase
):
# Test private _convert function.
def
check_exact_equal
(
self
,
x
,
y
):
"""Check that x equals y, and has the same type as well."""
self
.
assertEqual
(
x
,
y
)
self
.
assertIs
(
type
(
x
),
type
(
y
))
def
test_int
(
self
):
# Test conversions to int.
x
=
statistics
.
_convert
(
Fraction
(
71
),
int
)
self
.
check_exact_equal
(
x
,
71
)
class
MyInt
(
int
):
pass
x
=
statistics
.
_convert
(
Fraction
(
17
),
MyInt
)
self
.
check_exact_equal
(
x
,
MyInt
(
17
))
def
test_fraction
(
self
):
# Test conversions to Fraction.
x
=
statistics
.
_convert
(
Fraction
(
95
,
99
),
Fraction
)
self
.
check_exact_equal
(
x
,
Fraction
(
95
,
99
))
class
MyFraction
(
Fraction
):
def
__truediv__
(
self
,
other
):
return
self
.
__class__
(
super
().
__truediv__
(
other
))
x
=
statistics
.
_convert
(
Fraction
(
71
,
13
),
MyFraction
)
self
.
check_exact_equal
(
x
,
MyFraction
(
71
,
13
))
def
test_float
(
self
):
# Test conversions to float.
x
=
statistics
.
_convert
(
Fraction
(
-
1
,
2
),
float
)
self
.
check_exact_equal
(
x
,
-
0.5
)
class
MyFloat
(
float
):
def
__truediv__
(
self
,
other
):
return
self
.
__class__
(
super
().
__truediv__
(
other
))
x
=
statistics
.
_convert
(
Fraction
(
9
,
8
),
MyFloat
)
self
.
check_exact_equal
(
x
,
MyFloat
(
1.125
))
def
test_decimal
(
self
):
# Test conversions to Decimal.
x
=
statistics
.
_convert
(
Fraction
(
1
,
40
),
Decimal
)
self
.
check_exact_equal
(
x
,
Decimal
(
"0.025"
))
class
MyDecimal
(
Decimal
):
def
__truediv__
(
self
,
other
):
return
self
.
__class__
(
super
().
__truediv__
(
other
))
x
=
statistics
.
_convert
(
Fraction
(
-
15
,
16
),
MyDecimal
)
self
.
check_exact_equal
(
x
,
MyDecimal
(
"-0.9375"
))
def
test_inf
(
self
):
for
INF
in
(
float
(
'inf'
),
Decimal
(
'inf'
)):
for
inf
in
(
INF
,
-
INF
):
x
=
statistics
.
_convert
(
inf
,
type
(
inf
))
self
.
check_exact_equal
(
x
,
inf
)
def
test_nan
(
self
):
for
nan
in
(
float
(
'nan'
),
Decimal
(
'NAN'
),
Decimal
(
'sNAN'
)):
x
=
statistics
.
_convert
(
nan
,
type
(
nan
))
self
.
assertTrue
(
_nan_equal
(
x
,
nan
))
# === Tests for public functions ===
# === Tests for public functions ===
...
@@ -874,52 +1106,71 @@ class UnivariateTypeMixin:
...
@@ -874,52 +1106,71 @@ class UnivariateTypeMixin:
self
.
assertIs
(
type
(
result
),
kind
)
self
.
assertIs
(
type
(
result
),
kind
)
class
TestSum
(
NumericTestCase
,
UnivariateCommonMixin
,
UnivariateTypeMixin
):
class
TestSumCommon
(
UnivariateCommonMixin
,
UnivariateTypeMixin
):
# Common test cases for statistics._sum() function.
# This test suite looks only at the numeric value returned by _sum,
# after conversion to the appropriate type.
def
setUp
(
self
):
def
simplified_sum
(
*
args
):
T
,
value
,
n
=
statistics
.
_sum
(
*
args
)
return
statistics
.
_coerce
(
value
,
T
)
self
.
func
=
simplified_sum
class
TestSum
(
NumericTestCase
):
# Test cases for statistics._sum() function.
# Test cases for statistics._sum() function.
# These tests look at the entire three value tuple returned by _sum.
def
setUp
(
self
):
def
setUp
(
self
):
self
.
func
=
statistics
.
_sum
self
.
func
=
statistics
.
_sum
def
test_empty_data
(
self
):
def
test_empty_data
(
self
):
# Override test for empty data.
# Override test for empty data.
for
data
in
([],
(),
iter
([])):
for
data
in
([],
(),
iter
([])):
self
.
assertEqual
(
self
.
func
(
data
),
0
)
self
.
assertEqual
(
self
.
func
(
data
),
(
int
,
Fraction
(
0
),
0
)
)
self
.
assertEqual
(
self
.
func
(
data
,
23
),
23
)
self
.
assertEqual
(
self
.
func
(
data
,
23
),
(
int
,
Fraction
(
23
),
0
)
)
self
.
assertEqual
(
self
.
func
(
data
,
2.3
),
2.3
)
self
.
assertEqual
(
self
.
func
(
data
,
2.3
),
(
float
,
Fraction
(
2.3
),
0
)
)
def
test_ints
(
self
):
def
test_ints
(
self
):
self
.
assertEqual
(
self
.
func
([
1
,
5
,
3
,
-
4
,
-
8
,
20
,
42
,
1
]),
60
)
self
.
assertEqual
(
self
.
func
([
1
,
5
,
3
,
-
4
,
-
8
,
20
,
42
,
1
]),
self
.
assertEqual
(
self
.
func
([
4
,
2
,
3
,
-
8
,
7
],
1000
),
1008
)
(
int
,
Fraction
(
60
),
8
))
self
.
assertEqual
(
self
.
func
([
4
,
2
,
3
,
-
8
,
7
],
1000
),
(
int
,
Fraction
(
1008
),
5
))
def
test_floats
(
self
):
def
test_floats
(
self
):
self
.
assertEqual
(
self
.
func
([
0.25
]
*
20
),
5.0
)
self
.
assertEqual
(
self
.
func
([
0.25
]
*
20
),
self
.
assertEqual
(
self
.
func
([
0.125
,
0.25
,
0.5
,
0.75
],
1.5
),
3.125
)
(
float
,
Fraction
(
5.0
),
20
))
self
.
assertEqual
(
self
.
func
([
0.125
,
0.25
,
0.5
,
0.75
],
1.5
),
(
float
,
Fraction
(
3.125
),
4
))
def
test_fractions
(
self
):
def
test_fractions
(
self
):
F
=
Fraction
self
.
assertEqual
(
self
.
func
([
Fraction
(
1
,
1000
)]
*
500
),
self
.
assertEqual
(
self
.
func
([
Fraction
(
1
,
1000
)]
*
500
),
Fraction
(
1
,
2
))
(
Fraction
,
Fraction
(
1
,
2
),
500
))
def
test_decimals
(
self
):
def
test_decimals
(
self
):
D
=
Decimal
D
=
Decimal
data
=
[
D
(
"0.001"
),
D
(
"5.246"
),
D
(
"1.702"
),
D
(
"-0.025"
),
data
=
[
D
(
"0.001"
),
D
(
"5.246"
),
D
(
"1.702"
),
D
(
"-0.025"
),
D
(
"3.974"
),
D
(
"2.328"
),
D
(
"4.617"
),
D
(
"2.843"
),
D
(
"3.974"
),
D
(
"2.328"
),
D
(
"4.617"
),
D
(
"2.843"
),
]
]
self
.
assertEqual
(
self
.
func
(
data
),
Decimal
(
"20.686"
))
self
.
assertEqual
(
self
.
func
(
data
),
(
Decimal
,
Decimal
(
"20.686"
),
8
))
def
test_compare_with_math_fsum
(
self
):
def
test_compare_with_math_fsum
(
self
):
# Compare with the math.fsum function.
# Compare with the math.fsum function.
# Ideally we ought to get the exact same result, but sometimes
# Ideally we ought to get the exact same result, but sometimes
# we differ by a very slight amount :-(
# we differ by a very slight amount :-(
data
=
[
random
.
uniform
(
-
100
,
1000
)
for
_
in
range
(
1000
)]
data
=
[
random
.
uniform
(
-
100
,
1000
)
for
_
in
range
(
1000
)]
self
.
assertApproxEqual
(
self
.
func
(
data
),
math
.
fsum
(
data
),
rel
=
2e-16
)
self
.
assertApproxEqual
(
float
(
self
.
func
(
data
)[
1
]
),
math
.
fsum
(
data
),
rel
=
2e-16
)
def
test_start_argument
(
self
):
def
test_start_argument
(
self
):
# Test that the optional start argument works correctly.
# Test that the optional start argument works correctly.
data
=
[
random
.
uniform
(
1
,
1000
)
for
_
in
range
(
100
)]
data
=
[
random
.
uniform
(
1
,
1000
)
for
_
in
range
(
100
)]
t
=
self
.
func
(
data
)
t
=
self
.
func
(
data
)
[
1
]
self
.
assertEqual
(
t
+
42
,
self
.
func
(
data
,
42
))
self
.
assertEqual
(
t
+
42
,
self
.
func
(
data
,
42
)
[
1
]
)
self
.
assertEqual
(
t
-
23
,
self
.
func
(
data
,
-
23
))
self
.
assertEqual
(
t
-
23
,
self
.
func
(
data
,
-
23
)
[
1
]
)
self
.
assertEqual
(
t
+
1e20
,
self
.
func
(
data
,
1e20
)
)
self
.
assertEqual
(
t
+
Fraction
(
1e20
),
self
.
func
(
data
,
1e20
)[
1
]
)
def
test_strings_fail
(
self
):
def
test_strings_fail
(
self
):
# Sum of strings should fail.
# Sum of strings should fail.
...
@@ -934,7 +1185,7 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
...
@@ -934,7 +1185,7 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
def
test_mixed_sum
(
self
):
def
test_mixed_sum
(
self
):
# Mixed input types are not (currently) allowed.
# Mixed input types are not (currently) allowed.
# Check that mixed data types fail.
# Check that mixed data types fail.
self
.
assertRaises
(
TypeError
,
self
.
func
,
[
1
,
2.0
,
Fraction
(
1
,
2
)])
self
.
assertRaises
(
TypeError
,
self
.
func
,
[
1
,
2.0
,
Decimal
(
1
)])
# And so does mixed start argument.
# And so does mixed start argument.
self
.
assertRaises
(
TypeError
,
self
.
func
,
[
1
,
2.0
],
Decimal
(
1
))
self
.
assertRaises
(
TypeError
,
self
.
func
,
[
1
,
2.0
],
Decimal
(
1
))
...
@@ -942,11 +1193,14 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
...
@@ -942,11 +1193,14 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
class
SumTortureTest
(
NumericTestCase
):
class
SumTortureTest
(
NumericTestCase
):
def
test_torture
(
self
):
def
test_torture
(
self
):
# Tim Peters' torture test for sum, and variants of same.
# Tim Peters' torture test for sum, and variants of same.
self
.
assertEqual
(
statistics
.
_sum
([
1
,
1e100
,
1
,
-
1e100
]
*
10000
),
20000.0
)
self
.
assertEqual
(
statistics
.
_sum
([
1
,
1e100
,
1
,
-
1e100
]
*
10000
),
self
.
assertEqual
(
statistics
.
_sum
([
1e100
,
1
,
1
,
-
1e100
]
*
10000
),
20000.0
)
(
float
,
Fraction
(
20000.0
),
40000
))
self
.
assertApproxEqual
(
self
.
assertEqual
(
statistics
.
_sum
([
1e100
,
1
,
1
,
-
1e100
]
*
10000
),
statistics
.
_sum
([
1e-100
,
1
,
1e-100
,
-
1
]
*
10000
),
2.0e-96
,
rel
=
5e-16
(
float
,
Fraction
(
20000.0
),
40000
))
)
T
,
num
,
count
=
statistics
.
_sum
([
1e-100
,
1
,
1e-100
,
-
1
]
*
10000
)
self
.
assertIs
(
T
,
float
)
self
.
assertEqual
(
count
,
40000
)
self
.
assertApproxEqual
(
float
(
num
),
2.0e-96
,
rel
=
5e-16
)
class
SumSpecialValues
(
NumericTestCase
):
class
SumSpecialValues
(
NumericTestCase
):
...
@@ -955,7 +1209,7 @@ class SumSpecialValues(NumericTestCase):
...
@@ -955,7 +1209,7 @@ class SumSpecialValues(NumericTestCase):
def
test_nan
(
self
):
def
test_nan
(
self
):
for
type_
in
(
float
,
Decimal
):
for
type_
in
(
float
,
Decimal
):
nan
=
type_
(
'nan'
)
nan
=
type_
(
'nan'
)
result
=
statistics
.
_sum
([
1
,
nan
,
2
])
result
=
statistics
.
_sum
([
1
,
nan
,
2
])
[
1
]
self
.
assertIs
(
type
(
result
),
type_
)
self
.
assertIs
(
type
(
result
),
type_
)
self
.
assertTrue
(
math
.
isnan
(
result
))
self
.
assertTrue
(
math
.
isnan
(
result
))
...
@@ -968,10 +1222,10 @@ class SumSpecialValues(NumericTestCase):
...
@@ -968,10 +1222,10 @@ class SumSpecialValues(NumericTestCase):
def
do_test_inf
(
self
,
inf
):
def
do_test_inf
(
self
,
inf
):
# Adding a single infinity gives infinity.
# Adding a single infinity gives infinity.
result
=
statistics
.
_sum
([
1
,
2
,
inf
,
3
])
result
=
statistics
.
_sum
([
1
,
2
,
inf
,
3
])
[
1
]
self
.
check_infinity
(
result
,
inf
)
self
.
check_infinity
(
result
,
inf
)
# Adding two infinities of the same sign also gives infinity.
# Adding two infinities of the same sign also gives infinity.
result
=
statistics
.
_sum
([
1
,
2
,
inf
,
3
,
inf
,
4
])
result
=
statistics
.
_sum
([
1
,
2
,
inf
,
3
,
inf
,
4
])
[
1
]
self
.
check_infinity
(
result
,
inf
)
self
.
check_infinity
(
result
,
inf
)
def
test_float_inf
(
self
):
def
test_float_inf
(
self
):
...
@@ -987,7 +1241,7 @@ class SumSpecialValues(NumericTestCase):
...
@@ -987,7 +1241,7 @@ class SumSpecialValues(NumericTestCase):
def
test_float_mismatched_infs
(
self
):
def
test_float_mismatched_infs
(
self
):
# Test that adding two infinities of opposite sign gives a NAN.
# Test that adding two infinities of opposite sign gives a NAN.
inf
=
float
(
'inf'
)
inf
=
float
(
'inf'
)
result
=
statistics
.
_sum
([
1
,
2
,
inf
,
3
,
-
inf
,
4
])
result
=
statistics
.
_sum
([
1
,
2
,
inf
,
3
,
-
inf
,
4
])
[
1
]
self
.
assertTrue
(
math
.
isnan
(
result
))
self
.
assertTrue
(
math
.
isnan
(
result
))
def
test_decimal_extendedcontext_mismatched_infs_to_nan
(
self
):
def
test_decimal_extendedcontext_mismatched_infs_to_nan
(
self
):
...
@@ -995,7 +1249,7 @@ class SumSpecialValues(NumericTestCase):
...
@@ -995,7 +1249,7 @@ class SumSpecialValues(NumericTestCase):
inf
=
Decimal
(
'inf'
)
inf
=
Decimal
(
'inf'
)
data
=
[
1
,
2
,
inf
,
3
,
-
inf
,
4
]
data
=
[
1
,
2
,
inf
,
3
,
-
inf
,
4
]
with
decimal
.
localcontext
(
decimal
.
ExtendedContext
):
with
decimal
.
localcontext
(
decimal
.
ExtendedContext
):
self
.
assertTrue
(
math
.
isnan
(
statistics
.
_sum
(
data
)))
self
.
assertTrue
(
math
.
isnan
(
statistics
.
_sum
(
data
)
[
1
]
))
def
test_decimal_basiccontext_mismatched_infs_to_nan
(
self
):
def
test_decimal_basiccontext_mismatched_infs_to_nan
(
self
):
# Test adding Decimal INFs with opposite sign raises InvalidOperation.
# Test adding Decimal INFs with opposite sign raises InvalidOperation.
...
@@ -1111,6 +1365,19 @@ class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
...
@@ -1111,6 +1365,19 @@ class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
d
=
Decimal
(
'1e4'
)
d
=
Decimal
(
'1e4'
)
self
.
assertEqual
(
statistics
.
mean
([
d
]),
d
)
self
.
assertEqual
(
statistics
.
mean
([
d
]),
d
)
def
test_regression_25177
(
self
):
# Regression test for issue 25177.
# Ensure very big and very small floats don't overflow.
# See http://bugs.python.org/issue25177.
self
.
assertEqual
(
statistics
.
mean
(
[
8.988465674311579e+307
,
8.98846567431158e+307
]),
8.98846567431158e+307
)
big
=
8.98846567431158e+307
tiny
=
5e-324
for
n
in
(
2
,
3
,
5
,
200
):
self
.
assertEqual
(
statistics
.
mean
([
big
]
*
n
),
big
)
self
.
assertEqual
(
statistics
.
mean
([
tiny
]
*
n
),
tiny
)
class
TestMedian
(
NumericTestCase
,
AverageMixin
):
class
TestMedian
(
NumericTestCase
,
AverageMixin
):
# Common tests for median and all median.* functions.
# Common tests for median and all median.* functions.
...
...
Misc/NEWS
View file @
40a841bc
...
@@ -20,6 +20,10 @@ Core and Builtins
...
@@ -20,6 +20,10 @@ Core and Builtins
Library
Library
-------
-------
- Issue #25177: Fixed problem with the mean of very small and very large
numbers. As a side effect, statistics.mean and statistics.variance should
be significantly faster.
- Issue #25718: Fixed copying object with state with boolean value is false.
- Issue #25718: Fixed copying object with state with boolean value is false.
- Issue #10131: Fixed deep copying of minidom documents. Based on patch
- Issue #10131: Fixed deep copying of minidom documents. Based on patch
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment