Commit e917f2ed authored by Raymond Hettinger, committed by GitHub

bpo-36546: Add more tests and expand docs (#13406)

parent 73934b9d
...@@ -511,22 +511,33 @@ However, for reading convenience, most of the examples show sorted sequences. ...@@ -511,22 +511,33 @@ However, for reading convenience, most of the examples show sorted sequences.
is not at least 1. is not at least 1.
The *dist* can be any iterable containing sample data or it can be an The *dist* can be any iterable containing sample data or it can be an
instance of a class that defines an :meth:`~inv_cdf` method. instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
results, the number of data points in *dist* should be larger than *n*.
Raises :exc:`StatisticsError` if there are not at least two data points. Raises :exc:`StatisticsError` if there are not at least two data points.
For sample data, the cut points are linearly interpolated from the For sample data, the cut points are linearly interpolated from the
two nearest data points. For example, if a cut point falls one-third two nearest data points. For example, if a cut point falls one-third
of the distance between two sample values, ``100`` and ``112``, the of the distance between two sample values, ``100`` and ``112``, the
cut-point will evaluate to ``104``. Other selection methods may be cut-point will evaluate to ``104``.
offered in the future (for example choose ``100`` as the nearest
value or compute ``106`` as the midpoint). This might matter if The *method* for computing quantiles can be varied depending on
there are too few samples for a given number of cut points. whether the data in *dist* includes or excludes the lowest and
highest possible values from the population.
If *method* is set to *inclusive*, *dist* is treated as population data.
The minimum value is treated as the 0th percentile and the maximum The default *method* is "exclusive" and is used for data sampled from
value is treated as the 100th percentile. If *dist* is an instance of a population that can have more extreme values than found in the
a class that defines an :meth:`~inv_cdf` method, setting *method* samples. The portion of the population falling below the *i-th* of
has no effect. *m* data points is computed as ``i / (m + 1)``.
Setting the *method* to "inclusive" is used for describing population
data or for samples that include the extreme points. The minimum
value in *dist* is treated as the 0th percentile and the maximum
value is treated as the 100th percentile. The portion of the
population falling below the *i-th* of *m* data points is computed as
``(i - 1) / (m - 1)``.
If *dist* is an instance of a class that defines an
:meth:`~inv_cdf` method, setting *method* has no effect.
.. doctest:: .. doctest::
......
...@@ -2161,17 +2161,18 @@ class TestQuantiles(unittest.TestCase): ...@@ -2161,17 +2161,18 @@ class TestQuantiles(unittest.TestCase):
# Quantiles should be idempotent # Quantiles should be idempotent
if len(expected) >= 2: if len(expected) >= 2:
self.assertEqual(quantiles(expected, n=n), expected) self.assertEqual(quantiles(expected, n=n), expected)
# Cross-check against other methods # Cross-check against method='inclusive' which should give
if len(data) >= n: # the same result after adding in minimum and maximum values
# After end caps are added, method='inclusive' should # extrapolated from the two lowest and two highest points.
# give the same result as method='exclusive' whenever sdata = sorted(data)
# there are more data points than desired cut points. lo = 2 * sdata[0] - sdata[1]
padded_data = [min(data) - 1000] + data + [max(data) + 1000] hi = 2 * sdata[-1] - sdata[-2]
self.assertEqual( padded_data = data + [lo, hi]
quantiles(data, n=n), self.assertEqual(
quantiles(padded_data, n=n, method='inclusive'), quantiles(data, n=n),
(n, data), quantiles(padded_data, n=n, method='inclusive'),
) (n, data),
)
# Invariant under translation and scaling # Invariant under translation and scaling
def f(x): def f(x):
return 3.5 * x - 1234.675 return 3.5 * x - 1234.675
...@@ -2188,6 +2189,11 @@ class TestQuantiles(unittest.TestCase): ...@@ -2188,6 +2189,11 @@ class TestQuantiles(unittest.TestCase):
actual = quantiles(statistics.NormalDist(), n=n) actual = quantiles(statistics.NormalDist(), n=n)
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001) self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual))) for e, a in zip(expected, actual)))
# Q2 agrees with median()
for k in range(2, 60):
data = random.choices(range(100), k=k)
q1, q2, q3 = quantiles(data)
self.assertEqual(q2, statistics.median(data))
def test_specific_cases_inclusive(self): def test_specific_cases_inclusive(self):
# Match results computed by hand and cross-checked # Match results computed by hand and cross-checked
...@@ -2233,6 +2239,11 @@ class TestQuantiles(unittest.TestCase): ...@@ -2233,6 +2239,11 @@ class TestQuantiles(unittest.TestCase):
actual = quantiles(statistics.NormalDist(), n=n, method="inclusive") actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001) self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual))) for e, a in zip(expected, actual)))
# Natural deciles
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
self.assertEqual(quantiles(range(0, 101), n=10, method='inclusive'),
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
# Whenever n is smaller than the number of data points, running # Whenever n is smaller than the number of data points, running
# method='inclusive' should give the same result as method='exclusive' # method='inclusive' should give the same result as method='exclusive'
# after the two included extreme points are removed. # after the two included extreme points are removed.
...@@ -2242,6 +2253,11 @@ class TestQuantiles(unittest.TestCase): ...@@ -2242,6 +2253,11 @@ class TestQuantiles(unittest.TestCase):
data.remove(max(data)) data.remove(max(data))
expected = quantiles(data, n=32) expected = quantiles(data, n=32)
self.assertEqual(expected, actual) self.assertEqual(expected, actual)
# Q2 agrees with median()
for k in range(2, 60):
data = random.choices(range(100), k=k)
q1, q2, q3 = quantiles(data, method='inclusive')
self.assertEqual(q2, statistics.median(data))
def test_equal_inputs(self): def test_equal_inputs(self):
quantiles = statistics.quantiles quantiles = statistics.quantiles
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment