Commit 4db25d5c authored by Raymond Hettinger's avatar Raymond Hettinger Committed by GitHub

bpo-36018: Address more reviewer feedback (GH-15733)

parent 3c87a667
......@@ -514,15 +514,14 @@ However, for reading convenience, most of the examples show sorted sequences.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set
*n* to 100 for percentiles which gives the 99 cut points that separate
*data* in to 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
*data* into 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
is not at least 1.
The *data* can be any iterable containing sample data or it can be an
instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
The *data* can be any iterable containing sample data. For meaningful
results, the number of data points in *data* should be larger than *n*.
Raises :exc:`StatisticsError` if there are not at least two data points.
For sample data, the cut points are linearly interpolated from the
The cut points are linearly interpolated from the
two nearest data points. For example, if a cut point falls one-third
of the distance between two sample values, ``100`` and ``112``, the
cut-point will evaluate to ``104``.
......@@ -547,9 +546,6 @@ However, for reading convenience, most of the examples show sorted sequences.
values, the method sorts them and assigns the following percentiles:
0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%.
If *data* is an instance of a class that defines an
:meth:`~inv_cdf` method, setting *method* has no effect.
.. doctest::
# Decile cut points for empirically sampled data
......@@ -561,11 +557,6 @@ However, for reading convenience, most of the examples show sorted sequences.
>>> [round(q, 1) for q in quantiles(data, n=10)]
[81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0]
>>> # Quartile cut points for the standard normal distribution
>>> Z = NormalDist()
>>> [round(q, 4) for q in quantiles(Z, n=4)]
[-0.6745, 0.0, 0.6745]
.. versionadded:: 3.8
......@@ -607,6 +598,18 @@ of applications in statistics.
<https://en.wikipedia.org/wiki/Arithmetic_mean>`_ of a normal
distribution.
.. attribute:: median
A read-only property for the `median
<https://en.wikipedia.org/wiki/Median>`_ of a normal
distribution.
.. attribute:: mode
A read-only property for the `mode
<https://en.wikipedia.org/wiki/Mode_(statistics)>`_ of a normal
distribution.
.. attribute:: stdev
A read-only property for the `standard deviation
......@@ -678,6 +681,16 @@ of applications in statistics.
the two probability density functions
<https://www.rasch.org/rmt/rmt101r.htm>`_.
.. method:: NormalDist.quantiles(n=4)
Divide the normal distribution into *n* continuous intervals with
equal probability. Returns a list of (n - 1) cut points separating
the intervals.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
Set *n* to 100 for percentiles which gives the 99 cut points that
separate the normal distribution into 100 equal sized groups.
Instances of :class:`NormalDist` support addition, subtraction,
multiplication and division by a constant. These operations
are used for translation and scaling. For example:
......@@ -733,9 +746,9 @@ Find the `quartiles <https://en.wikipedia.org/wiki/Quartile>`_ and `deciles
.. doctest::
>>> list(map(round, quantiles(sat)))
>>> list(map(round, sat.quantiles()))
[928, 1060, 1192]
>>> list(map(round, quantiles(sat, n=10)))
>>> list(map(round, sat.quantiles(n=10)))
[810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310]
To estimate the distribution for a model that isn't easy to solve
......
......@@ -624,9 +624,8 @@ def quantiles(data, /, *, n=4, method='exclusive'):
Set *n* to 100 for percentiles which gives the 99 cut points that
separate *data* into 100 equal sized groups.
The *data* can be any iterable containing sample data or it can be
an instance of a class that defines an inv_cdf() method. For sample
data, the cut points are linearly interpolated between data points.
The *data* can be any iterable containing sample data.
The cut points are linearly interpolated between data points.
If *method* is set to *inclusive*, *data* is treated as population
data. The minimum value is treated as the 0th percentile and the
......@@ -634,8 +633,6 @@ def quantiles(data, /, *, n=4, method='exclusive'):
"""
if n < 1:
raise StatisticsError('n must be at least 1')
if hasattr(data, 'inv_cdf'):
return [data.inv_cdf(i / n) for i in range(1, n)]
data = sorted(data)
ld = len(data)
if ld < 2:
......@@ -955,6 +952,17 @@ class NormalDist:
raise StatisticsError('cdf() not defined when sigma at or below zero')
return _normal_dist_inv_cdf(p, self._mu, self._sigma)
def quantiles(self, n=4):
    """Return the cut points that split the distribution into *n* parts.

    The parts are continuous intervals of equal probability, so the
    result is a list of (n - 1) values; cut point number k is
    ``self.inv_cdf(k / n)``.

    Use *n*=4 for quartiles (the default), *n*=10 for deciles, or
    *n*=100 for percentiles, which yields 99 cut points.
    """
    cut_points = []
    for k in range(1, n):
        cut_points.append(self.inv_cdf(k / n))
    return cut_points
def overlap(self, other):
"""Compute the overlapping coefficient (OVL) between two normal distributions.
......@@ -994,6 +1002,20 @@ class NormalDist:
"Arithmetic mean of the normal distribution."
return self._mu
@property
def median(self):
    """Median of the normal distribution.

    Read-only; returns the stored ``_mu`` value.
    """
    return self._mu
@property
def mode(self):
    """Mode of the normal distribution.

    The mode is the value x at which the probability density
    function (pdf) takes its maximum value.  Read-only; returns the
    stored ``_mu`` value.
    """
    return self._mu
@property
def stdev(self):
"Standard deviation of the normal distribution."
......
......@@ -2198,16 +2198,6 @@ class TestQuantiles(unittest.TestCase):
exp = list(map(f, expected))
act = quantiles(map(f, data), n=n)
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
# Quartiles of a standard normal distribution
for n, expected in [
(1, []),
(2, [0.0]),
(3, [-0.4307, 0.4307]),
(4 ,[-0.6745, 0.0, 0.6745]),
]:
actual = quantiles(statistics.NormalDist(), n=n)
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual)))
# Q2 agrees with median()
for k in range(2, 60):
data = random.choices(range(100), k=k)
......@@ -2248,16 +2238,6 @@ class TestQuantiles(unittest.TestCase):
exp = list(map(f, expected))
act = quantiles(map(f, data), n=n, method="inclusive")
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
# Quartiles of a standard normal distribution
for n, expected in [
(1, []),
(2, [0.0]),
(3, [-0.4307, 0.4307]),
(4 ,[-0.6745, 0.0, 0.6745]),
]:
actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual)))
# Natural deciles
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
......@@ -2546,6 +2526,19 @@ class TestNormalDist:
# Special values
self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
def test_quantiles(self):
# Quartiles of a standard normal distribution
Z = self.module.NormalDist()
for n, expected in [
(1, []),
(2, [0.0]),
(3, [-0.4307, 0.4307]),
(4 ,[-0.6745, 0.0, 0.6745]),
]:
actual = Z.quantiles(n=n)
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual)))
def test_overlap(self):
NormalDist = self.module.NormalDist
......@@ -2612,6 +2605,8 @@ class TestNormalDist:
def test_properties(self):
X = self.module.NormalDist(100, 15)
self.assertEqual(X.mean, 100)
self.assertEqual(X.median, 100)
self.assertEqual(X.mode, 100)
self.assertEqual(X.stdev, 15)
self.assertEqual(X.variance, 225)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment