Commit e4810b2a authored by Raymond Hettinger, committed by GitHub

bpo-36324: Apply review comments from Allen Downey (GH-15693)

parent 8f9cc877
This diff is collapsed.
...@@ -322,7 +322,6 @@ def fmean(data): ...@@ -322,7 +322,6 @@ def fmean(data):
"""Convert data to floats and compute the arithmetic mean. """Convert data to floats and compute the arithmetic mean.
This runs faster than the mean() function and it always returns a float. This runs faster than the mean() function and it always returns a float.
The result is highly accurate but not as perfect as mean().
If the input dataset is empty, it raises a StatisticsError. If the input dataset is empty, it raises a StatisticsError.
>>> fmean([3.5, 4.0, 5.25]) >>> fmean([3.5, 4.0, 5.25])
...@@ -546,7 +545,8 @@ def mode(data): ...@@ -546,7 +545,8 @@ def mode(data):
>>> mode(["red", "blue", "blue", "red", "green", "red", "red"]) >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
'red' 'red'
If there are multiple modes, return the first one encountered. If there are multiple modes with the same frequency, return the first one
encountered:
>>> mode(['red', 'red', 'green', 'blue', 'blue']) >>> mode(['red', 'red', 'green', 'blue', 'blue'])
'red' 'red'
...@@ -615,28 +615,28 @@ def multimode(data): ...@@ -615,28 +615,28 @@ def multimode(data):
# position is that fewer options make for easier choices and that # position is that fewer options make for easier choices and that
# external packages can be used for anything more advanced. # external packages can be used for anything more advanced.
def quantiles(dist, /, *, n=4, method='exclusive'): def quantiles(data, /, *, n=4, method='exclusive'):
"""Divide *dist* into *n* continuous intervals with equal probability. """Divide *data* into *n* continuous intervals with equal probability.
Returns a list of (n - 1) cut points separating the intervals. Returns a list of (n - 1) cut points separating the intervals.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
Set *n* to 100 for percentiles which gives the 99 cut points that Set *n* to 100 for percentiles which gives the 99 cut points that
separate *dist* into 100 equal sized groups. separate *data* into 100 equal sized groups.
The *dist* can be any iterable containing sample data or it can be The *data* can be any iterable containing sample data or it can be
an instance of a class that defines an inv_cdf() method. For sample an instance of a class that defines an inv_cdf() method. For sample
data, the cut points are linearly interpolated between data points. data, the cut points are linearly interpolated between data points.
If *method* is set to *inclusive*, *dist* is treated as population If *method* is set to *inclusive*, *data* is treated as population
data. The minimum value is treated as the 0th percentile and the data. The minimum value is treated as the 0th percentile and the
maximum value is treated as the 100th percentile. maximum value is treated as the 100th percentile.
""" """
if n < 1: if n < 1:
raise StatisticsError('n must be at least 1') raise StatisticsError('n must be at least 1')
if hasattr(dist, 'inv_cdf'): if hasattr(data, 'inv_cdf'):
return [dist.inv_cdf(i / n) for i in range(1, n)] return [data.inv_cdf(i / n) for i in range(1, n)]
data = sorted(dist) data = sorted(data)
ld = len(data) ld = len(data)
if ld < 2: if ld < 2:
raise StatisticsError('must have at least two data points') raise StatisticsError('must have at least two data points')
...@@ -745,7 +745,7 @@ def variance(data, xbar=None): ...@@ -745,7 +745,7 @@ def variance(data, xbar=None):
def pvariance(data, mu=None): def pvariance(data, mu=None):
"""Return the population variance of ``data``. """Return the population variance of ``data``.
data should be an iterable of Real-valued numbers, with at least one data should be a sequence or iterator of Real-valued numbers, with at least one
value. The optional argument mu, if given, should be the mean of value. The optional argument mu, if given, should be the mean of
the data. If it is missing or None, the mean is automatically calculated. the data. If it is missing or None, the mean is automatically calculated.
...@@ -766,10 +766,6 @@ def pvariance(data, mu=None): ...@@ -766,10 +766,6 @@ def pvariance(data, mu=None):
>>> pvariance(data, mu) >>> pvariance(data, mu)
1.25 1.25
This function does not check that ``mu`` is actually the mean of ``data``.
Giving arbitrary values for ``mu`` may lead to invalid or impossible
results.
Decimals and Fractions are supported: Decimals and Fractions are supported:
>>> from decimal import Decimal as D >>> from decimal import Decimal as D
...@@ -913,8 +909,8 @@ class NormalDist: ...@@ -913,8 +909,8 @@ class NormalDist:
"NormalDist where mu is the mean and sigma is the standard deviation." "NormalDist where mu is the mean and sigma is the standard deviation."
if sigma < 0.0: if sigma < 0.0:
raise StatisticsError('sigma must be non-negative') raise StatisticsError('sigma must be non-negative')
self._mu = mu self._mu = float(mu)
self._sigma = sigma self._sigma = float(sigma)
@classmethod @classmethod
def from_samples(cls, data): def from_samples(cls, data):
......
...@@ -416,6 +416,7 @@ Dima Dorfman ...@@ -416,6 +416,7 @@ Dima Dorfman
Yves Dorfsman Yves Dorfsman
Michael Dorman Michael Dorman
Steve Dower Steve Dower
Allen Downey
Cesar Douady Cesar Douady
Dean Draayer Dean Draayer
Fred L. Drake, Jr. Fred L. Drake, Jr.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment