Commit 1c0e9bb9 authored by Raymond Hettinger, committed by GitHub

Minor whitespace, indentation, and quoting changes to improve internal...

Minor whitespace, indentation, and quoting changes to improve internal consistency and appease linters (GH-14888)
parent 22f0483d
...@@ -80,12 +80,25 @@ A single exception is defined: StatisticsError is a subclass of ValueError. ...@@ -80,12 +80,25 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
""" """
__all__ = [ 'StatisticsError', 'NormalDist', 'quantiles', __all__ = [
'pstdev', 'pvariance', 'stdev', 'variance', 'NormalDist',
'median', 'median_low', 'median_high', 'median_grouped', 'StatisticsError',
'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean', 'fmean',
'geometric_mean', 'geometric_mean',
] 'harmonic_mean',
'mean',
'median',
'median_grouped',
'median_high',
'median_low',
'mode',
'multimode',
'pstdev',
'pvariance',
'quantiles',
'stdev',
'variance',
]
import math import math
import numbers import numbers
...@@ -304,8 +317,9 @@ def mean(data): ...@@ -304,8 +317,9 @@ def mean(data):
assert count == n assert count == n
return _convert(total/n, T) return _convert(total/n, T)
def fmean(data): def fmean(data):
""" Convert data to floats and compute the arithmetic mean. """Convert data to floats and compute the arithmetic mean.
This runs faster than the mean() function and it always returns a float. This runs faster than the mean() function and it always returns a float.
The result is highly accurate but not as perfect as mean(). The result is highly accurate but not as perfect as mean().
...@@ -313,7 +327,6 @@ def fmean(data): ...@@ -313,7 +327,6 @@ def fmean(data):
>>> fmean([3.5, 4.0, 5.25]) >>> fmean([3.5, 4.0, 5.25])
4.25 4.25
""" """
try: try:
n = len(data) n = len(data)
...@@ -332,6 +345,7 @@ def fmean(data): ...@@ -332,6 +345,7 @@ def fmean(data):
except ZeroDivisionError: except ZeroDivisionError:
raise StatisticsError('fmean requires at least one data point') from None raise StatisticsError('fmean requires at least one data point') from None
def geometric_mean(data): def geometric_mean(data):
"""Convert data to floats and compute the geometric mean. """Convert data to floats and compute the geometric mean.
...@@ -350,6 +364,7 @@ def geometric_mean(data): ...@@ -350,6 +364,7 @@ def geometric_mean(data):
raise StatisticsError('geometric mean requires a non-empty dataset ' raise StatisticsError('geometric mean requires a non-empty dataset '
' containing positive numbers') from None ' containing positive numbers') from None
def harmonic_mean(data): def harmonic_mean(data):
"""Return the harmonic mean of data. """Return the harmonic mean of data.
...@@ -547,23 +562,23 @@ def mode(data): ...@@ -547,23 +562,23 @@ def mode(data):
def multimode(data): def multimode(data):
""" Return a list of the most frequently occurring values. """Return a list of the most frequently occurring values.
Will return more than one result if there are multiple modes
or an empty list if *data* is empty.
>>> multimode('aabbbbbbbbcc') Will return more than one result if there are multiple modes
['b'] or an empty list if *data* is empty.
>>> multimode('aabbbbccddddeeffffgg')
['b', 'd', 'f']
>>> multimode('')
[]
>>> multimode('aabbbbbbbbcc')
['b']
>>> multimode('aabbbbccddddeeffffgg')
['b', 'd', 'f']
>>> multimode('')
[]
""" """
counts = Counter(iter(data)).most_common() counts = Counter(iter(data)).most_common()
maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, [])) maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, []))
return list(map(itemgetter(0), mode_items)) return list(map(itemgetter(0), mode_items))
# Notes on methods for computing quantiles # Notes on methods for computing quantiles
# ---------------------------------------- # ----------------------------------------
# #
...@@ -601,7 +616,7 @@ def multimode(data): ...@@ -601,7 +616,7 @@ def multimode(data):
# external packages can be used for anything more advanced. # external packages can be used for anything more advanced.
def quantiles(dist, /, *, n=4, method='exclusive'): def quantiles(dist, /, *, n=4, method='exclusive'):
'''Divide *dist* into *n* continuous intervals with equal probability. """Divide *dist* into *n* continuous intervals with equal probability.
Returns a list of (n - 1) cut points separating the intervals. Returns a list of (n - 1) cut points separating the intervals.
...@@ -616,7 +631,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'): ...@@ -616,7 +631,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
If *method* is set to *inclusive*, *dist* is treated as population If *method* is set to *inclusive*, *dist* is treated as population
data. The minimum value is treated as the 0th percentile and the data. The minimum value is treated as the 0th percentile and the
maximum value is treated as the 100th percentile. maximum value is treated as the 100th percentile.
''' """
if n < 1: if n < 1:
raise StatisticsError('n must be at least 1') raise StatisticsError('n must be at least 1')
if hasattr(dist, 'inv_cdf'): if hasattr(dist, 'inv_cdf'):
...@@ -646,6 +661,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'): ...@@ -646,6 +661,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
return result return result
raise ValueError(f'Unknown method: {method!r}') raise ValueError(f'Unknown method: {method!r}')
# === Measures of spread === # === Measures of spread ===
# See http://mathworld.wolfram.com/Variance.html # See http://mathworld.wolfram.com/Variance.html
...@@ -805,18 +821,21 @@ def pstdev(data, mu=None): ...@@ -805,18 +821,21 @@ def pstdev(data, mu=None):
except AttributeError: except AttributeError:
return math.sqrt(var) return math.sqrt(var)
## Normal Distribution ##################################################### ## Normal Distribution #####################################################
class NormalDist: class NormalDist:
'Normal distribution of a random variable' "Normal distribution of a random variable"
# https://en.wikipedia.org/wiki/Normal_distribution # https://en.wikipedia.org/wiki/Normal_distribution
# https://en.wikipedia.org/wiki/Variance#Properties # https://en.wikipedia.org/wiki/Variance#Properties
__slots__ = {'_mu': 'Arithmetic mean of a normal distribution', __slots__ = {
'_sigma': 'Standard deviation of a normal distribution'} '_mu': 'Arithmetic mean of a normal distribution',
'_sigma': 'Standard deviation of a normal distribution',
}
def __init__(self, mu=0.0, sigma=1.0): def __init__(self, mu=0.0, sigma=1.0):
'NormalDist where mu is the mean and sigma is the standard deviation.' "NormalDist where mu is the mean and sigma is the standard deviation."
if sigma < 0.0: if sigma < 0.0:
raise StatisticsError('sigma must be non-negative') raise StatisticsError('sigma must be non-negative')
self._mu = mu self._mu = mu
...@@ -824,40 +843,42 @@ class NormalDist: ...@@ -824,40 +843,42 @@ class NormalDist:
@classmethod @classmethod
def from_samples(cls, data): def from_samples(cls, data):
'Make a normal distribution instance from sample data.' "Make a normal distribution instance from sample data."
if not isinstance(data, (list, tuple)): if not isinstance(data, (list, tuple)):
data = list(data) data = list(data)
xbar = fmean(data) xbar = fmean(data)
return cls(xbar, stdev(data, xbar)) return cls(xbar, stdev(data, xbar))
def samples(self, n, *, seed=None): def samples(self, n, *, seed=None):
'Generate *n* samples for a given mean and standard deviation.' "Generate *n* samples for a given mean and standard deviation."
gauss = random.gauss if seed is None else random.Random(seed).gauss gauss = random.gauss if seed is None else random.Random(seed).gauss
mu, sigma = self._mu, self._sigma mu, sigma = self._mu, self._sigma
return [gauss(mu, sigma) for i in range(n)] return [gauss(mu, sigma) for i in range(n)]
def pdf(self, x): def pdf(self, x):
'Probability density function. P(x <= X < x+dx) / dx' "Probability density function. P(x <= X < x+dx) / dx"
variance = self._sigma ** 2.0 variance = self._sigma ** 2.0
if not variance: if not variance:
raise StatisticsError('pdf() not defined when sigma is zero') raise StatisticsError('pdf() not defined when sigma is zero')
return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau * variance) return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau*variance)
def cdf(self, x): def cdf(self, x):
'Cumulative distribution function. P(X <= x)' "Cumulative distribution function. P(X <= x)"
if not self._sigma: if not self._sigma:
raise StatisticsError('cdf() not defined when sigma is zero') raise StatisticsError('cdf() not defined when sigma is zero')
return 0.5 * (1.0 + erf((x - self._mu) / (self._sigma * sqrt(2.0)))) return 0.5 * (1.0 + erf((x - self._mu) / (self._sigma * sqrt(2.0))))
def inv_cdf(self, p): def inv_cdf(self, p):
'''Inverse cumulative distribution function. x : P(X <= x) = p """Inverse cumulative distribution function. x : P(X <= x) = p
Finds the value of the random variable such that the probability of the Finds the value of the random variable such that the probability of
variable being less than or equal to that value equals the given probability. the variable being less than or equal to that value equals the given
probability.
This function is also called the percent point function or quantile function. This function is also called the percent point function or quantile
''' function.
if (p <= 0.0 or p >= 1.0): """
if p <= 0.0 or p >= 1.0:
raise StatisticsError('p must be in the range 0.0 < p < 1.0') raise StatisticsError('p must be in the range 0.0 < p < 1.0')
if self._sigma <= 0.0: if self._sigma <= 0.0:
raise StatisticsError('cdf() not defined when sigma at or below zero') raise StatisticsError('cdf() not defined when sigma at or below zero')
...@@ -933,7 +954,7 @@ class NormalDist: ...@@ -933,7 +954,7 @@ class NormalDist:
return self._mu + (x * self._sigma) return self._mu + (x * self._sigma)
def overlap(self, other): def overlap(self, other):
'''Compute the overlapping coefficient (OVL) between two normal distributions. """Compute the overlapping coefficient (OVL) between two normal distributions.
Measures the agreement between two normal probability distributions. Measures the agreement between two normal probability distributions.
Returns a value between 0.0 and 1.0 giving the overlapping area in Returns a value between 0.0 and 1.0 giving the overlapping area in
...@@ -943,7 +964,7 @@ class NormalDist: ...@@ -943,7 +964,7 @@ class NormalDist:
>>> N2 = NormalDist(3.2, 2.0) >>> N2 = NormalDist(3.2, 2.0)
>>> N1.overlap(N2) >>> N1.overlap(N2)
0.8035050657330205 0.8035050657330205
''' """
# See: "The overlapping coefficient as a measure of agreement between # See: "The overlapping coefficient as a measure of agreement between
# probability distributions and point estimation of the overlap of two # probability distributions and point estimation of the overlap of two
# normal densities" -- Henry F. Inman and Edwin L. Bradley Jr # normal densities" -- Henry F. Inman and Edwin L. Bradley Jr
...@@ -968,21 +989,21 @@ class NormalDist: ...@@ -968,21 +989,21 @@ class NormalDist:
@property @property
def mean(self): def mean(self):
'Arithmetic mean of the normal distribution.' "Arithmetic mean of the normal distribution."
return self._mu return self._mu
@property @property
def stdev(self): def stdev(self):
'Standard deviation of the normal distribution.' "Standard deviation of the normal distribution."
return self._sigma return self._sigma
@property @property
def variance(self): def variance(self):
'Square of the standard deviation.' "Square of the standard deviation."
return self._sigma ** 2.0 return self._sigma ** 2.0
def __add__(x1, x2): def __add__(x1, x2):
'''Add a constant or another NormalDist instance. """Add a constant or another NormalDist instance.
If *other* is a constant, translate mu by the constant, If *other* is a constant, translate mu by the constant,
leaving sigma unchanged. leaving sigma unchanged.
...@@ -990,13 +1011,13 @@ class NormalDist: ...@@ -990,13 +1011,13 @@ class NormalDist:
If *other* is a NormalDist, add both the means and the variances. If *other* is a NormalDist, add both the means and the variances.
Mathematically, this works only if the two distributions are Mathematically, this works only if the two distributions are
independent or if they are jointly normally distributed. independent or if they are jointly normally distributed.
''' """
if isinstance(x2, NormalDist): if isinstance(x2, NormalDist):
return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma)) return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma))
return NormalDist(x1._mu + x2, x1._sigma) return NormalDist(x1._mu + x2, x1._sigma)
def __sub__(x1, x2): def __sub__(x1, x2):
'''Subtract a constant or another NormalDist instance. """Subtract a constant or another NormalDist instance.
If *other* is a constant, translate by the constant mu, If *other* is a constant, translate by the constant mu,
leaving sigma unchanged. leaving sigma unchanged.
...@@ -1004,51 +1025,51 @@ class NormalDist: ...@@ -1004,51 +1025,51 @@ class NormalDist:
If *other* is a NormalDist, subtract the means and add the variances. If *other* is a NormalDist, subtract the means and add the variances.
Mathematically, this works only if the two distributions are Mathematically, this works only if the two distributions are
independent or if they are jointly normally distributed. independent or if they are jointly normally distributed.
''' """
if isinstance(x2, NormalDist): if isinstance(x2, NormalDist):
return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma)) return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma))
return NormalDist(x1._mu - x2, x1._sigma) return NormalDist(x1._mu - x2, x1._sigma)
def __mul__(x1, x2): def __mul__(x1, x2):
'''Multiply both mu and sigma by a constant. """Multiply both mu and sigma by a constant.
Used for rescaling, perhaps to change measurement units. Used for rescaling, perhaps to change measurement units.
Sigma is scaled with the absolute value of the constant. Sigma is scaled with the absolute value of the constant.
''' """
return NormalDist(x1._mu * x2, x1._sigma * fabs(x2)) return NormalDist(x1._mu * x2, x1._sigma * fabs(x2))
def __truediv__(x1, x2): def __truediv__(x1, x2):
'''Divide both mu and sigma by a constant. """Divide both mu and sigma by a constant.
Used for rescaling, perhaps to change measurement units. Used for rescaling, perhaps to change measurement units.
Sigma is scaled with the absolute value of the constant. Sigma is scaled with the absolute value of the constant.
''' """
return NormalDist(x1._mu / x2, x1._sigma / fabs(x2)) return NormalDist(x1._mu / x2, x1._sigma / fabs(x2))
def __pos__(x1): def __pos__(x1):
'Return a copy of the instance.' "Return a copy of the instance."
return NormalDist(x1._mu, x1._sigma) return NormalDist(x1._mu, x1._sigma)
def __neg__(x1): def __neg__(x1):
'Negates mu while keeping sigma the same.' "Negates mu while keeping sigma the same."
return NormalDist(-x1._mu, x1._sigma) return NormalDist(-x1._mu, x1._sigma)
__radd__ = __add__ __radd__ = __add__
def __rsub__(x1, x2): def __rsub__(x1, x2):
'Subtract a NormalDist from a constant or another NormalDist.' "Subtract a NormalDist from a constant or another NormalDist."
return -(x1 - x2) return -(x1 - x2)
__rmul__ = __mul__ __rmul__ = __mul__
def __eq__(x1, x2): def __eq__(x1, x2):
'Two NormalDist objects are equal if their mu and sigma are both equal.' "Two NormalDist objects are equal if their mu and sigma are both equal."
if not isinstance(x2, NormalDist): if not isinstance(x2, NormalDist):
return NotImplemented return NotImplemented
return (x1._mu, x2._sigma) == (x2._mu, x2._sigma) return (x1._mu, x2._sigma) == (x2._mu, x2._sigma)
def __hash__(self): def __hash__(self):
'NormalDist objects hash equal if their mu and sigma are both equal.' "NormalDist objects hash equal if their mu and sigma are both equal."
return hash((self._mu, self._sigma)) return hash((self._mu, self._sigma))
def __repr__(self): def __repr__(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment