Commit 47d99872 authored by Raymond Hettinger, committed by GitHub

bpo-35904: Add statistics.fmean() (GH-11892)

parent f36f8925
...@@ -404,7 +404,7 @@ with replacement to estimate a confidence interval for the mean of a sample of ...@@ -404,7 +404,7 @@ with replacement to estimate a confidence interval for the mean of a sample of
size five:: size five::
# http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm # http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm
from statistics import mean from statistics import fmean as mean
from random import choices from random import choices
data = 1, 2, 4, 4, 10 data = 1, 2, 4, 4, 10
...@@ -419,7 +419,7 @@ to determine the statistical significance or `p-value ...@@ -419,7 +419,7 @@ to determine the statistical significance or `p-value
between the effects of a drug versus a placebo:: between the effects of a drug versus a placebo::
# Example from "Statistics is Easy" by Dennis Shasha and Manda Wilson # Example from "Statistics is Easy" by Dennis Shasha and Manda Wilson
from statistics import mean from statistics import fmean as mean
from random import shuffle from random import shuffle
drug = [54, 73, 53, 70, 73, 68, 52, 65, 65] drug = [54, 73, 53, 70, 73, 68, 52, 65, 65]
......
...@@ -39,6 +39,7 @@ or sample. ...@@ -39,6 +39,7 @@ or sample.
======================= ============================================= ======================= =============================================
:func:`mean` Arithmetic mean ("average") of data. :func:`mean` Arithmetic mean ("average") of data.
:func:`fmean` Fast, floating point arithmetic mean.
:func:`harmonic_mean` Harmonic mean of data. :func:`harmonic_mean` Harmonic mean of data.
:func:`median` Median (middle value) of data. :func:`median` Median (middle value) of data.
:func:`median_low` Low median of data. :func:`median_low` Low median of data.
...@@ -111,6 +112,23 @@ However, for reading convenience, most of the examples show sorted sequences. ...@@ -111,6 +112,23 @@ However, for reading convenience, most of the examples show sorted sequences.
``mean(data)`` is equivalent to calculating the true population mean μ. ``mean(data)`` is equivalent to calculating the true population mean μ.
.. function:: fmean(data)
Convert *data* to floats and compute the arithmetic mean.
This runs faster than the :func:`mean` function and it always returns a
:class:`float`. The result is highly accurate but not as perfect as
:func:`mean`. If the input dataset is empty, it raises a
:exc:`StatisticsError`.
.. doctest::
>>> fmean([3.5, 4.0, 5.25])
4.25
.. versionadded:: 3.8
.. function:: harmonic_mean(data) .. function:: harmonic_mean(data)
Return the harmonic mean of *data*, a sequence or iterator of Return the harmonic mean of *data*, a sequence or iterator of
......
...@@ -254,6 +254,15 @@ Added :attr:`SSLContext.post_handshake_auth` to enable and ...@@ -254,6 +254,15 @@ Added :attr:`SSLContext.post_handshake_auth` to enable and
post-handshake authentication. post-handshake authentication.
(Contributed by Christian Heimes in :issue:`34670`.) (Contributed by Christian Heimes in :issue:`34670`.)
statistics
----------
Added :func:`statistics.fmean` as a faster, floating point variant of
:func:`statistics.mean()`. (Contributed by Raymond Hettinger and
Steven D'Aprano in :issue:`35904`.)
tokenize tokenize
-------- --------
......
...@@ -79,7 +79,7 @@ A single exception is defined: StatisticsError is a subclass of ValueError. ...@@ -79,7 +79,7 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
__all__ = [ 'StatisticsError', __all__ = [ 'StatisticsError',
'pstdev', 'pvariance', 'stdev', 'variance', 'pstdev', 'pvariance', 'stdev', 'variance',
'median', 'median_low', 'median_high', 'median_grouped', 'median', 'median_low', 'median_high', 'median_grouped',
'mean', 'mode', 'harmonic_mean', 'mean', 'mode', 'harmonic_mean', 'fmean',
] ]
import collections import collections
...@@ -312,6 +312,33 @@ def mean(data): ...@@ -312,6 +312,33 @@ def mean(data):
assert count == n assert count == n
return _convert(total/n, T) return _convert(total/n, T)
def fmean(data):
    """Return the arithmetic mean of data as a float.

    The values are summed as floats, which makes this quicker than
    mean() at the cost of a slightly less exact result.  Sized
    containers and plain iterators are both accepted.

    Raises StatisticsError when data holds no values.

    >>> fmean([3.5, 4.0, 5.25])
    4.25

    """
    try:
        size = len(data)
    except TypeError:
        # No __len__() (e.g. an iterator): tally the items while fsum()
        # consumes them, so the input is traversed only once.
        size = 0

        def tallied():
            nonlocal size
            for size, value in enumerate(data, start=1):
                yield value

        total = math.fsum(tallied())
    else:
        total = math.fsum(data)
    try:
        return total / size
    except ZeroDivisionError:
        # A zero count means there was nothing to average.
        raise StatisticsError('fmean requires at least one data point') from None
def harmonic_mean(data): def harmonic_mean(data):
"""Return the harmonic mean of data. """Return the harmonic mean of data.
......
...@@ -1810,6 +1810,51 @@ class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin): ...@@ -1810,6 +1810,51 @@ class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin):
# counts, this should raise. # counts, this should raise.
self.assertRaises(statistics.StatisticsError, self.func, data) self.assertRaises(statistics.StatisticsError, self.func, data)
class TestFMean(unittest.TestCase):
    """Tests for statistics.fmean(): results, error handling, special values."""

    def test_basics(self):
        # Each supported kind of input must produce exactly the expected
        # value, and the result type must always be float.
        fmean = statistics.fmean
        D = Decimal
        F = Fraction
        for data, expected_mean, kind in [
            ([3.5, 4.0, 5.25], 4.25, 'floats'),
            ([D('3.5'), D('4.0'), D('5.25')], 4.25, 'decimals'),
            ([F(7, 2), F(4, 1), F(21, 4)], 4.25, 'fractions'),
            ([True, False, True, True, False], 0.60, 'booleans'),
            ([3.5, 4, F(21, 4)], 4.25, 'mixed types'),
            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),
        ]:
            # subTest lets the loop continue after a failure, so every
            # broken input kind is reported instead of only the first.
            with self.subTest(kind=kind):
                actual_mean = fmean(data)
                self.assertIs(type(actual_mean), float, kind)
                self.assertEqual(actual_mean, expected_mean, kind)

    def test_error_cases(self):
        # Empty input raises StatisticsError; unusable input or a bad
        # call signature raises TypeError.
        fmean = statistics.fmean
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(StatisticsError):
            fmean([])                               # empty input
        with self.assertRaises(StatisticsError):
            fmean(iter([]))                         # empty iterator
        with self.assertRaises(TypeError):
            fmean(None)                             # non-iterable input
        with self.assertRaises(TypeError):
            fmean([10, None, 20])                   # non-numeric input
        with self.assertRaises(TypeError):
            fmean()                                 # missing data argument
        with self.assertRaises(TypeError):
            fmean([10, 20, 60], 70)                 # too many arguments

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        fmean = statistics.fmean
        NaN = float('Nan')
        Inf = float('Inf')
        self.assertTrue(math.isnan(fmean([10, NaN])), 'nan')
        self.assertTrue(math.isnan(fmean([NaN, Inf])), 'nan and infinity')
        self.assertTrue(math.isinf(fmean([10, Inf])), 'infinity')
        # Infinities of opposite sign have no meaningful sum.
        with self.assertRaises(ValueError):
            fmean([Inf, -Inf])
# === Tests for variances and standard deviations === # === Tests for variances and standard deviations ===
......
Added statistics.fmean() as a faster, floating point variant of the existing
mean() function.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment