Commit 6463ba30 authored by Raymond Hettinger's avatar Raymond Hettinger Committed by GitHub

bpo-27181: Add statistics.geometric_mean() (GH-12638)

parent 9d7b2c09
...@@ -40,6 +40,7 @@ or sample. ...@@ -40,6 +40,7 @@ or sample.
======================= =============================================================== ======================= ===============================================================
:func:`mean` Arithmetic mean ("average") of data. :func:`mean` Arithmetic mean ("average") of data.
:func:`fmean` Fast, floating point arithmetic mean. :func:`fmean` Fast, floating point arithmetic mean.
:func:`geometric_mean` Geometric mean of data.
:func:`harmonic_mean` Harmonic mean of data. :func:`harmonic_mean` Harmonic mean of data.
:func:`median` Median (middle value) of data. :func:`median` Median (middle value) of data.
:func:`median_low` Low median of data. :func:`median_low` Low median of data.
...@@ -130,6 +131,24 @@ However, for reading convenience, most of the examples show sorted sequences. ...@@ -130,6 +131,24 @@ However, for reading convenience, most of the examples show sorted sequences.
.. versionadded:: 3.8 .. versionadded:: 3.8
.. function:: geometric_mean(data)
Convert *data* to floats and compute the geometric mean.
Raises a :exc:`StatisticsError` if the input dataset is empty,
if it contains a zero, or if it contains a negative value.
No special efforts are made to achieve exact results.
(However, this may change in the future.)
.. doctest::
>>> round(geometric_mean([54, 24, 36]), 9)
36.0
.. versionadded:: 3.8
.. function:: harmonic_mean(data) .. function:: harmonic_mean(data)
Return the harmonic mean of *data*, a sequence or iterator of Return the harmonic mean of *data*, a sequence or iterator of
......
...@@ -322,6 +322,9 @@ Added :func:`statistics.fmean` as a faster, floating point variant of ...@@ -322,6 +322,9 @@ Added :func:`statistics.fmean` as a faster, floating point variant of
:func:`statistics.mean()`. (Contributed by Raymond Hettinger and :func:`statistics.mean()`. (Contributed by Raymond Hettinger and
Steven D'Aprano in :issue:`35904`.) Steven D'Aprano in :issue:`35904`.)
Added :func:`statistics.geometric_mean()`.
(Contributed by Raymond Hettinger in :issue:`27181`.)
Added :func:`statistics.multimode` that returns a list of the most Added :func:`statistics.multimode` that returns a list of the most
common values. (Contributed by Raymond Hettinger in :issue:`35892`.) common values. (Contributed by Raymond Hettinger in :issue:`35892`.)
......
...@@ -11,13 +11,14 @@ Calculating averages ...@@ -11,13 +11,14 @@ Calculating averages
Function Description Function Description
================== ============================================= ================== =============================================
mean Arithmetic mean (average) of data. mean Arithmetic mean (average) of data.
geometric_mean Geometric mean of data.
harmonic_mean Harmonic mean of data. harmonic_mean Harmonic mean of data.
median Median (middle value) of data. median Median (middle value) of data.
median_low Low median of data. median_low Low median of data.
median_high High median of data. median_high High median of data.
median_grouped Median, or 50th percentile, of grouped data. median_grouped Median, or 50th percentile, of grouped data.
mode Mode (most common value) of data. mode Mode (most common value) of data.
multimode List of modes (most common values of data) multimode List of modes (most common values of data).
================== ============================================= ================== =============================================
Calculate the arithmetic mean ("the average") of data: Calculate the arithmetic mean ("the average") of data:
...@@ -81,6 +82,7 @@ __all__ = [ 'StatisticsError', 'NormalDist', ...@@ -81,6 +82,7 @@ __all__ = [ 'StatisticsError', 'NormalDist',
'pstdev', 'pvariance', 'stdev', 'variance', 'pstdev', 'pvariance', 'stdev', 'variance',
'median', 'median_low', 'median_high', 'median_grouped', 'median', 'median_low', 'median_high', 'median_grouped',
'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean', 'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
'geometric_mean',
] ]
import math import math
...@@ -328,6 +330,24 @@ def fmean(data): ...@@ -328,6 +330,24 @@ def fmean(data):
except ZeroDivisionError: except ZeroDivisionError:
raise StatisticsError('fmean requires at least one data point') from None raise StatisticsError('fmean requires at least one data point') from None
def geometric_mean(data):
    """Convert data to floats and compute the geometric mean.

    The result is computed as exp(fmean(log(x) for x in data)), so it is
    always a float and large or small inputs do not overflow or underflow
    the way a running product would.

    Raises a StatisticsError if the input dataset is empty,
    if it contains a zero, or if it contains a negative value.

    No special efforts are made to achieve exact results.
    (However, this may change in the future.)

    >>> round(geometric_mean([54, 24, 36]), 9)
    36.0
    """
    try:
        return exp(fmean(map(log, data)))
    except ValueError:
        # log() rejects zero and negative inputs with a ValueError; report
        # that as a StatisticsError.  The two literals are concatenated, so
        # only the first one carries the separating space.
        raise StatisticsError('geometric mean requires a non-empty dataset '
                              'containing positive numbers') from None
def harmonic_mean(data): def harmonic_mean(data):
"""Return the harmonic mean of data. """Return the harmonic mean of data.
......
...@@ -2038,6 +2038,94 @@ class TestStdev(VarianceStdevMixin, NumericTestCase): ...@@ -2038,6 +2038,94 @@ class TestStdev(VarianceStdevMixin, NumericTestCase):
expected = math.sqrt(statistics.variance(data)) expected = math.sqrt(statistics.variance(data))
self.assertEqual(self.func(data), expected) self.assertEqual(self.func(data), expected)
class TestGeometricMean(unittest.TestCase):
    """Checks for statistics.geometric_mean() on ordinary, extreme and bad data."""

    def test_basics(self):
        gm = statistics.geometric_mean

        # Hand-checkable cases.
        for data, expected in (
            ([54, 24, 36], 36.0),
            ([4.0, 9.0], 6.0),
            ([17.625], 17.625),
        ):
            self.assertAlmostEqual(gm(data), expected)

        # Larger datasets are compared against a high-precision Decimal
        # computation of the n-th root of the product.
        random.seed(86753095551212)
        datasets = [
            range(1, 100),
            range(1, 1_000),
            range(1, 10_000),
            range(500, 10_000, 3),
            range(10_000, 500, -3),
            [12, 17, 13, 5, 120, 7],
            [random.expovariate(50.0) for _ in range(1_000)],
            [random.lognormvariate(20.0, 3.0) for _ in range(2_000)],
            [random.triangular(2000, 3000, 2200) for _ in range(3_000)],
        ]
        for dataset in datasets:
            product = math.prod(map(Decimal, dataset))
            reference = product ** (Decimal(1) / len(dataset))
            computed = gm(dataset)
            self.assertTrue(math.isclose(computed, float(reference)))

    def test_various_input_types(self):
        gm = statistics.geometric_mean

        # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
        expected_mean = 4.18886

        # The same three values expressed through different numeric types
        # and containers; the result must always be a float.
        samples = {
            'floats': [3.5, 4.0, 5.25],
            'decimals': [Decimal('3.5'), Decimal('4.0'), Decimal('5.25')],
            'fractions': [Fraction(7, 2), Fraction(4, 1), Fraction(21, 4)],
            'mixed types': [3.5, 4, Fraction(21, 4)],
            'tuple': (3.5, 4.0, 5.25),
            'iterator': iter([3.5, 4.0, 5.25]),
        }
        for kind, data in samples.items():
            actual_mean = gm(data)
            self.assertIs(type(actual_mean), float, kind)
            self.assertAlmostEqual(actual_mean, expected_mean, places=5)

    def test_big_and_small(self):
        gm = statistics.geometric_mean
        factors = (54.0, 24.0, 36.0)

        # Avoid overflow to infinity
        large = 2.0 ** 1000
        big_gm = gm([factor * large for factor in factors])
        self.assertTrue(math.isclose(big_gm, 36.0 * large))
        self.assertFalse(math.isinf(big_gm))

        # Avoid underflow to zero
        small = 2.0 ** -1000
        small_gm = gm([factor * small for factor in factors])
        self.assertTrue(math.isclose(small_gm, 36.0 * small))
        self.assertNotEqual(small_gm, 0.0)

    def test_error_cases(self):
        gm = statistics.geometric_mean
        StatisticsError = statistics.StatisticsError

        # Each entry: (expected exception, positional arguments for the call).
        failing_calls = [
            (StatisticsError, ([],)),                    # empty input
            (StatisticsError, ([3.5, 0.0, 5.25],)),      # zero input
            (StatisticsError, ([3.5, -4.0, 5.25],)),     # negative input
            (StatisticsError, (iter([]),)),              # empty iterator
            (TypeError, (None,)),                        # non-iterable input
            (TypeError, ([10, None, 20],)),              # non-numeric input
            (TypeError, ()),                             # missing data argument
            (TypeError, ([10, 20, 60], 70)),             # too many arguments
        ]
        for expected_exc, args in failing_calls:
            with self.assertRaises(expected_exc):
                gm(*args)

    def test_special_values(self):
        # Rules for special values are inherited from math.fsum()
        gm = statistics.geometric_mean
        NaN = float('Nan')
        Inf = float('Inf')

        result = gm([10, NaN])
        self.assertTrue(math.isnan(result), 'nan')

        result = gm([NaN, Inf])
        self.assertTrue(math.isnan(result), 'nan and infinity')

        result = gm([10, Inf])
        self.assertTrue(math.isinf(result), 'infinity')

        self.assertRaises(ValueError, gm, [Inf, -Inf])
class TestNormalDist(unittest.TestCase): class TestNormalDist(unittest.TestCase):
# General note on precision: The pdf(), cdf(), and overlap() methods # General note on precision: The pdf(), cdf(), and overlap() methods
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment