Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
318d537d
Commit
318d537d
authored
Mar 06, 2019
by
Raymond Hettinger
Committed by
GitHub
Mar 06, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-36169 : Add overlap() method to statistics.NormalDist (GH-12149)
parent
e942e7b5
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
132 additions
and
1 deletion
+132
-1
Doc/library/statistics.rst
Doc/library/statistics.rst
+32
-0
Lib/statistics.py
Lib/statistics.py
+36
-1
Lib/test/test_statistics.py
Lib/test/test_statistics.py
+62
-0
Misc/NEWS.d/next/Library/2019-03-03-11-37-09.bpo-36169.8nWJy7.rst
...S.d/next/Library/2019-03-03-11-37-09.bpo-36169.8nWJy7.rst
+2
-0
No files found.
Doc/library/statistics.rst
View file @
318d537d
...
...
@@ -549,6 +549,28 @@ of applications in statistics, including simulations and hypothesis testing.
compute the probability that a random variable *X* will be less than or
equal to *x*. Mathematically, it is written ``P(X <= x)``.
.. method:: NormalDist.overlap(other)
Compute the `overlapping coefficient (OVL)
<http://www.iceaaonline.com/ready/wp-content/uploads/2014/06/MM-9-Presentation-Meet-the-Overlapping-Coefficient-A-Measure-for-Elevator-Speeches.pdf>`_
between two normal distributions.
Measures the agreement between two normal probability distributions.
Returns a value between 0.0 and 1.0 giving the overlapping area for
two probability density functions.
In this `example from John M. Linacre
<https://www.rasch.org/rmt/rmt101r.htm>`_ about 80% of each
distribution overlaps the other:
.. doctest::
>>> N1 = NormalDist(2.4, 1.6)
>>> N2 = NormalDist(3.2, 2.0)
>>> ovl = N1.overlap(N2)
>>> f'{ovl * 100.0 :.1f}%'
'80.4%'
Instances of :class:`NormalDist` support addition, subtraction,
multiplication and division by a constant. These operations
are used for translation and scaling. For example:
...
...
@@ -595,6 +617,16 @@ determine the percentage of students with scores between 1100 and 1200:
>>> f'{fraction * 100 :.1f}% score between 1100 and 1200'
'18.2% score between 1100 and 1200'
What percentage of men and women will have the same height in `two normally
distributed populations with known means and standard deviations
<http://www.usablestats.com/lessons/normal>`_?
>>> men = NormalDist(70, 4)
>>> women = NormalDist(65, 3.5)
>>> ovl = men.overlap(women)
>>> round(ovl * 100.0, 1)
50.3
To estimate the distribution for a model than isn't easy to solve
analytically, :class:`NormalDist` can generate input samples for a `Monte
Carlo simulation <https://en.wikipedia.org/wiki/Monte_Carlo_method>`_ of the
...
...
Lib/statistics.py
View file @
318d537d
...
...
@@ -91,7 +91,7 @@ from fractions import Fraction
from
decimal
import
Decimal
from
itertools
import
groupby
from
bisect
import
bisect_left
,
bisect_right
from
math
import
hypot
,
sqrt
,
fabs
,
exp
,
erf
,
tau
from
math
import
hypot
,
sqrt
,
fabs
,
exp
,
erf
,
tau
,
log
,
fsum
...
...
@@ -740,6 +740,41 @@ class NormalDist:
raise
StatisticsError
(
'cdf() not defined when sigma is zero'
)
return
0.5
*
(
1.0
+
erf
((
x
-
self
.
mu
)
/
(
self
.
sigma
*
sqrt
(
2.0
))))
def
overlap
(
self
,
other
):
'''Compute the overlapping coefficient (OVL) between two normal distributions.
Measures the agreement between two normal probability distributions.
Returns a value between 0.0 and 1.0 giving the overlapping area in
the two underlying probability density functions.
>>> N1 = NormalDist(2.4, 1.6)
>>> N2 = NormalDist(3.2, 2.0)
>>> N1.overlap(N2)
0.8035050657330205
'''
# See: "The overlapping coefficient as a measure of agreement between
# probability distributions and point estimation of the overlap of two
# normal densities" -- Henry F. Inman and Edwin L. Bradley Jr
# http://dx.doi.org/10.1080/03610928908830127
if
not
isinstance
(
other
,
NormalDist
):
raise
TypeError
(
'Expected another NormalDist instance'
)
X
,
Y
=
self
,
other
if
(
Y
.
sigma
,
Y
.
mu
)
<
(
X
.
sigma
,
X
.
mu
):
# sort to assure commutativity
X
,
Y
=
Y
,
X
X_var
,
Y_var
=
X
.
variance
,
Y
.
variance
if
not
X_var
or
not
Y_var
:
raise
StatisticsError
(
'overlap() not defined when sigma is zero'
)
dv
=
Y_var
-
X_var
dm
=
fabs
(
Y
.
mu
-
X
.
mu
)
if
not
dv
:
return
2.0
*
NormalDist
(
dm
,
2.0
*
X
.
sigma
).
cdf
(
0
)
a
=
X
.
mu
*
Y_var
-
Y
.
mu
*
X_var
b
=
X
.
sigma
*
Y
.
sigma
*
sqrt
(
dm
**
2.0
+
dv
*
log
(
Y_var
/
X_var
))
x1
=
(
a
+
b
)
/
dv
x2
=
(
a
-
b
)
/
dv
return
1.0
-
(
fabs
(
Y
.
cdf
(
x1
)
-
X
.
cdf
(
x1
))
+
fabs
(
Y
.
cdf
(
x2
)
-
X
.
cdf
(
x2
)))
@
property
def
mean
(
self
):
'Arithmetic mean of the normal distribution'
...
...
Lib/test/test_statistics.py
View file @
318d537d
...
...
@@ -2162,6 +2162,68 @@ class TestNormalDist(unittest.TestCase):
self
.
assertEqual
(
X
.
cdf
(
float
(
'Inf'
)),
1.0
)
self
.
assertTrue
(
math
.
isnan
(
X
.
cdf
(
float
(
'NaN'
))))
def
test_overlap
(
self
):
NormalDist
=
statistics
.
NormalDist
# Match examples from Imman and Bradley
for
X1
,
X2
,
published_result
in
[
(
NormalDist
(
0.0
,
2.0
),
NormalDist
(
1.0
,
2.0
),
0.80258
),
(
NormalDist
(
0.0
,
1.0
),
NormalDist
(
1.0
,
2.0
),
0.60993
),
]:
self
.
assertAlmostEqual
(
X1
.
overlap
(
X2
),
published_result
,
places
=
4
)
self
.
assertAlmostEqual
(
X2
.
overlap
(
X1
),
published_result
,
places
=
4
)
# Check against integration of the PDF
def
overlap_numeric
(
X
,
Y
,
*
,
steps
=
8_192
,
z
=
5
):
'Numerical integration cross-check for overlap() '
fsum
=
math
.
fsum
center
=
(
X
.
mu
+
Y
.
mu
)
/
2.0
width
=
z
*
max
(
X
.
sigma
,
Y
.
sigma
)
start
=
center
-
width
dx
=
2.0
*
width
/
steps
x_arr
=
[
start
+
i
*
dx
for
i
in
range
(
steps
)]
xp
=
list
(
map
(
X
.
pdf
,
x_arr
))
yp
=
list
(
map
(
Y
.
pdf
,
x_arr
))
total
=
max
(
fsum
(
xp
),
fsum
(
yp
))
return
fsum
(
map
(
min
,
xp
,
yp
))
/
total
for
X1
,
X2
in
[
# Examples from Imman and Bradley
(
NormalDist
(
0.0
,
2.0
),
NormalDist
(
1.0
,
2.0
)),
(
NormalDist
(
0.0
,
1.0
),
NormalDist
(
1.0
,
2.0
)),
# Example from https://www.rasch.org/rmt/rmt101r.htm
(
NormalDist
(
0.0
,
1.0
),
NormalDist
(
1.0
,
2.0
)),
# Gender heights from http://www.usablestats.com/lessons/normal
(
NormalDist
(
70
,
4
),
NormalDist
(
65
,
3.5
)),
# Misc cases with equal standard deviations
(
NormalDist
(
100
,
15
),
NormalDist
(
110
,
15
)),
(
NormalDist
(
-
100
,
15
),
NormalDist
(
110
,
15
)),
(
NormalDist
(
-
100
,
15
),
NormalDist
(
-
110
,
15
)),
# Misc cases with unequal standard deviations
(
NormalDist
(
100
,
12
),
NormalDist
(
110
,
15
)),
(
NormalDist
(
100
,
12
),
NormalDist
(
150
,
15
)),
(
NormalDist
(
100
,
12
),
NormalDist
(
150
,
35
)),
# Misc cases with small values
(
NormalDist
(
1.000
,
0.002
),
NormalDist
(
1.001
,
0.003
)),
(
NormalDist
(
1.000
,
0.002
),
NormalDist
(
1.006
,
0.0003
)),
(
NormalDist
(
1.000
,
0.002
),
NormalDist
(
1.001
,
0.099
)),
]:
self
.
assertAlmostEqual
(
X1
.
overlap
(
X2
),
overlap_numeric
(
X1
,
X2
),
places
=
5
)
self
.
assertAlmostEqual
(
X2
.
overlap
(
X1
),
overlap_numeric
(
X1
,
X2
),
places
=
5
)
# Error cases
X
=
NormalDist
()
with
self
.
assertRaises
(
TypeError
):
X
.
overlap
()
# too few arguments
with
self
.
assertRaises
(
TypeError
):
X
.
overlap
(
X
,
X
)
# too may arguments
with
self
.
assertRaises
(
TypeError
):
X
.
overlap
(
None
)
# right operand not a NormalDist
with
self
.
assertRaises
(
statistics
.
StatisticsError
):
X
.
overlap
(
NormalDist
(
1
,
0
))
# right operand sigma is zero
with
self
.
assertRaises
(
statistics
.
StatisticsError
):
NormalDist
(
1
,
0
).
overlap
(
X
)
# left operand sigma is zero
def
test_properties
(
self
):
X
=
statistics
.
NormalDist
(
100
,
15
)
self
.
assertEqual
(
X
.
mean
,
100
)
...
...
Misc/NEWS.d/next/Library/2019-03-03-11-37-09.bpo-36169.8nWJy7.rst
0 → 100644
View file @
318d537d
Add overlap() method to statistics.NormalDist. Computes the overlapping
coefficient for two normal distributions.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment