Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
4db25d5c
Commit
4db25d5c
authored
Sep 08, 2019
by
Raymond Hettinger
Committed by
GitHub
Sep 08, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-36018: Address more reviewer feedback (GH-15733)
parent
3c87a667
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
69 additions
and
39 deletions
+69
-39
Doc/library/statistics.rst
Doc/library/statistics.rst
+27
-14
Lib/statistics.py
Lib/statistics.py
+27
-5
Lib/test/test_statistics.py
Lib/test/test_statistics.py
+15
-20
No files found.
Doc/library/statistics.rst
View file @
4db25d5c
...
...
@@ -514,15 +514,14 @@ However, for reading convenience, most of the examples show sorted sequences.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set
*n* to 100 for percentiles which gives the 99 cut points that separate
*data* in
to 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
*data* into 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
is not at least 1.
The *data* can be any iterable containing sample data or it can be an
instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
The *data* can be any iterable containing sample data. For meaningful
results, the number of data points in *data* should be larger than *n*.
Raises :exc:`StatisticsError` if there are not at least two data points.
For sample data, t
he cut points are linearly interpolated from the
T
he cut points are linearly interpolated from the
two nearest data points. For example, if a cut point falls one-third
of the distance between two sample values, ``100`` and ``112``, the
cut-point will evaluate to ``104``.
...
...
@@ -547,9 +546,6 @@ However, for reading convenience, most of the examples show sorted sequences.
values, the method sorts them and assigns the following percentiles:
0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%.
If *data* is an instance of a class that defines an
:meth:`~inv_cdf` method, setting *method* has no effect.
.. doctest::
# Decile cut points for empirically sampled data
...
...
@@ -561,11 +557,6 @@ However, for reading convenience, most of the examples show sorted sequences.
>>> [round(q, 1) for q in quantiles(data, n=10)]
[81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0]
>>> # Quartile cut points for the standard normal distribution
>>> Z = NormalDist()
>>> [round(q, 4) for q in quantiles(Z, n=4)]
[-0.6745, 0.0, 0.6745]
.. versionadded:: 3.8
...
...
@@ -607,6 +598,18 @@ of applications in statistics.
<https://en.wikipedia.org/wiki/Arithmetic_mean>`_ of a normal
distribution.
.. attribute:: median
A read-only property for the `median
<https://en.wikipedia.org/wiki/Median>`_ of a normal
distribution.
.. attribute:: mode
A read-only property for the `mode
<https://en.wikipedia.org/wiki/Mode_(statistics)>`_ of a normal
distribution.
.. attribute:: stdev
A read-only property for the `standard deviation
...
...
@@ -678,6 +681,16 @@ of applications in statistics.
the two probability density functions
<https://www.rasch.org/rmt/rmt101r.htm>`_.
.. method:: NormalDist.quantiles()
Divide the normal distribution into *n* continuous intervals with
equal probability. Returns a list of (n - 1) cut points separating
the intervals.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
Set *n* to 100 for percentiles which gives the 99 cut points that
separate the normal distribution into 100 equal sized groups.
Instances of :class:`NormalDist` support addition, subtraction,
multiplication and division by a constant. These operations
are used for translation and scaling. For example:
...
...
@@ -733,9 +746,9 @@ Find the `quartiles <https://en.wikipedia.org/wiki/Quartile>`_ and `deciles
.. doctest::
>>> list(map(round,
quantiles(sat
)))
>>> list(map(round,
sat.quantiles(
)))
[928, 1060, 1192]
>>> list(map(round,
quantiles(sat,
n=10)))
>>> list(map(round,
sat.quantiles(
n=10)))
[810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310]
To estimate the distribution for a model that isn't easy to solve
...
...
Lib/statistics.py
View file @
4db25d5c
...
...
@@ -624,9 +624,8 @@ def quantiles(data, /, *, n=4, method='exclusive'):
Set *n* to 100 for percentiles which gives the 99 cut points that
separate *data* into 100 equal sized groups.
The *data* can be any iterable containing sample data or it can be
an instance of a class that defines an inv_cdf() method. For sample
data, the cut points are linearly interpolated between data points.
The *data* can be any iterable containing sample data.
The cut points are linearly interpolated between data points.
If *method* is set to *inclusive*, *data* is treated as population
data. The minimum value is treated as the 0th percentile and the
...
...
@@ -634,8 +633,6 @@ def quantiles(data, /, *, n=4, method='exclusive'):
"""
if
n
<
1
:
raise
StatisticsError
(
'n must be at least 1'
)
if
hasattr
(
data
,
'inv_cdf'
):
return
[
data
.
inv_cdf
(
i
/
n
)
for
i
in
range
(
1
,
n
)]
data
=
sorted
(
data
)
ld
=
len
(
data
)
if
ld
<
2
:
...
...
@@ -955,6 +952,17 @@ class NormalDist:
raise
StatisticsError
(
'cdf() not defined when sigma at or below zero'
)
return
_normal_dist_inv_cdf
(
p
,
self
.
_mu
,
self
.
_sigma
)
def
quantiles
(
self
,
n
=
4
):
"""Divide into *n* continuous intervals with equal probability.
Returns a list of (n - 1) cut points separating the intervals.
Each cut point is the inverse CDF evaluated at i / n for
i = 1, 2, ..., n - 1.  When *n* is an integer smaller than 2 there
are no cut points and an empty list is returned.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
Set *n* to 100 for percentiles which gives the 99 cut points that
separate the normal distribution into 100 equal sized groups.
"""
return
[
self
.
inv_cdf
(
i
/
n
)
for
i
in
range
(
1
,
n
)]
def
overlap
(
self
,
other
):
"""Compute the overlapping coefficient (OVL) between two normal distributions.
...
...
@@ -994,6 +1002,20 @@ class NormalDist:
"Arithmetic mean of the normal distribution."
return
self
.
_mu
@
property
def
median
(
self
):
"Return the median of the normal distribution."
# Read-only: simply returns the stored self._mu attribute.
return
self
.
_mu
@
property
def
mode
(
self
):
"""Return the mode of the normal distribution.
The mode is the value x at which the probability density
function (pdf) takes its maximum value.
"""
# Read-only: simply returns the stored self._mu attribute.
return
self
.
_mu
@
property
def
stdev
(
self
):
"Standard deviation of the normal distribution."
...
...
Lib/test/test_statistics.py
View file @
4db25d5c
...
...
@@ -2198,16 +2198,6 @@ class TestQuantiles(unittest.TestCase):
exp
=
list
(
map
(
f
,
expected
))
act
=
quantiles
(
map
(
f
,
data
),
n
=
n
)
self
.
assertTrue
(
all
(
math
.
isclose
(
e
,
a
)
for
e
,
a
in
zip
(
exp
,
act
)))
# Quartiles of a standard normal distribution
for
n
,
expected
in
[
(
1
,
[]),
(
2
,
[
0.0
]),
(
3
,
[
-
0.4307
,
0.4307
]),
(
4
,[
-
0.6745
,
0.0
,
0.6745
]),
]:
actual
=
quantiles
(
statistics
.
NormalDist
(),
n
=
n
)
self
.
assertTrue
(
all
(
math
.
isclose
(
e
,
a
,
abs_tol
=
0.0001
)
for
e
,
a
in
zip
(
expected
,
actual
)))
# Q2 agrees with median()
for
k
in
range
(
2
,
60
):
data
=
random
.
choices
(
range
(
100
),
k
=
k
)
...
...
@@ -2248,16 +2238,6 @@ class TestQuantiles(unittest.TestCase):
exp
=
list
(
map
(
f
,
expected
))
act
=
quantiles
(
map
(
f
,
data
),
n
=
n
,
method
=
"inclusive"
)
self
.
assertTrue
(
all
(
math
.
isclose
(
e
,
a
)
for
e
,
a
in
zip
(
exp
,
act
)))
# Quartiles of a standard normal distribution
for
n
,
expected
in
[
(
1
,
[]),
(
2
,
[
0.0
]),
(
3
,
[
-
0.4307
,
0.4307
]),
(
4
,[
-
0.6745
,
0.0
,
0.6745
]),
]:
actual
=
quantiles
(
statistics
.
NormalDist
(),
n
=
n
,
method
=
"inclusive"
)
self
.
assertTrue
(
all
(
math
.
isclose
(
e
,
a
,
abs_tol
=
0.0001
)
for
e
,
a
in
zip
(
expected
,
actual
)))
# Natural deciles
self
.
assertEqual
(
quantiles
([
0
,
100
],
n
=
10
,
method
=
'inclusive'
),
[
10.0
,
20.0
,
30.0
,
40.0
,
50.0
,
60.0
,
70.0
,
80.0
,
90.0
])
...
...
@@ -2546,6 +2526,19 @@ class TestNormalDist:
# Special values
self
.
assertTrue
(
math
.
isnan
(
Z
.
inv_cdf
(
float
(
'NaN'
))))
def
test_quantiles
(
self
):
# Quantile cut points of a standard normal distribution for n = 1..4
# (n=1 expects no cut points; n=4 expects the quartiles).
Z
=
self
.
module
.
NormalDist
()
for
n
,
expected
in
[
(
1
,
[]),
(
2
,
[
0.0
]),
(
3
,
[
-
0.4307
,
0.4307
]),
(
4
,[
-
0.6745
,
0.0
,
0.6745
]),
]:
actual
=
Z
.
quantiles
(
n
=
n
)
# Values are compared pairwise to within an absolute tolerance of 0.0001.
# NOTE(review): zip() stops at the shorter sequence, so a length mismatch
# between expected and actual is not detected here (the n=1 case passes
# vacuously) -- consider also asserting the lengths are equal.
self
.
assertTrue
(
all
(
math
.
isclose
(
e
,
a
,
abs_tol
=
0.0001
)
for
e
,
a
in
zip
(
expected
,
actual
)))
def
test_overlap
(
self
):
NormalDist
=
self
.
module
.
NormalDist
...
...
@@ -2612,6 +2605,8 @@ class TestNormalDist:
def
test_properties
(
self
):
X
=
self
.
module
.
NormalDist
(
100
,
15
)
self
.
assertEqual
(
X
.
mean
,
100
)
self
.
assertEqual
(
X
.
median
,
100
)
self
.
assertEqual
(
X
.
mode
,
100
)
self
.
assertEqual
(
X
.
stdev
,
15
)
self
.
assertEqual
(
X
.
variance
,
225
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment