# Example for the random module documentation (Doc/library/random.rst).
#
# Example of a resampling permutation test
# (https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests)
# to determine the statistical significance or p-value
# (https://en.wikipedia.org/wiki/P-value) of an observed difference
# between the effects of a drug versus a placebo.
#
# Example from "Statistics is Easy" by Dennis Shasha and Manda Wilson
from statistics import mean
from random import shuffle

drug = [54, 73, 53, 70, 73, 68, 52, 65, 65]
placebo = [54, 51, 58, 44, 55, 52, 42, 47, 58, 46]
observed_diff = mean(drug) - mean(placebo)

# Null hypothesis: the drug/placebo labels are interchangeable.  Repeatedly
# reshuffle the pooled measurements, treat the first len(drug) values as the
# "drug" group and the rest as "placebo", and count how often the relabelled
# difference of means is at least as large as the observed one.
n = 10_000
count = 0
combined = drug + placebo
for _ in range(n):
    shuffle(combined)  # shuffles in place; the pooled values never change
    new_diff = mean(combined[:len(drug)]) - mean(combined[len(drug):])
    count += (new_diff >= observed_diff)  # a bool adds as 0 or 1

# count / n estimates the one-sided p-value: the fraction of random
# relabellings that produce a difference at least as extreme as observed.
print(f'{n} label reshufflings produced only {count} instances with a difference')
print(f'at least as extreme as the observed difference of {observed_diff:.1f}.')
print(f'The one-sided p-value of {count / n:.4f} leads us to reject the null')
print('hypothesis that there is no difference between the drug and the placebo.')