forked from AllenDowney/ThinkStats2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclass_size.py
98 lines (73 loc) · 2.19 KB
/
class_size.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import thinkplot
import thinkstats2
def BiasPmf(pmf, name, invert=False):
    """Returns the Pmf with oversampling proportional to value.

    If pmf is the distribution of true values, the result is the
    distribution that would be seen if values are oversampled in
    proportion to their values; for example, if you ask students
    how big their classes are, large classes are oversampled in
    proportion to their size.

    If invert=True, computes the inverse operation; for example,
    unbiasing a sample collected from students.

    A value of 0 can never be observed under size-proportional sampling,
    so when inverting, a zero-valued entry that carries zero probability
    is left untouched instead of being divided by zero.  This lets the
    output of BiasPmf (which drives the mass at 0 to zero) be unbiased
    again; the mass originally at 0 cannot be recovered.

    Args:
      pmf: Pmf object.
      name: string name for the new Pmf.
      invert: boolean

    Returns:
      Pmf object

    Raises:
      ZeroDivisionError: if invert is true and pmf assigns nonzero
        probability to the value 0.
    """
    # work on a copy so the caller's pmf is not modified
    new_pmf = pmf.Copy(name=name)

    for x, p in pmf.Items():
        if not invert:
            new_pmf.Mult(x, x)
        elif x == 0 and p == 0:
            # nothing to rescale: a size-biased sample has no mass at 0
            continue
        else:
            # still raises ZeroDivisionError for x == 0 with p > 0,
            # because such a pmf is not a valid size-biased distribution
            new_pmf.Mult(x, 1.0 / x)

    new_pmf.Normalize()
    return new_pmf
def UnbiasPmf(pmf, name):
    """Returns the Pmf reweighted in proportion to 1/value.

    Convenience wrapper: delegates to BiasPmf with invert=True.

    Args:
      pmf: Pmf object.
      name: string name for the new Pmf.

    Returns:
      Pmf object
    """
    unbiased = BiasPmf(pmf, name, invert=True)
    return unbiased
def ClassSizes():
    """Runs the class size example from the book.

    Builds the actual distribution of class sizes, computes the biased
    ('observed') distribution with BiasPmf, unbiases that again with
    UnbiasPmf, prints the mean and variance of each of the three Pmfs,
    and plots the actual and observed Pmfs.
    """
    def print_summary(dist):
        # A single-argument print(...) emits the same text under
        # Python 2 and Python 3, with no __future__ import needed.
        print('mean %s' % (dist.Mean(),))
        print('var %s' % (dist.Var(),))

    # start with the actual distribution of class sizes from the book
    # (presumably class size -> number of classes; confirm against the book)
    d = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    # form the pmf
    pmf = thinkstats2.MakePmfFromDict(d, 'actual')
    print_summary(pmf)

    # compute the biased pmf
    biased_pmf = BiasPmf(pmf, 'observed')
    print_summary(biased_pmf)

    # unbias the biased pmf
    unbiased_pmf = UnbiasPmf(biased_pmf, 'unbiased')
    print_summary(unbiased_pmf)

    # plot the Pmfs (only the actual and observed ones are drawn)
    thinkplot.Pmfs([pmf, biased_pmf])
    thinkplot.Show(xlabel='Class size',
                   ylabel='PMF')
def main():
    """Entry point: runs the class size example."""
    ClassSizes()
# Run the example only when executed as a script, not when imported.
if __name__ == '__main__':
    main()