# gini.py
# Forked from AllenDowney/ThinkStats2.
"""This file contains code for use with "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2010 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import math
import sys
import irs
import Pmf
import Cdf
def PmfMean(pmf):
    """Computes the mean of a PMF.

    Args:
        pmf: object whose Items() method yields (value, probability) pairs

    Returns:
        float: the sum of probability * value over all items, or 0.0 when
        Items() yields nothing
    """
    # Start from 0.0 so the result is a float even for an empty PMF.
    return sum((p * val for val, p in pmf.Items()), 0.0)
def PmfMoment(pmf, mean=None, exponent=2):
    """Computes a central moment of a PMF.

    Args:
        pmf: object whose Items() method yields (value, probability) pairs
        mean: point about which the moment is taken; when None, the mean
            of the PMF (as computed by PmfMean) is used
        exponent: power applied to each deviation from the mean
            (the default, 2, gives the variance when mean is the PMF mean)

    Returns:
        float: the sum of probability * (value - mean)**exponent over all
        items, or 0.0 when Items() yields nothing
    """
    if mean is None:
        mean = PmfMean(pmf)

    # Start from 0.0 so the result is a float even for an empty PMF.
    return sum((p * (val - mean) ** exponent for val, p in pmf.Items()), 0.0)
def RelativeMeanDifference(pmf, mean=None):
    """Computes the relative mean difference of a PMF.

    The mean absolute difference is the expected value of |v1 - v2| for two
    values drawn independently from the PMF; the relative mean difference
    divides that by the mean.

    Args:
        pmf: object whose Items() method yields (value, probability) pairs
        mean: value to divide by; when None, the mean of the PMF
            (as computed by PmfMean) is used

    Returns:
        float: mean absolute difference divided by mean.  A mean of zero
        makes the final division fail.
    """
    if mean is None:
        mean = PmfMean(pmf)

    # Materialize once: the pairs are walked in both the outer and the
    # inner loop.
    items = list(pmf.Items())

    # E|v1 - v2| accumulated directly over every ordered pair, so no
    # intermediate PMF of differences has to be built.
    mean_diff = sum(
        (p1 * p2 * abs(v1 - v2) for v1, p1 in items for v2, p2 in items),
        0.0)

    return mean_diff / mean
def SummarizeData(pmf, cdf):
mean = PmfMean(pmf)
print 'mean:', mean
median = cdf.Percentile(50)
print 'median:', median
fraction_below_mean = cdf.Prob(mean)
print 'fraction below mean:', fraction_below_mean
m2 = PmfMoment(pmf, mean, 2)
m3 = PmfMoment(pmf, mean, 3)
sigma = math.sqrt(m2)
print 'sigma:', sigma
g1 = m3 / m2**(3/2)
print 'skewness:', g1
gp = 3 * (mean - median) / sigma
print 'Pearsons skewness:', gp
gini = RelativeMeanDifference(pmf) / 2
print 'gini', gini
def main(script, *args):
    """Builds the income distribution and prints its summary statistics.

    Calls irs.ReadIncomeFile(), passes the result to irs.MakeIncomeDist(),
    and hands the resulting pmf and cdf to SummarizeData.

    Args:
        script: first command-line element (the script name); unused
        *args: remaining command-line arguments; unused
    """
    data = irs.ReadIncomeFile()

    # MakeIncomeDist returns three objects; the first (hist) is not needed.
    _, pmf, cdf = irs.MakeIncomeDist(data)

    SummarizeData(pmf, cdf)
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main(*sys.argv)