forked from hammadhaleem/website-classification-ptm-naive
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_model.py
125 lines (108 loc) · 3.54 KB
/
test_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import json, MySQLdb
from math import log
from operator import itemgetter
from lib.mysql import connection
from decimal import Decimal
from collections import OrderedDict
## ____Constants_____###
MODEL_DIR = "model/"
#Total in database
data = {
u"econonics": 2021,
u"sports": 2014,
u"religion": 1975,
u"entertain": 2025,
u"health": 2033,
u"politics": 2051,
u"education": 2038
}
categories = [u'entertain',u'politics',u'econonics',u'sports',u'education',u'religion',u'health']
category_counts = {
u'entertain' :json.load(open(MODEL_DIR+"entertain.json")),
u'politics' :json.load(open(MODEL_DIR+"politics.json")),
u'econonics' :json.load(open(MODEL_DIR+"econonics.json")),
u'sports' :json.load(open(MODEL_DIR+"sports.json")),
u'education' :json.load(open(MODEL_DIR+"education.json")),
u'religion' :json.load(open(MODEL_DIR+"religion.json")),
u'health' :json.load(open(MODEL_DIR+"health.json")),
}
#Assuming training data in above files
def create_matrix(l):
dic = dict()
for i in l :
index = str(i[1])
try :
dic[index].append([i[2],i[0]])
except :
dic[index] = []
dic[index].append([i[2],i[0]])
return dic
def Get_accuracy(l):
v=0.0
c=0.0
for i in l :
if (i[0] ==i[1]):
c+=1
else:
v+=1
return (c*100)/(c+v)
def Test(rows): #Rows to test
v = 0
l = []
count = 0
counts = json.load(open(MODEL_DIR+"data.json"))
calculated = {}
for cat in category_counts :
for word in category_counts[cat]:
v += category_counts[cat][word]
print "Total words ", v
for category in category_counts :
attributes = category_counts[category]
n= 0
for word in category_counts[category] :
n+= category_counts[category][word]
calculated[category] = n
print calculated
for row in rows :
count = count +1
pc = {u'entertain':1.0,u'politics':1.0,u'econonics':1.0,u'sports':1.0,u'education':1.0,u'religion':1.0,u'health':1.0}
id = row[1]
cats = json.loads(row[2])
sents = json.loads(row[3])
dp = json.loads(row[4])
for category in category_counts :
n = calculated[category]
pc[category] = Decimal(10**1000)
#for line in sents:
#print dp
#dp = line
for w in dp:
#print word
word = str(w)
if word not in category_counts[category]:
pc[category]=pc[category]*Decimal((1.0/(n+v)))
else:
pc[category]=pc[category]*Decimal(((1.0+category_counts[category][word])/(n+v)))
p=0
Category=u'Unable to decide'
for category in categories:
if p<pc[category]:
Category=category
p=pc[category]
for i in cats:
l.append([Category , i , id ] )
if (count % 200 == 0 ):
x =create_matrix(l)
print "Tested on" , count ,"Accuracy" , Get_accuracy(l)
f=open("choas","w")
f.write(json.dumps(x))
f.close()
x =create_matrix(l)
f=open("choas","w")
f.write(json.dumps(x))
f.close()
cursor = connection.cursor()
query ="SELECT * FROM `data_new` WHERE 1 "
cursor.execute(query)
rows = cursor.fetchall()
Test(rows)