#PROGRAM-1:IMPLEMENT AND DEMONSTRATE THE FIND-S ALGORITHM FOR FINDING THE MOST
SPECIFIC HYPOTHESIS BASED ON A GIVEN SET OF TRAINING DATA SAMPLES. READ THE
TRAINING DATA FROM A .CSV FILE.
program1.py
import csv

with open('program1.csv', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

# Start with the most specific hypothesis: every attribute constrained to '0'.
h = [['0', '0', '0', '0', '0', '0']]
for i in your_list:
    print(i)
    if i[-1] == "True":              # Find-S uses only positive examples
        j = 0
        for x in i:
            if x != "True":          # skip the target column
                # Generalize h just enough to cover this example.
                if x != h[0][j] and h[0][j] == '0':
                    h[0][j] = x
                elif x != h[0][j] and h[0][j] != '0':
                    h[0][j] = '?'
                else:
                    pass
                j = j + 1
                print("most specific hypothesis is")
                print(h)
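For reference, the same Find-S update rule can be packaged as a standalone function. The sketch below is not part of the lab listing (the function name and the assumption that the last column holds the "True"/"False" label are mine); applied to the data rows of program1.csv it returns ['Sunny', 'Warm', '?', 'Strong', '?', '?'], the same hypothesis the program prints last.

def find_s(examples):
    # examples: attribute rows whose last element is the "True"/"False" label
    h = None
    for row in examples:
        if row[-1] != "True":
            continue                 # Find-S ignores negative examples
        attrs = row[:-1]
        if h is None:
            h = list(attrs)          # first positive example is taken as-is
        else:
            # replace any attribute that disagrees with the example by '?'
            h = [hi if hi == a else '?' for hi, a in zip(h, attrs)]
    return h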
#DATASET FOR PROGRAM-1:
program1.csv
Sky,AirTemp,Humidity,Wind,Water,Forecast,EnjoySport
Sunny,Warm,Normal,Strong,Warm,Same,True
Sunny,Warm,High,Strong,Warm,Same,True
Rainy,Cold,High,Strong,Warm,Change,False
Sunny,Warm,High,Strong,Cool,Change,True
#OUTPUT FOR PROGRAM-1
['Sky', 'AirTemp', 'Humidity', 'Wind', 'Water', 'Forecast', 'EnjoySport']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'True']
most specific hypothesis is
[['Sunny', '0', '0', '0', '0', '0']]
most specific hypothesis is
[['Sunny', 'Warm', '0', '0', '0', '0']]
most specific hypothesis is
[['Sunny', 'Warm', 'Normal', '0', '0', '0']]
most specific hypothesis is
[['Sunny', 'Warm', 'Normal', 'Strong', '0', '0']]
most specific hypothesis is
[['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', '0']]
most specific hypothesis is
[['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']]
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'True']
most specific hypothesis is
[['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'False']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'True']
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', '?', 'Same']]
most specific hypothesis is
[['Sunny', 'Warm', '?', 'Strong', '?', '?']]
#PROGRAM-2:FOR A GIVEN SET OF TRAINING DATA EXAMPLES STORED IN A .CSV FILE,
IMPLEMENT AND DEMONSTRATE THE CANDIDATE-ELIMINATION ALGORITHM TO OUTPUT A
DESCRIPTION OF THE SET OF ALL HYPOTHESES CONSISTENT WITH THE TRAINING EXAMPLES.
program2.py
import pandas as pd

df = pd.read_csv("program2.csv")
# Split the examples into positives (for S) and negatives (for G).
spe_df = df.loc[df["enjoysport"].str.upper() == "YES"]
gene_df = df.loc[df["enjoysport"].str.upper() == "NO"]
spe_df = spe_df.iloc[:, :-1]
gene_df = gene_df.iloc[:, :-1]

# S: minimally generalize the first positive example over the remaining positives.
base = spe_df.iloc[0]
for x in range(1, len(spe_df)):
    base = base.where(spe_df.iloc[x] == base, other="???")
print("Specific:-\n", base.values)

# G: keep only those attributes of S that also rule out every negative example;
# each surviving attribute yields one maximally general hypothesis.
for x in range(len(gene_df)):
    base = base.where(base != gene_df.iloc[x], other="???")
print("General")
for i, x in enumerate(base):
    if x != "???":
        l = ["???"] * len(base)
        l[i] = x
        print(l)
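A quick consistency check can confirm the boundaries: every hypothesis printed above should cover all positive rows, and each general-boundary hypothesis should reject every negative row. A minimal sketch, with '???' treated as the wildcard (the helper name is assumed, not from the listing):

def covers(hypothesis, row):
    # row: attribute values only; '???' matches any value
    return all(h == "???" or h == v for h, v in zip(hypothesis, row))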
#DATASET FOR PROGRAM-2
program2.csv
sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,YES
sunny,warm,high,strong,warm,same,YES
rainy,cold,high,strong,warm,change,NO
sunny,warm,high,strong,cool,change,YES
#OUTPUT FOR PROGRAM-2
Specific:-
['sunny' 'warm' '???' 'strong' '???' '???']
General
['sunny', '???', '???', '???', '???', '???']
['???', 'warm', '???', '???', '???', '???']
#PROGRAM-3:Write a program to demonstrate the working of the decision tree
based ID3 algorithm. Use an appropriate data set for building the decision tree
and apply this knowledge to classify a new sample.
program3.py
import pandas as pd
import numpy as np

# header=0 makes pandas discard the CSV's own header row instead of reading
# it as a data row, so only the 14 training examples are loaded.
dataset = pd.read_csv('playtennis.csv', header=0,
                      names=['outlook', 'temperature', 'humidity', 'wind', 'class'])

def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    entropy = np.sum([(-counts[i] / np.sum(counts)) * np.log2(counts[i] / np.sum(counts))
                      for i in range(len(elements))])
    return entropy

def InfoGain(data, split_attribute_name, target_name="class"):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    Weighted_entropy = np.sum(
        [(counts[i] / np.sum(counts)) *
         entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name])
         for i in range(len(vals))])
    Information_Gain = total_entropy - Weighted_entropy
    return Information_Gain

def ID3(data, originaldata, features, target_attribute_name="class", parent_node_class=None):
    # All remaining examples share one class: return it as a leaf.
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    # No examples left: fall back to the majority class of the full data set.
    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name])[
            np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]
    # No features left to split on: return the majority class of the parent.
    elif len(features) == 0:
        return parent_node_class
    else:
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        # Split on the feature with the highest information gain.
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, dataset, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree

def predict(query, tree, default=1):
    for key in list(query.keys()):
        if key in list(tree.keys()):
            try:
                result = tree[key][query[key]]
            except KeyError:
                return default   # unseen attribute value: fall back to default
            if isinstance(result, dict):
                return predict(query, result)
            else:
                return result

def train_test_split(dataset):
    training_data = dataset.iloc[:14].reset_index(drop=True)
    return training_data

def test(data, tree):
    queries = data.iloc[:, :-1].to_dict(orient="records")
    predicted = pd.DataFrame(columns=["predicted"])
    for i in range(len(data)):
        predicted.loc[i, "predicted"] = predict(queries[i], tree, 1.0)
    print('The predicted accuracy is: ',
          (np.sum(predicted["predicted"] == data["class"]) / len(data)) * 100, '%')

XX = train_test_split(dataset)
training_data = XX
tree = ID3(training_data, training_data, training_data.columns[:-1])
print('\nDisplay Tree\n', tree)
print('len=', len(training_data))
test(training_data, tree)
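The choice of outlook at the root can be verified by printing each attribute's information gain; for the classic 14-example PlayTennis data the gains are roughly 0.25 (outlook), 0.03 (temperature), 0.15 (humidity) and 0.05 (wind), so outlook heads the tree. A quick check reusing the functions defined above:

for col in ['outlook', 'temperature', 'humidity', 'wind']:
    print(col, round(InfoGain(dataset, col), 3))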
#DATASET FOR PROGRAM-3
playtennis.csv
Outlook,Temperature,Humidity,Windy,PlayTennis
Sunny,Hot,High,Weak,No
Sunny,Hot,High,Strong,No
Overcast,Hot,High,Weak,Yes
Rain,Mild,High,Weak,Yes
Rain,Cool,Normal,Weak,Yes
Rain,Cool,Normal,Strong,No
Overcast,Cool,Normal,Strong,Yes
Sunny,Mild,High,Weak,No
Sunny,Cool,Normal,Weak,Yes
Rain,Mild,Normal,Weak,Yes
Sunny,Mild,Normal,Strong,Yes
Overcast,Mild,High,Strong,Yes
Overcast,Hot,Normal,Weak,Yes
Rain,Mild,High,Strong,No
#OUTPUT FOR PROGRAM-3
Display Tree
{'outlook': {'Overcast': 'Yes', 'Rain': {'wind': {'Strong': 'No', 'Weak':
'Yes'}}, 'Sunny': {'humidity': {'High': 'No', 'Normal': 'Yes'}}}}
len= 14
The predicted accuracy is: 100.0 %
#PROGRAM-4:Build an Artificial Neural Network by implementing the
Backpropagation algorithm and test the same using appropriate data sets.
program4.py
import numpy as np

# Toy data set: (hours slept, hours studied) -> test score.
x = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
x = x / np.amax(x, axis=0)   # scale features to [0, 1]
y = y / 100                  # scale targets to [0, 1]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derivatives_sigmoid(x):
    # sigmoid derivative, written in terms of the sigmoid's output
    return x * (1 - x)

epoch = 5000
lr = 0.1
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1

wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # forward pass
    hinp = np.dot(x, wh) + bh
    hlayer_act = sigmoid(hinp)
    outinp = np.dot(hlayer_act, wout) + bout
    output = sigmoid(outinp)
    # backward pass (delta rule)
    EO = y - output
    d_output = EO * derivatives_sigmoid(output)
    EH = d_output.dot(wout.T)
    d_hiddenlayer = EH * derivatives_sigmoid(hlayer_act)
    # weight and bias updates (the bias updates were missing from the
    # original listing, which left bh and bout untrained)
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += x.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input:\n" + str(x))
print("Actual Output: \n" + str(y))
print("PredictedOutput: \n", output)
#OUTPUT FOR PROGRAM-4
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
PredictedOutput:
[[0.86284816]
[0.85062962]
[0.86425164]]
#PROGRAM-5:Write a program to implement the naïve Bayesian classifier for a
sample training data set stored as a .CSV file. Compute the accuracy of the
classifier, considering a few test data sets.
program5.py
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB

data = pd.read_csv('playtennis.csv')
print("The first 5 values of data is :\n", data.head())
x = data.iloc[:, :-1]
print("\nThe first 5 values of train data is\n", x.head())
y = data.iloc[:, -1]
print("\nThe first 5 values of train output is\n", y.head())

# Encode each categorical attribute as integers.
le_Outlook = LabelEncoder()
x.Outlook = le_Outlook.fit_transform(x.Outlook)
le_Temperature = LabelEncoder()
x.Temperature = le_Temperature.fit_transform(x.Temperature)
le_Humidity = LabelEncoder()
x.Humidity = le_Humidity.fit_transform(x.Humidity)
le_Windy = LabelEncoder()
x.Windy = le_Windy.fit_transform(x.Windy)
print("\nNow the train data is :\n", x.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the train output is \n", y)

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)

classifier = GaussianNB()
classifier.fit(x_train, y_train)

from sklearn.metrics import accuracy_score
print("Accuracy is:", accuracy_score(y_test, classifier.predict(x_test)))
#DATASET FOR PROGRAM-5
playtennis.csv
Outlook,Temperature,Humidity,Windy,PlayTennis
Sunny,Hot,High,Weak,No
Sunny,Hot,High,Strong,No
Overcast,Hot,High,Weak,Yes
Rain,Mild,High,Weak,Yes
Rain,Cool,Normal,Weak,Yes
Rain,Cool,Normal,Strong,No
Overcast,Cool,Normal,Strong,Yes
Sunny,Mild,High,Weak,No
Sunny,Cool,Normal,Weak,Yes
Rain,Mild,Normal,Weak,Yes
Sunny,Mild,Normal,Strong,Yes
Overcast,Mild,High,Strong,Yes
Overcast,Hot,Normal,Weak,Yes
Rain,Mild,High,Strong,No
#OUTPUT FOR PROGRAM-5
The first 5 values of data is :
Outlook Temperature Humidity Windy PlayTennis
0 Sunny Hot High Weak No
1 Sunny Hot High Strong No
2 Overcast Hot High Weak Yes
3 Rain Mild High Weak Yes
4 Rain Cool Normal Weak Yes
The first 5 values of train data is
Outlook Temperature Humidity Windy
0 Sunny Hot High Weak
1 Sunny Hot High Strong
2 Overcast Hot High Weak
3 Rain Mild High Weak
4 Rain Cool Normal Weak
The first 5 values of train output is
0 No
1 No
2 Yes
3 Yes
4 Yes
Name: PlayTennis, dtype: object
Now the train data is :
Outlook Temperature Humidity Windy
0 2 1 0 1
1 2 1 0 0
2 0 1 0 1
3 1 2 0 1
4 1 0 1 1
Now the train output is
[0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Accuracy is: 0.6666666666666666
#PROGRAM-6:Assuming a set of documents that need to be classified, use the
naïve Bayesian Classifier model to perform this task. Built-in Java classes/API
can be used to write the program. Calculate the accuracy, precision, and recall
for your data set.
program6.py
import pandas as pd

msg = pd.read_csv('prog6.csv', names=['message', 'label'])
print('The dimensions of the dataset', msg.shape)
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
x = msg.message
y = msg.labelnum
print(x)
print(y)

from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)

# Turn each message into a bag-of-words count vector.
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
print(count_vect.get_feature_names())
df = pd.DataFrame(xtrain_dtm.toarray(), columns=count_vect.get_feature_names())
print(df)
print(xtrain_dtm)

from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

from sklearn import metrics
print('Accuracy Metrics')
print('Accuracy of the classifier is', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
print('Recall and Precision')
print(metrics.recall_score(ytest, predicted))
print(metrics.precision_score(ytest, predicted))

docs_new = ['I like this place', 'My boss is not my saviour']
x_new_counts = count_vect.transform(docs_new)
predictednew = clf.predict(x_new_counts)
for doc, category in zip(docs_new, predictednew):
    # map the numeric prediction back to its text label
    print('%s -> %s' % (doc, 'pos' if category == 1 else 'neg'))
#DATASET FOR PROGRAM-6
prog6.csv
I love this sandwich,pos
This is an amazing place,pos
I feel very good about these beers,pos
This is my best work,pos
What an amazing view,pos
I do not like this restaurant,neg
I am tired of this stuff,neg
I cant deal with this,neg
He is my sworn enemy,neg
My boss is horrible,neg
This is an amazing place,neg
I do not like the taste of third juice,neg
I love to dance,pos
I am sick and tired of this place,neg
What a great holiday,pos
This is a bad locality to stay,neg
We will have good fun tomorrow,pos
I went to my enemy's house today,neg
#OUTPUT FOR PROGRAM-6
The dimensions of the dataset (18, 2)
0 I love this sandwich
1 This is an amazing place
2 I feel very good about these beers
3 This is my best work
4 What an amazing view
5 I do not like this restaurant
6 I am tired of this stuff
7 I cant deal with this
8 He is my sworn enemy
9 My boss is horrible
10 This is an amazing place
11 I do not like the taste of third juice
12 I love to dance
13 I am sick and tired of this place
14 What a great holiday
15 This is a bad locality to stay
16 We will have good fun tomorrow
17 I went to my enemy's house today
Name: message, dtype: object
0 1
1 1
2 1
3 1
4 1
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 1
13 0
14 1
15 0
16 1
17 0
Name: labelnum, dtype: int64
(5,)
(13,)
(5,)
(13,)
['about', 'am', 'bad', 'beers', 'best', 'boss', 'cant', 'dance', 'deal', 'do',
'enemy', 'feel', 'fun', 'good', 'great', 'have', 'he', 'holiday', 'horrible',
'house', 'is', 'juice', 'like', 'locality', 'love', 'my', 'not', 'of',
'restaurant', 'stay', 'stuff', 'sworn', 'taste', 'the', 'these', 'third',
'this', 'tired', 'to', 'today', 'tomorrow', 'very', 'we', 'went', 'what',
'will', 'with', 'work']
about am bad beers best boss cant dance deal do ... to today \
0 0 0 0 0 0 0 0 0 0 0 ... 0 0
1 0 0 0 0 0 0 0 0 0 0 ... 0 0
2 0 0 0 0 0 0 0 0 0 1 ... 0 0
3 0 0 0 0 1 0 0 0 0 0 ... 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 1 0
5 0 0 0 0 0 1 0 0 0 0 ... 0 0
6 0 0 0 0 0 0 0 0 0 0 ... 0 0
7 0 1 0 0 0 0 0 0 0 0 ... 0 0
8 0 0 0 0 0 0 0 0 0 0 ... 1 1
9 0 0 0 0 0 0 0 1 0 0 ... 1 0
10 0 0 0 0 0 0 1 0 1 0 ... 0 0
11 1 0 0 1 0 0 0 0 0 0 ... 0 0
12 0 0 0 0 0 0 0 0 0 1 ... 0 0
tomorrow very we went what will with work
0 1 0 1 0 0 1 0 0
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 1
4 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0
6 0 0 0 0 1 0 0 0
7 0 0 0 0 0 0 0 0
8 0 0 0 1 0 0 0 0
9 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 1 0
11 0 1 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0
[13 rows x 48 columns]
(0, 42) 1
(0, 45) 1
(0, 15) 1
(0, 13) 1
(0, 12) 1
(0, 40) 1
(1, 16) 1
(1, 20) 1
(1, 25) 1
(1, 31) 1
(1, 10) 1
(2, 9) 1
(2, 26) 1
(2, 22) 1
(2, 33) 1
(2, 32) 1
(2, 27) 1
(2, 35) 1
(2, 21) 1
(3, 20) 1
(3, 25) 1
(3, 36) 1
(3, 4) 1
(3, 47) 1
(4, 20) 1
: :
(7, 30) 1
(8, 25) 1
(8, 10) 1
(8, 38) 1
(8, 43) 1
(8, 19) 1
(8, 39) 1
(9, 38) 1
(9, 24) 1
(9, 7) 1
(10, 36) 1
(10, 6) 1
(10, 8) 1
(10, 46) 1
(11, 13) 1
(11, 11) 1
(11, 41) 1
(11, 0) 1
(11, 34) 1
(11, 3) 1
(12, 9) 1
(12, 26) 1
(12, 22) 1
(12, 36) 1
(12, 28) 1
Accuracy Metrics
Accuracy of the classifier is 0.6
Confusion matrix
[[2 0]
[2 1]]
Recall and Precision
0.3333333333333333
I like this place->1
My boss is not my saviour->1
#PROGRAM-7:Write a program to construct a Bayesian network considering medical
data. Use this model to demonstrate the diagnosis of heart patients using the
standard Heart Disease Data Set. You can use Java/Python ML library classes/API.
program7.py
import pandas as pd

data = pd.read_csv("program7.csv")
heart_disease = pd.DataFrame(data)
print(heart_disease)

from pgmpy.models import BayesianModel
# Network structure: each tuple is a directed edge (parent, child).
model = BayesianModel([
    ('age', 'Lifestyle'),
    ('Gender', 'Lifestyle'),
    ('Family', 'heartdisease'),
    ('diet', 'cholestrol'),
    ('Lifestyle', 'diet'),
    ('cholestrol', 'heartdisease')
])

from pgmpy.estimators import MaximumLikelihoodEstimator
model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)

from pgmpy.inference import VariableElimination
HeartDisease_infer = VariableElimination(model)

print('For age Enter { SuperSeniorCitizen:0,SeniorCitizen:1,MiddleAged:2,Youth:3,Teen:4 }')
print('For Gender Enter{ Male:0,Female:1 }')
print('For Family History Enter{ Yes:1,No:0 }')
print('For diet Enter{ High:0,Medium:1 }')
print('For lifestyle Enter{ Athlete:0,Active:1,Moderate:2,Sedentary:3 }')
print('For cholestrol Enter{ High:0,BorderLine:1,Normal:2 }')

q = HeartDisease_infer.query(variables=['heartdisease'], evidence={
    'age': int(input('Enter age:')),
    'Gender': int(input('Enter Gender:')),
    'Family': int(input('Enter Family History:')),
    'diet': int(input('Enter Diet:')),
    'Lifestyle': int(input('Enter Lifestyle:')),
    'cholestrol': int(input('Enter cholestrol:'))
})
print(q)
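The conditional probability tables estimated by MaximumLikelihoodEstimator can also be inspected directly, which helps in checking what the network learned from so few rows; for example:

# print the CPD learned for the target node
print(model.get_cpds('heartdisease'))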
#DATASET FOR PROGRAM-7:
program7.csv
age,Gender,Family,diet,Lifestyle,cholestrol,heartdisease
0,0,1,1,3,0,1
0,1,1,1,3,0,1
1,0,0,0,2,1,1
4,0,1,1,3,2,0
3,1,1,0,0,2,0
2,0,1,1,1,0,1
4,0,1,0,2,0,1
0,0,1,1,3,0,1
3,1,1,0,0,2,0
1,1,0,0,0,2,1
4,1,0,1,2,0,1
4,0,1,1,3,2,0
2,1,0,0,0,0,0
2,0,1,1,1,0,1
3,1,1,0,0,1,0
0,0,1,0,0,2,1
1,1,0,1,2,1,1
3,1,1,1,0,1,0
4,0,1,1,3,2,0
#OUTPUT FOR PROGRAM-7
age Gender Family diet Lifestyle cholestrol heartdisease
0 0 0 1 1 3 0 1
1 0 1 1 1 3 0 1
2 1 0 0 0 2 1 1
3 4 0 1 1 3 2 0
4 3 1 1 0 0 2 0
5 2 0 1 1 1 0 1
6 4 0 1 0 2 0 1
7 0 0 1 1 3 0 1
8 3 1 1 0 0 2 0
9 1 1 0 0 0 2 1
10 4 1 0 1 2 0 1
11 4 0 1 1 3 2 0
12 2 1 0 0 0 0 0
13 2 0 1 1 1 0 1
14 3 1 1 0 0 1 0
15 0 0 1 0 0 2 1
16 1 1 0 1 2 1 1
17 3 1 1 1 0 1 0
18 4 0 1 1 3 2 0
For age Enter {
SuperSeniorCitizen:0,SeniorCitizen:1,MiddleAged:2,Youth:3,Teen:4 }
For Gender Enter{ Male:0,Female:1 }
For Family History Enter{ Yes:1,No:0 }
For diet Enter{ High:0,Medium:1 }
For lifestyle Enter{ Athlete:0,Active:1,Moderate:2,Sedentary:3 }
For cholestrol Enter{ High:0,BorderLine:1,Normal:2 }
Enter age:4
Enter Gender:1
Enter Family History:0
Enter Diet:1
Enter Lifestyle:3
Enter cholestrol:0
+-----------------+---------------------+
| heartdisease | phi(heartdisease) |
+=================+=====================+
| heartdisease(0) | 0.5000 |
+-----------------+---------------------+
| heartdisease(1) | 0.5000 |
+-----------------+---------------------+
#PROGRAM-8:Apply EM algorithm to cluster a set of data stored in a .CSV file.
Use the same data set for clustering using the k-Means algorithm. Compare the
results of these two algorithms and comment on the quality of clustering. You
can add Java/Python ML library classes/API in the program.
program8.py
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
import pandas as pd

X = pd.read_csv("program8.csv")
x1 = X['V1'].values
x2 = X['V2'].values
X = np.array(list(zip(x1, x2))).reshape(len(x1), 2)

plt.plot()
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Dataset')
plt.scatter(x1, x2)
plt.show()

# EM clustering via a Gaussian mixture model
gmm = GaussianMixture(n_components=3)
gmm.fit(X)
em_predictions = gmm.predict(X)
print("\nEM predictions")
print(em_predictions)
print("mean:\n", gmm.means_)
print('\n')
print("Covariances\n", gmm.covariances_)
print(X)
plt.title('Expectation Maximization')
plt.scatter(X[:, 0], X[:, 1], c=em_predictions, s=50)
plt.show()

# k-Means clustering on the same data
kmeans = KMeans(n_clusters=3)
kmeans.fit(X)
print(kmeans.cluster_centers_)
print(kmeans.labels_)
plt.title('KMEANS')
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='rainbow')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], color='black')
plt.show()
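To compare the two clusterings by a number rather than by eye, a silhouette score can be computed for each labelling; the sketch below is not part of the original listing:

from sklearn.metrics import silhouette_score

print("EM silhouette:     ", silhouette_score(X, em_predictions))
print("k-means silhouette:", silhouette_score(X, kmeans.labels_))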
#DATASET FOR PROGRAM-8:
program8.csv
V1,V2
2.072345,-3.24169
17.93671,15.78481
1.083576,7.319176
11.12067,14.40678
23.71155,2.557729
24.16993,32.02478
21.66578,4.892855
4.693684,12.34217
19.21191,-1.12137
4.230391,-4.44154
9.12713,23.60572
0.407503,15.29705
7.314846,3.309312
-3.4384,-12.0253
17.63935,-3.21235
4.415292,22.81555
11.94122,8.122487
0.725853,1.806819
8.815273,28.1326
-5.77359,1.0248
18.76943,24.16946
#OUTPUT FOR PROGRAM-8: three scatter plots (the raw data, the EM/GMM
clustering, and the k-means clustering with its centroids marked in black)
#PROGRAM-9:Write a program to implement k-Nearest Neighbour algorithm to
classify the iris data set. Print both correct and wrong predictions.
Java/Python ML library classes can be used for this problem.
program9.py
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn import datasets
iris=datasets.load_iris()
iris_data=iris.data
iris_label=iris.target
print(iris_data)
print(iris_label)
x_train,x_test,y_train,y_test=train_test_split(iris_data,iris_label)
classifier=KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train,y_train)
y_pred=classifier.predict(x_test)
print('Confusion matrix is as follows')
print(confusion_matrix(y_test,y_pred))
print('Accuracy Metrics')
print(classification_report(y_test,y_pred))
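The assignment also asks for the correct and wrong predictions to be printed individually; a short loop such as the following (not in the original listing) covers that requirement:

for i in range(len(y_test)):
    status = "Correct" if y_pred[i] == y_test[i] else "Wrong"
    print(status, "- predicted:", iris.target_names[y_pred[i]],
          "actual:", iris.target_names[y_test[i]])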
#OUTPUT FOR PROGRAM-9:
[[5.1 3.5 1.4 0.2]
[4.9 3. 1.4 0.2]
[4.7 3.2 1.3 0.2]
[4.6 3.1 1.5 0.2]
[5. 3.6 1.4 0.2]
[5.4 3.9 1.7 0.4]
[4.6 3.4 1.4 0.3]
[5. 3.4 1.5 0.2]
[4.4 2.9 1.4 0.2]
[4.9 3.1 1.5 0.1]
[5.4 3.7 1.5 0.2]
[4.8 3.4 1.6 0.2]
[4.8 3. 1.4 0.1]
[4.3 3. 1.1 0.1]
[5.8 4. 1.2 0.2]
[5.7 4.4 1.5 0.4]
[5.4 3.9 1.3 0.4]
[5.1 3.5 1.4 0.3]
[5.7 3.8 1.7 0.3]
[5.1 3.8 1.5 0.3]
[5.4 3.4 1.7 0.2]
[5.1 3.7 1.5 0.4]
[4.6 3.6 1. 0.2]
[5.1 3.3 1.7 0.5]
[4.8 3.4 1.9 0.2]
[5. 3. 1.6 0.2]
[5. 3.4 1.6 0.4]
[5.2 3.5 1.5 0.2]
[5.2 3.4 1.4 0.2]
[4.7 3.2 1.6 0.2]
[4.8 3.1 1.6 0.2]
[5.4 3.4 1.5 0.4]
[5.2 4.1 1.5 0.1]
[5.5 4.2 1.4 0.2]
[4.9 3.1 1.5 0.2]
[5. 3.2 1.2 0.2]
[5.5 3.5 1.3 0.2]
[4.9 3.6 1.4 0.1]
[4.4 3. 1.3 0.2]
[5.1 3.4 1.5 0.2]
[5. 3.5 1.3 0.3]
[4.5 2.3 1.3 0.3]
[4.4 3.2 1.3 0.2]
[5. 3.5 1.6 0.6]
[5.1 3.8 1.9 0.4]
[4.8 3. 1.4 0.3]
[5.1 3.8 1.6 0.2]
[4.6 3.2 1.4 0.2]
[5.3 3.7 1.5 0.2]
[5. 3.3 1.4 0.2]
[7. 3.2 4.7 1.4]
[6.4 3.2 4.5 1.5]
[6.9 3.1 4.9 1.5]
[5.5 2.3 4. 1.3]
[6.5 2.8 4.6 1.5]
[5.7 2.8 4.5 1.3]
[6.3 3.3 4.7 1.6]
[4.9 2.4 3.3 1. ]
[6.6 2.9 4.6 1.3]
[5.2 2.7 3.9 1.4]
[5. 2. 3.5 1. ]
[5.9 3. 4.2 1.5]
[6. 2.2 4. 1. ]
[6.1 2.9 4.7 1.4]
[5.6 2.9 3.6 1.3]
[6.7 3.1 4.4 1.4]
[5.6 3. 4.5 1.5]
[5.8 2.7 4.1 1. ]
[6.2 2.2 4.5 1.5]
[5.6 2.5 3.9 1.1]
[5.9 3.2 4.8 1.8]
[6.1 2.8 4. 1.3]
[6.3 2.5 4.9 1.5]
[6.1 2.8 4.7 1.2]
[6.4 2.9 4.3 1.3]
[6.6 3. 4.4 1.4]
[6.8 2.8 4.8 1.4]
[6.7 3. 5. 1.7]
[6. 2.9 4.5 1.5]
[5.7 2.6 3.5 1. ]
[5.5 2.4 3.8 1.1]
[5.5 2.4 3.7 1. ]
[5.8 2.7 3.9 1.2]
[6. 2.7 5.1 1.6]
[5.4 3. 4.5 1.5]
[6. 3.4 4.5 1.6]
[6.7 3.1 4.7 1.5]
[6.3 2.3 4.4 1.3]
[5.6 3. 4.1 1.3]
[5.5 2.5 4. 1.3]
[5.5 2.6 4.4 1.2]
[6.1 3. 4.6 1.4]
[5.8 2.6 4. 1.2]
[5. 2.3 3.3 1. ]
[5.6 2.7 4.2 1.3]
[5.7 3. 4.2 1.2]
[5.7 2.9 4.2 1.3]
[6.2 2.9 4.3 1.3]
[5.1 2.5 3. 1.1]
[5.7 2.8 4.1 1.3]
[6.3 3.3 6. 2.5]
[5.8 2.7 5.1 1.9]
[7.1 3. 5.9 2.1]
[6.3 2.9 5.6 1.8]
[6.5 3. 5.8 2.2]
[7.6 3. 6.6 2.1]
[4.9 2.5 4.5 1.7]
[7.3 2.9 6.3 1.8]
[6.7 2.5 5.8 1.8]
[7.2 3.6 6.1 2.5]
[6.5 3.2 5.1 2. ]
[6.4 2.7 5.3 1.9]
[6.8 3. 5.5 2.1]
[5.7 2.5 5. 2. ]
[5.8 2.8 5.1 2.4]
[6.4 3.2 5.3 2.3]
[6.5 3. 5.5 1.8]
[7.7 3.8 6.7 2.2]
[7.7 2.6 6.9 2.3]
[6. 2.2 5. 1.5]
[6.9 3.2 5.7 2.3]
[5.6 2.8 4.9 2. ]
[7.7 2.8 6.7 2. ]
[6.3 2.7 4.9 1.8]
[6.7 3.3 5.7 2.1]
[7.2 3.2 6. 1.8]
[6.2 2.8 4.8 1.8]
[6.1 3. 4.9 1.8]
[6.4 2.8 5.6 2.1]
[7.2 3. 5.8 1.6]
[7.4 2.8 6.1 1.9]
[7.9 3.8 6.4 2. ]
[6.4 2.8 5.6 2.2]
[6.3 2.8 5.1 1.5]
[6.1 2.6 5.6 1.4]
[7.7 3. 6.1 2.3]
[6.3 3.4 5.6 2.4]
[6.4 3.1 5.5 1.8]
[6. 3. 4.8 1.8]
[6.9 3.1 5.4 2.1]
[6.7 3.1 5.6 2.4]
[6.9 3.1 5.1 2.3]
[5.8 2.7 5.1 1.9]
[6.8 3.2 5.9 2.3]
[6.7 3.3 5.7 2.5]
[6.7 3. 5.2 2.3]
[6.3 2.5 5. 1.9]
[6.5 3. 5.2 2. ]
[6.2 3.4 5.4 2.3]
[5.9 3. 5.1 1.8]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
2 2]
Confusion matrix is as follows
[[14 0 0]
[ 0 12 1]
[ 0 1 10]]
Accuracy Metrics
precision recall f1-score support
0 1.00 1.00 1.00 14
1 0.92 0.92 0.92 13
2 0.91 0.91 0.91 11
accuracy 0.95 38
macro avg 0.94 0.94 0.94 38
weighted avg 0.95 0.95 0.95 38
#PROGRAM-10:Implement the non-parametric Locally Weighted Regression algorithm
in order to fit data points. Select an appropriate data set for your experiment
and draw graphs.
program10.py
from math import ceil
import math
import numpy as np
from scipy import linalg

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    # tricube weights based on distance to each point's r-th nearest neighbour
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            # weighted least-squares fit of a local line at x[i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]
        # robustifying weights: down-weight points with large residuals
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2
    return yest

n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)

import matplotlib.pyplot as plt
plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()
#OUTPUT FOR PROGRAM-10: a scatter of the noisy sine-wave samples (red dots)
overlaid with the fitted LOWESS curve (blue line)