
Assignment_4_Instructions

July 14, 2021

[1]: from sklearn.datasets import make_classification
     from sklearn.model_selection import train_test_split
     from sklearn.preprocessing import StandardScaler
     from sklearn.metrics.pairwise import euclidean_distances
     from tqdm import tqdm
     import numpy as np

     # generate a 2-D, 2-class toy dataset
     x, y = make_classification(n_samples=10000, n_features=2, n_informative=2,
                                n_redundant=0, n_clusters_per_class=1, random_state=60)

     # stratified train/test split (train_test_split's default 75/25 split)
     X_train, X_test, y_train, y_test = train_test_split(x, y, stratify=y, random_state=42)

[7]: X_train.shape

[7]: (7500, 2)

[2]: %matplotlib inline
     import matplotlib.pyplot as plt

     # visualize the two classes on the test split
     colors = {0: 'red', 1: 'blue'}
     plt.scatter(X_test[:, 0], X_test[:, 1], c=[colors[label] for label in y_test])
     plt.show()

1 Implementing Custom RandomSearchCV
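The cell below implements a simplified randomized search: for each randomly sampled value of the hyperparameter k it runs folds-fold cross-validation (using contiguous, unshuffled folds) and records the mean train and validation accuracy across the folds.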
[21]: from sklearn.metrics import accuracy_score
      from random import randint

      def select_test_indices(x_train, j, folds):
          # return the indices of the j-th contiguous block, used as the validation fold
          return list(range(int(j * len(x_train) / folds), int((j + 1) * len(x_train) / folds)))

      def RandomSearch(x_train, y_train, classifier, params, folds):
          trainscores = []
          testscores = []
          for k in tqdm(params):
              trainscores_folds = []
              testscores_folds = []
              for j in range(0, folds):
                  # check this out: https://stackoverflow.com/a/9755548/4084039
                  test_indices = select_test_indices(x_train, j, folds)
                  train_indices = list(set(range(0, len(x_train))) - set(test_indices))

                  # selecting the data points based on the train_indices and test_indices
                  X_train = x_train[train_indices]
                  Y_train = y_train[train_indices]
                  X_test = x_train[test_indices]
                  Y_test = y_train[test_indices]

                  classifier.n_neighbors = k
                  classifier.fit(X_train, Y_train)

                  # validation accuracy on the held-out fold
                  Y_predicted = classifier.predict(X_test)
                  testscores_folds.append(accuracy_score(Y_test, Y_predicted))

                  # training accuracy on the remaining folds
                  Y_predicted = classifier.predict(X_train)
                  trainscores_folds.append(accuracy_score(Y_train, Y_predicted))
              trainscores.append(np.mean(np.array(trainscores_folds)))
              testscores.append(np.mean(np.array(testscores_folds)))
          return trainscores, testscores
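As a sanity check (not part of the assignment itself), the per-k validation score can be cross-checked against sklearn's built-in cross_val_score; a minimal sketch, assuming the same X_train and y_train as above. Note that cross_val_score uses stratified folds for classifiers by default, so its numbers may differ slightly from the contiguous folds used here.

[ ]: # Sketch only: compare one candidate k against sklearn's own CV utility.
     from sklearn.model_selection import cross_val_score
     from sklearn.neighbors import KNeighborsClassifier

     k = 5  # an arbitrary candidate value, for illustration
     cv_scores = cross_val_score(KNeighborsClassifier(n_neighbors=k),
                                 X_train, y_train, cv=3, scoring='accuracy')
     print(cv_scores.mean())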

[32]: from sklearn.neighbors import KNeighborsClassifier
      import matplotlib.pyplot as plt
      from random import randint
      import warnings
      warnings.filterwarnings("ignore")

      neigh = KNeighborsClassifier()

      # sample 10 random candidate values of k in [1, 30] (duplicates are possible)
      params = [randint(1, 30) for p in range(0, 10)]
      params.sort()
      folds = 3

      trainscores, testscores = RandomSearch(X_train, y_train, neigh, params, folds)

      plt.plot(params, trainscores, label='train curve')
      plt.plot(params, testscores, label='test curve')
      plt.title('Hyper-parameter VS accuracy plot')
      plt.legend()
      plt.show()

100%|██████████| 10/10 [00:17<00:00, 1.75s/it]
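To actually select a hyperparameter from the search, one would take the candidate k with the highest mean validation accuracy; a one-line sketch (the name best_k is ours, not part of the original code):

[ ]: # Sketch only: pick the k whose mean validation accuracy is highest.
     best_k = params[int(np.argmax(testscores))]
     print("best k:", best_k)

Cell [34] below uses k = 16, presumably the value this procedure picked on this run.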

[33]: from matplotlib.colors import ListedColormap

      def plot_decision_boundary(X1, X2, y, clf):
          # Create color maps for the background regions and the training points
          cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
          cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

          x_min, x_max = X1.min() - 1, X1.max() + 1
          y_min, y_max = X2.min() - 1, X2.max() + 1

          # evaluate the classifier on a fine grid to colour the decision regions
          xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                               np.arange(y_min, y_max, 0.02))
          Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
          Z = Z.reshape(xx.shape)

          plt.figure()
          plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
          # Plot also the training points
          plt.scatter(X1, X2, c=y, cmap=cmap_bold)

          plt.xlim(xx.min(), xx.max())
          plt.ylim(yy.min(), yy.max())
          plt.title("2-Class classification (k = %i)" % (clf.n_neighbors))
          plt.show()

[34]: neigh = KNeighborsClassifier(n_neighbors=16)
      neigh.fit(X_train, y_train)
      plot_decision_boundary(X_train[:, 0], X_train[:, 1], y_train, neigh)
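To see how k affects the smoothness of the boundary, the same helper can be called with other values; for example (a quick sketch, with the contrast values chosen arbitrarily):

[ ]: # Sketch only: contrast a very small k (jagged boundary) with the chosen k = 16.
     for k in (1, 16):
         clf = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
         plot_decision_boundary(X_train[:, 0], X_train[:, 1], y_train, clf)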
