knn-final
September 28, 2023
K-NEAREST NEIGHBOUR CLASSIFICATION (KNN) - ASSIGNMENT 3
1. Importing the Libraries and Dataset
[9]: import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
[37]: dataset=pd.read_csv('Social_Network_Ads.csv')
dataset.head()
[37]: User ID Gender Age EstimatedSalary Purchased
0 15624510 Male 19 19000 0
1 15810944 Male 35 20000 0
2 15668575 Female 26 43000 0
3 15603246 Female 27 57000 0
4 15804002 Male 19 76000 0
[ ]: SPLITTING OF DATASET
[41]: print(y_train)
[0 1 0 1 1 1 0 0 0 0 0 0 1 1 1 0 1 0 0 1 0 1 0 1 0 0 1 1 1 1 0 1 0 1 0 0 1
0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 1 1 0 0 1 0 1
1 1 0 0 1 1 0 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 0
1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 0 0 1 1 0 0
0 0 1 0 1 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 0 0 0 1 0 0
0 0 0 0 1 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0
0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0
0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1
0 0 0 0]
[43]: print(y_test)
[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 1 1 0 0 0 0
0 0 1 0 0 0 0 1 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 1 0 0 1
0 0 0 0 1 1 1 0 0 0 1 1 0 1 1 0 0 1 0 0 0 1 0 1 1 1]
FEATURE SCALING
1
[44]: from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both the training and the test features.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)
TRAINING THE MODEL
[47]: from sklearn.neighbors import KNeighborsClassifier
# K-NN with 5 neighbours. The default metric is minkowski, and with p=2 it is
# equivalent to the standard Euclidean metric.
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
# Kept as the last expression of the cell so the fitted estimator is displayed.
classifier.fit(X_train, y_train)
[47]: KNeighborsClassifier()
[ ]: PREDICTING THE TEST SET
[48]: y_pred = classifier.predict(X_test)
[ ]: CONFUSION MATRIX
[50]: from sklearn.metrics import confusion_matrix, accuracy_score
# Tabulate the test-set predictions against the true labels and print the table.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Left as a bare expression so the notebook displays the accuracy as cell output.
accuracy_score(y_true=y_test, y_pred=y_pred)
[[64 4]
[ 3 29]]
[50]: 0.93
VISUALISING RESULTS
[57]: import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Plot the classifier's decision regions together with the test-set points.
# Take (at most the first 1000) test rows and undo the feature scaling so the
# axes are in the original feature units.
X_set, y_set = sc.inverse_transform(X_test[:1000]), y_test[:1000]
# Define the range for the mesh grid, padded beyond the data on every side
x1_min, x1_max = X_set[:, 0].min() - 10, X_set[:, 0].max() + 10
x2_min, x2_max = X_set[:, 1].min() - 1000, X_set[:, 1].max() + 1000
# Create a mesh grid with smaller steps
x1, x2 = np.meshgrid(np.arange(start=x1_min, stop=x1_max, step=1),
                     np.arange(start=x2_min, stop=x2_max, step=100))
# Predict the class of every grid point and reshape back to the grid.
# The grid is in original units, so it is passed through the scaler first:
# the classifier was fitted on scaled features.
grid_points = np.array([x1.ravel(), x2.ravel()]).T
Z = classifier.predict(sc.transform(grid_points)).reshape(x1.shape)
# Define a prettier color palette
custom_cmap = ListedColormap(['#FF7F50', '#00FF7F'])
# Create a figure and axis
fig, ax = plt.subplots(figsize=(10, 8))
# Plot the contour map using custom colors
contour = ax.contourf(x1, x2, Z, alpha=0.75, cmap=custom_cmap)
# Plot the actual data points with different markers for each class
markers = ['o', 's']
for i, j in enumerate(np.unique(y_set)):
    # Use `color=` rather than `c=`: a single RGBA tuple passed as `c` is
    # ambiguous with per-point value mapping and raises a matplotlib
    # UserWarning.
    ax.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
               color=custom_cmap(i), label=f'Class {j}',
               marker=markers[i], edgecolors='k')
# Set labels and title
ax.set_xlim(x1.min(), x1.max())
ax.set_ylim(x2.min(), x2.max())
ax.set_title('K-NN Classification (Test set)')
ax.set_xlabel('Age')
ax.set_ylabel('Estimated Salary')
# Add legend
ax.legend()
# Add a colorbar for the contour map
cbar = plt.colorbar(contour)
cbar.set_label('Predicted Class', rotation=270, labelpad=20)
# Customize the gridlines
ax.grid(True, linestyle='--', alpha=0.5)
# Show the plot
plt.show()
C:\Users\Keerthana\AppData\Local\Temp\ipykernel_1356\2225538368.py:30:
UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence,
which should be avoided as value-mapping will have precedence in case its length
matches with *x* & *y*. Please use the *color* keyword-argument or provide a 2D
array with a single row if you intend to specify the same RGB or RGBA value for
all points.
ax.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c=custom_cmap(i),
label=f'Class {j}', marker=markers[i], edgecolors='k')
3
[ ]: