e22cseu1389-assignment10-1
November 18, 2024
[13]: import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv("Iris.csv")
print(df.head())

# Keep only the two linearly separable classes; .copy() avoids pandas'
# SettingWithCopyWarning when the labels are encoded on the next line.
binary_df = df[df["Species"].isin(["Iris-setosa", "Iris-versicolor"])].copy()
binary_df["Species"] = binary_df["Species"].apply(lambda x: 0 if x == "Iris-setosa" else 1)

plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
sns.scatterplot(data=binary_df, x="SepalLengthCm", y="SepalWidthCm", hue="Species", palette="Set1")
plt.title("Sepal Length vs Sepal Width")
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.subplot(1, 2, 2)
sns.scatterplot(data=binary_df, x="PetalLengthCm", y="PetalWidthCm", hue="Species", palette="Set1")
plt.title("Petal Length vs Petal Width")
plt.xlabel("Petal Length (cm)")
plt.ylabel("Petal Width (cm)")
plt.tight_layout()
plt.show()
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa
3 4 4.6 3.1 1.5 0.2 Iris-setosa
4 5 5.0 3.6 1.4 0.2 Iris-setosa
[14]: from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import numpy as np

# Train on the two sepal features so the decision boundary can be drawn in 2-D
X = binary_df[["SepalLengthCm", "SepalWidthCm"]].values
y = binary_df["Species"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=69)

svm_model = SVC(kernel="linear", C=1)
svm_model.fit(X_train, y_train)

# Evaluate the classifier on a dense grid to shade the two decision regions
plt.figure(figsize=(8, 6))
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap="coolwarm", alpha=0.3)
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, palette="Set1", edgecolor="k")

# Circle the support vectors, i.e. the training points that define the margin
plt.scatter(svm_model.support_vectors_[:, 0], svm_model.support_vectors_[:, 1],
            s=100, facecolors="none", edgecolor="k", linewidth=1.5,
            label="Support Vectors")
plt.title("Linear SVM Decision Boundary and Support Vectors")
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.legend()
plt.show()
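For a linear SVM the fitted hyperplane is w·x + b = 0 and the margin width is 2/||w||, so the geometry in the plot can be quantified directly. A minimal sketch (my addition, reusing svm_model from the cell above):

[ ]: # Added sketch: inspect the learned hyperplane and margin of the linear SVM
w = svm_model.coef_[0]          # weight vector w
b = svm_model.intercept_[0]     # bias term b
margin = 2 / np.linalg.norm(w)  # geometric margin width 2 / ||w||
print(f"w = {w}, b = {b:.3f}, margin width = {margin:.3f}")
print("Support vectors per class:", svm_model.n_support_)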
[15]: from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
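All four metrics are 1.0 because the held-out setosa and versicolor points are linearly separable in the sepal features. As a sanity check, a small added sketch showing the per-class breakdown (reusing y_test and y_pred from above):

[ ]: # Added check: confusion matrix behind the perfect aggregate scores
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=["Iris-setosa", "Iris-versicolor"]))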
[16]: C_values = [0.1, 1, 10]
plt.figure(figsize=(15, 5))
for i, C in enumerate(C_values, 1):
    # Refit the linear SVM for each regularization strength C
    svm_model = SVC(kernel="linear", C=C)
    svm_model.fit(X_train, y_train)
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
    Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.subplot(1, 3, i)
    plt.contourf(xx, yy, Z, cmap="coolwarm", alpha=0.3)
    sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, palette="Set1", edgecolor="k")
    plt.scatter(svm_model.support_vectors_[:, 0], svm_model.support_vectors_[:, 1],
                s=100, facecolors="none", edgecolor="k", linewidth=1.5,
                label="Support Vectors")
    plt.title(f"SVM Decision Boundary with C = {C}")
    plt.xlabel("Sepal Length (cm)")
    plt.ylabel("Sepal Width (cm)")
    plt.legend()
plt.tight_layout()
plt.show()
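One way to choose C more systematically than eyeballing the three panels is cross-validation. A hedged sketch (my addition, not in the original assignment) using scikit-learn's GridSearchCV:

[ ]: # Added sketch: select C by 5-fold cross-validation on the training split
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(SVC(kernel="linear"), {"C": [0.01, 0.1, 1, 10, 100]}, cv=5)
grid.fit(X_train, y_train)
print("Best C:", grid.best_params_["C"], "| CV accuracy:", grid.best_score_)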
[17]: print('''Decision Boundary: With lower C, the decision boundary allows more
misclassifications in exchange for a wider margin, while higher C values enforce a
stricter boundary.
Support Vectors: The count and positions of the support vectors vary with C, since
they are the points that define the margin.
Effect of the C Parameter: Lower C values tolerate more training errors but aim for
a simpler, better-generalizing model; higher C values prioritize classifying the
training data correctly but may lead to overfitting.
Limitations of Linear SVM: Linear SVMs struggle with data that is not linearly
separable. For such cases, a non-linear kernel such as RBF or polynomial may
improve performance.''')
Decision Boundary: With lower C, the decision boundary allows more
misclassifications in exchange for a wider margin, while higher C values enforce a
stricter boundary.
Support Vectors: The count and positions of the support vectors vary with C, since
they are the points that define the margin.
Effect of the C Parameter: Lower C values tolerate more training errors but aim for
a simpler, better-generalizing model; higher C values prioritize classifying the
training data correctly but may lead to overfitting.
Limitations of Linear SVM: Linear SVMs struggle with data that is not linearly
separable. For such cases, a non-linear kernel such as RBF or polynomial may
improve performance.
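The non-linear kernel mentioned above needs only a one-argument change in scikit-learn. An illustrative sketch (my addition), reusing the train/test split from earlier:

[ ]: # Added sketch: the RBF kernel handles classes that are not linearly separable
rbf_model = SVC(kernel="rbf", C=1, gamma="scale")
rbf_model.fit(X_train, y_train)
print("RBF test accuracy:", rbf_model.score(X_test, y_test))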