In [1]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
In [2]: from sklearn.datasets import load_iris
# Load the dataset
iris = load_iris()
# Convert to a pandas DataFrame for easier handling
data = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                    columns=iris['feature_names'] + ['target'])
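Recent scikit-learn versions (0.23+) can also return the dataset as a DataFrame directly. A minimal equivalent sketch (data_alt is an illustrative name):

# Equivalent construction via as_frame (scikit-learn 0.23+)
iris_frame = load_iris(as_frame=True)
data_alt = iris_frame.frame  # the four feature columns plus 'target'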
Exploring the Dataset
In [3]: print(data.head())
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  target
0                5.1               3.5                1.4               0.2     0.0
1                4.9               3.0                1.4               0.2     0.0
2                4.7               3.2                1.3               0.2     0.0
3                4.6               3.1                1.5               0.2     0.0
4                5.0               3.6                1.4               0.2     0.0
In [4]: # Data types and non-null counts
print(data.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype
---  ------             --------------  -----
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    float64
dtypes: float64(5)
memory usage: 6.0 KB
None
In [5]: # Summary statistics
print(data.describe())
       sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)      target
count         150.000000        150.000000         150.000000        150.000000  150.000000
mean            5.843333          3.057333           3.758000          1.199333    1.000000
std             0.828066          0.435866           1.765298          0.762238    0.819232
min             4.300000          2.000000           1.000000          0.100000    0.000000
25%             5.100000          2.800000           1.600000          0.300000    0.000000
50%             5.800000          3.000000           4.350000          1.300000    1.000000
75%             6.400000          3.300000           5.100000          1.800000    2.000000
max             7.900000          4.400000           6.900000          2.500000    2.000000
In [7]: # Pairwise correlations between features and target
print(data.corr())
                   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)    target
sepal length (cm)           1.000000         -0.117570           0.871754          0.817941  0.782561
sepal width (cm)           -0.117570          1.000000          -0.428440         -0.366126 -0.426658
petal length (cm)           0.871754         -0.428440           1.000000          0.962865  0.949035
petal width (cm)            0.817941         -0.366126           0.962865          1.000000  0.956547
target                      0.782561         -0.426658           0.949035          0.956547  1.000000
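The petal measurements correlate with the target at above 0.94, so they should carry most of the signal for classification. These relationships are easier to scan as a heatmap; a quick sketch using the imports above (not part of the original run):

# Visualize the correlation matrix as an annotated heatmap
plt.figure(figsize=(6, 5))
sns.heatmap(data.corr(), annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Feature Correlations')
plt.show()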
Mapping Target Values to Species
In [8]: # Map numeric target values to species names
species_map = {0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}
data['species'] = data['target'].map(species_map)
print(data[['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)', 'species']].head())
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  species
0                5.1               3.5                1.4               0.2   setosa
1                4.9               3.0                1.4               0.2   setosa
2                4.7               3.2                1.3               0.2   setosa
3                4.6               3.1                1.5               0.2   setosa
4                5.0               3.6                1.4               0.2   setosa
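As a quick balance check (a sketch, not part of the original run), each species should appear exactly 50 times:

# The iris dataset is balanced: 50 samples per species
print(data['species'].value_counts())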
Checking for Missing Values
In [9]: # Checking for missing values
print(data.isnull().sum())
sepal length (cm) 0
sepal width (cm) 0
petal length (cm) 0
petal width (cm) 0
target 0
species 0
dtype: int64
No missing values are present in the dataset.
Data Preprocessing
Feature Selection
Splitting Features and Target
Separate the dataset into features (X) and target (y).
In [10]: # Features
X = data.iloc[:, 0:4].values  # or data[['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']].values
# Target
y = data['target'].values
Feature Scaling
In [11]: from sklearn.preprocessing import StandardScaler
# Initialize the scaler
scaler = StandardScaler()
# Fit the scaler on the features and transform
X_scaled = scaler.fit_transform(X)
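After fitting, each column of X_scaled should have (approximately) zero mean and unit variance. A quick sanity check, not in the original notebook:

# Verify standardization: per-column mean ~0 and std ~1
print(X_scaled.mean(axis=0).round(6))
print(X_scaled.std(axis=0).round(6))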
Splitting the Dataset
In [12]: # Split the dataset into training and testing sets
# Test size = 20% of the dataset, random_state for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
print(f'Training set size: {X_train.shape[0]} samples')
print(f'Testing set size: {X_test.shape[0]} samples')
Training set size: 120 samples
Testing set size: 30 samples
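With only 30 test samples, an unstratified split can leave the classes slightly uneven (here 10/9/11, as the confusion matrix below shows). Passing stratify=y preserves the 50/50/50 proportions; a sketch of that variant (the *_s names are illustrative):

# Variant: a stratified split keeps class proportions equal in both sets
Xtr_s, Xte_s, ytr_s, yte_s = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y)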
Implementing Logistic Regression
Initializing the Model
In [13]: # Initialize the Logistic Regression model
# Using multinomial since the target has more than two classes
# solver='lbfgs' is suitable for multinomial loss
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=200)
Parameters Explained:
multi_class='multinomial': the loss function is the multinomial loss fit across all classes, suitable for multi-class classification.
solver='lbfgs': an optimization algorithm well suited to small datasets that supports the multinomial loss.
max_iter=200: the maximum number of iterations for the solver to converge, raised from the default of 100 to ensure convergence.
Training the Model
In [14]: # Train the model using the training data
[Link](X_train, y_train)
Out[14]: LogisticRegression(max_iter=200, multi_class='multinomial')
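To confirm the solver converged within max_iter, the fitted model's n_iter_ attribute can be inspected (a quick check, not part of the original run):

# Number of iterations lbfgs actually ran; should be below max_iter=200
print(model.n_iter_)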
Making Predictions
In [15]: # Predict the classes for the testing set
y_pred = model.predict(X_test)
In [16]: # Create a DataFrame to compare actual and predicted values
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)
Actual Predicted
0 1.0 1.0
1 0.0 0.0
2 2.0 2.0
3 1.0 1.0
4 1.0 1.0
5 0.0 0.0
6 1.0 1.0
7 2.0 2.0
8 1.0 1.0
9 1.0 1.0
10 2.0 2.0
11 0.0 0.0
12 0.0 0.0
13 0.0 0.0
14 0.0 0.0
15 1.0 1.0
16 2.0 2.0
17 1.0 1.0
18 1.0 1.0
19 2.0 2.0
20 0.0 0.0
21 2.0 2.0
22 0.0 0.0
23 2.0 2.0
24 2.0 2.0
25 2.0 2.0
26 2.0 2.0
27 2.0 2.0
28 0.0 0.0
29 0.0 0.0
Evaluating the Model Using the Confusion Matrix
Creating the Confusion Matrix
In [17]: # Generate the confusion matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
In this output:
Row 0 (actual class 0, setosa): 10 correctly predicted as setosa.
Row 1 (actual class 1, versicolor): 9 correctly predicted as versicolor.
Row 2 (actual class 2, virginica): 11 correctly predicted as virginica.
Total samples correctly predicted: 30 out of 30 (100% accuracy in this sample run).
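Per-class recall can be read straight off the matrix by dividing the diagonal by each row sum; a short sketch:

# Recall per class = correct predictions / actual samples of that class
per_class_recall = cm.diagonal() / cm.sum(axis=1)
print(dict(zip(iris.target_names, per_class_recall)))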
Visualizing the Confusion Matrix
In [18]: plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=iris.target_names,
            yticklabels=iris.target_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()
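For reference, scikit-learn 1.0+ ships a built-in display that produces a comparable plot without seaborn; a minimal sketch:

# Built-in confusion matrix plot (scikit-learn 1.0+)
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, display_labels=iris.target_names, cmap='Blues')
plt.show()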
Interpreting Results
Accuracy Score
In [19]: # Computing the Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
Accuracy: 100.00%
Classification Report
In [20]: from sklearn.metrics import classification_report
# Generate the classification report
report = classification_report(y_test, y_pred, target_names=iris.target_names)
print(report)
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30
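Perfect accuracy on a 30-sample test set can be optimistic. As a sanity check (a sketch, not part of the original run), 5-fold cross-validation with scaling done inside each fold via a Pipeline gives a more robust estimate:

# Cross-validate on the full dataset; scaling inside each fold avoids leakage
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=200))
scores = cross_val_score(pipe, X, y, cv=5)  # X is the unscaled feature matrix
print(f'CV accuracy: {scores.mean():.3f} +/- {scores.std():.3f}')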