AI Assignment-6

The document contains multiple Python scripts that demonstrate various machine learning techniques using libraries like pandas and scikit-learn. It includes generating synthetic datasets, training models such as K-Nearest Neighbors, Naive Bayes, and Decision Trees, and evaluating their performance with metrics like accuracy and F1 score. Additionally, it shows how to save datasets and results to CSV files for further analysis.


Q1

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor

# Step 1: Generate synthetic dataset


data = {
'Experience': [5, 8, 3, 10, 2, 7],
'Written_Score': [8, 7, 6, 9, 5, 8],
'Interview_Score': [10, 6, 7, 8, 9, 5],
'Salary': [60000, 80000, 45000, 90000, 35000, 75000]
}

df = pd.DataFrame(data)

# Step 2: Save dataset to a .csv file


df.to_csv('candidates_dataset.csv', index=False)

# Step 3: Load dataset


df = pd.read_csv('candidates_dataset.csv')

# Step 4: Split dataset into features and target


X = df.drop('Salary', axis=1)
y = df['Salary']

# Step 5: Split dataset into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Standardize features (recommended for distance-based models like KNN)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 7: Build KNN model


knn_model = KNeighborsRegressor(n_neighbors=3)  # Specify the value of K
# Step 8: Train the model
knn_model.fit(X_train_scaled, y_train)

# Step 9: Make predictions on the testing set


y_pred = knn_model.predict(X_test_scaled)

# Step 10: Use the trained model to predict salaries for new candidates
new_candidates = pd.DataFrame({
'Experience': [5, 8],
'Written_Score': [8, 7],
'Interview_Score': [10, 6]
})

# Standardize the new candidate data


new_candidates_scaled = scaler.transform(new_candidates)

# Predict salaries for new candidates


predicted_salaries = knn_model.predict(new_candidates_scaled)
print("Predicted salaries for new candidates:")
for i, salary in enumerate(predicted_salaries):
    print(f"Candidate {i+1}: ${salary:.2f}")
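Q1 makes test-set predictions in Step 9 but never scores them. Below is a minimal evaluation sketch; it assumes it is appended to the Q1 script, so y_test and y_pred are already in scope (the test set here holds only two rows, so the numbers are illustrative rather than reliable).

from sklearn.metrics import mean_absolute_error, mean_squared_error

# Score the held-out predictions from Step 9 (y_test and y_pred come from
# the Q1 script above).
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(f"Test MAE: {mae:.2f}")
print(f"Test MSE: {mse:.2f}")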

Q2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Create dataset
data = {
'Graduation_Percentage': [80, 85, 70, 75, 90, 95, 85, 88, 92, 78,
83, 87, 79, 84, 91, 86, 93, 89, 76, 81, 74, 77, 82, 88, 94],
'Experience': [5, 8, 3, 6, 9, 2, 4, 7, 10, 2, 6, 3, 5, 7, 4, 8, 10,
9, 3, 7, 4, 6, 2, 8, 5],
'Written_Score': [8, 7, 6, 9, 5, 8, 7, 6, 9, 5, 8, 7, 6, 9, 5, 8,
7, 6, 9, 5, 8, 7, 6, 9, 5],
'Interview_Score': [10, 6, 7, 8, 9, 5, 8, 7, 6, 9, 5, 8, 7, 6, 9,
5, 8, 7, 6, 9, 5, 8, 7, 6, 9],
'Selection': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1] # 1 for selected, 0 for not selected
}

df = pd.DataFrame(data)

# Save dataset to a .csv file


df.to_csv('candidates_dataset.csv', index=False)

# Load dataset
df = pd.read_csv('candidates_dataset.csv')

# Split dataset into features and target


X = df.drop('Selection', axis=1)
y = df['Selection']
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build Bayesian learning model


bayes_model = GaussianNB()

# Train the model


bayes_model.fit(X_train, y_train)

# Make predictions on testing data


y_pred = bayes_model.predict(X_test)

# Calculate classification metrics


accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print classification metrics


print("Classification Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Predict status for unseen data


unseen_data = pd.DataFrame({
'Graduation_Percentage': [90, 75],
'Experience': [5, 8],
'Written_Score': [8, 7],
'Interview_Score': [10, 6]
})

# Make predictions for unseen data


unseen_predictions = bayes_model.predict(unseen_data)

# Save classification metrics to a .csv file


metrics_data = {
'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
'Value': [accuracy, precision, recall, f1]
}

metrics_df = pd.DataFrame(metrics_data)
metrics_df.to_csv('classification_metrics.csv', index=False)

# Save predictions for unseen data to a .csv file


unseen_data['Selection'] = unseen_predictions
unseen_data.to_csv('unseen_predictions.csv', index=False)
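With only 25 rows, the single 80/20 split above leaves just 5 test samples, so the printed metrics can swing widely. As a sanity check, the sketch below cross-validates the same model over the whole dataset; it assumes X and y from the Q2 script are still in scope.

from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB

# 5-fold cross-validated F1 on the full dataset; averaging over folds
# gives a steadier estimate than a single 5-sample test set.
scores = cross_val_score(GaussianNB(), X, y, cv=5, scoring='f1')
print("F1 per fold:", scores)
print(f"Mean F1: {scores.mean():.2f}")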

Q3
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the IRIS dataset


iris = load_iris()
X = iris.data
y = iris.target

# Define function to evaluate model performance


def evaluate_model(X_train, X_test, y_train, y_test):
    clf = DecisionTreeClassifier(criterion='entropy')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    return accuracy, precision, recall, f1

# Vary percentage of training data


percentages = [0.6, 0.7, 0.8, 0.9]
for percentage in percentages:
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=percentage, random_state=42)
    accuracy, precision, recall, f1 = evaluate_model(X_train, X_test, y_train, y_test)
    print(f"Percentage of training data: {percentage}")
    print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")
    print()

# Explore effect of other decision tree parameters


# Note: this reuses the train/test split from the final iteration of the
# loop above (train_size=0.9).
parameters = {'max_depth': [None, 3, 5, 10], 'min_samples_split': [2, 5, 10]}
for max_depth in parameters['max_depth']:
    for min_samples_split in parameters['min_samples_split']:
        clf = DecisionTreeClassifier(criterion='entropy',
                                     max_depth=max_depth, min_samples_split=min_samples_split)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        print(f"Max Depth: {max_depth}, Min Samples Split: {min_samples_split}")
        print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")
        print()
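The manual nested loops above score each parameter combination on a single split. scikit-learn's GridSearchCV cross-validates every combination instead; a minimal sketch using the same estimator and grid:

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Cross-validated search over the same parameter grid as the loops above.
iris = load_iris()
param_grid = {'max_depth': [None, 3, 5, 10], 'min_samples_split': [2, 5, 10]}
search = GridSearchCV(DecisionTreeClassifier(criterion='entropy'),
                      param_grid, cv=5, scoring='f1_weighted')
search.fit(iris.data, iris.target)
print("Best parameters:", search.best_params_)
print(f"Best cross-validated F1: {search.best_score_:.2f}")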

Q4
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the Classified Data


classified_data = pd.read_csv('classified_data.csv')

# Prepare data
X = classified_data.drop('TARGET CLASS', axis=1)
y = classified_data['TARGET CLASS']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define functions to evaluate models


def evaluate_knn(n_neighbors):
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1

def evaluate_nb():
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1

def evaluate_decision_tree(max_depth, min_samples_split):
    clf = DecisionTreeClassifier(max_depth=max_depth,
                                 min_samples_split=min_samples_split)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1

# Vary parameters for each model


knn_parameters = [1, 3, 5, 7, 9]
nb_parameters = []  # GaussianNB has no hyperparameters varied here
decision_tree_parameters = [{'max_depth': None, 'min_samples_split': 2},
                            {'max_depth': 5, 'min_samples_split': 2},
                            {'max_depth': 10, 'min_samples_split': 2}]

# Evaluate models and save results to CSV


results = []
for n_neighbors in knn_parameters:
    accuracy, precision, recall, f1 = evaluate_knn(n_neighbors)
    results.append(['KNN', n_neighbors, accuracy, precision, recall, f1])

accuracy, precision, recall, f1 = evaluate_nb()
results.append(['Naive Bayes', 'N/A', accuracy, precision, recall, f1])

for params in decision_tree_parameters:
    accuracy, precision, recall, f1 = evaluate_decision_tree(
        params['max_depth'], params['min_samples_split'])
    results.append(['Decision Tree',
                    f"Max Depth: {params['max_depth']}, Min Samples Split: {params['min_samples_split']}",
                    accuracy, precision, recall, f1])

# Create DataFrame and save to CSV


df = pd.DataFrame(results, columns=['Model', 'Parameters', 'Accuracy',
                                    'Precision', 'Recall', 'F1 Score'])
df.to_csv('model_comparison_results.csv', index=False)
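Once the comparison file exists, the quickest way to spot the best configuration is to sort it by F1 score. A short sketch, assuming the script above has already written model_comparison_results.csv:

import pandas as pd

# Rank the saved comparison results by F1 score, best first.
results_df = pd.read_csv('model_comparison_results.csv')
print(results_df.sort_values('F1 Score', ascending=False).to_string(index=False))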

The script below generates the classified_data.csv file required by the code above. Note that because both the features and the target labels are random, the classifiers should score close to chance level (around 0.5) on this data.
import pandas as pd
import numpy as np

# Generate random data


np.random.seed(42)
data = pd.DataFrame(np.random.rand(100, 5),
                    columns=['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5'])

# Generate random target class (0 or 1)


data['TARGET CLASS'] = np.random.randint(0, 2, size=len(data))

# Save data to CSV file


data.to_csv('classified_data.csv', index=False)
