Q1
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
# Step 1: Generate synthetic dataset
data = {
    'Experience': [5, 8, 3, 10, 2, 7],
    'Written_Score': [8, 7, 6, 9, 5, 8],
    'Interview_Score': [10, 6, 7, 8, 9, 5],
    'Salary': [60000, 80000, 45000, 90000, 35000, 75000]
}
df = pd.DataFrame(data)
# Step 2: Save dataset to a .csv file
df.to_csv('candidates_dataset.csv', index=False)
# Step 3: Load dataset
df = pd.read_csv('candidates_dataset.csv')
# Step 4: Split dataset into features and target
X = df.drop('Salary', axis=1)
y = df['Salary']
# Step 5: Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Step 6: Standardize features (important for distance-based models such as KNN)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Step 7: Build KNN model
knn_model = KNeighborsRegressor(n_neighbors=3)  # Specify the value of K
# Step 8: Train the model
knn_model.fit(X_train_scaled, y_train)
# Step 9: Make predictions on the testing set
y_pred = knn_model.predict(X_test_scaled)
# Step 10: Use the trained model to predict salaries for new candidates
new_candidates = pd.DataFrame({
    'Experience': [5, 8],
    'Written_Score': [8, 7],
    'Interview_Score': [10, 6]
})
# Standardize the new candidate data
new_candidates_scaled = scaler.transform(new_candidates)
# Predict salaries for new candidates
predicted_salaries = knn_model.predict(new_candidates_scaled)
print("Predicted salaries for new candidates:")
for i, salary in enumerate(predicted_salaries):
    print(f"Candidate {i+1}: ${salary:.2f}")
Q2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Create dataset
data = {
    'Graduation_Percentage': [80, 85, 70, 75, 90, 95, 85, 88, 92, 78, 83, 87, 79, 84, 91, 86, 93, 89, 76, 81, 74, 77, 82, 88, 94],
    'Experience': [5, 8, 3, 6, 9, 2, 4, 7, 10, 2, 6, 3, 5, 7, 4, 8, 10, 9, 3, 7, 4, 6, 2, 8, 5],
    'Written_Score': [8, 7, 6, 9, 5, 8, 7, 6, 9, 5, 8, 7, 6, 9, 5, 8, 7, 6, 9, 5, 8, 7, 6, 9, 5],
    'Interview_Score': [10, 6, 7, 8, 9, 5, 8, 7, 6, 9, 5, 8, 7, 6, 9, 5, 8, 7, 6, 9, 5, 8, 7, 6, 9],
    'Selection': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]  # 1 for selected, 0 for not selected
}
df = pd.DataFrame(data)
# Save dataset to a .csv file
df.to_csv('candidates_dataset.csv', index=False)
# Load dataset
df = pd.read_csv('candidates_dataset.csv')
# Split dataset into features and target
X = df.drop('Selection', axis=1)
y = df['Selection']
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Build the Gaussian Naive Bayes model
bayes_model = GaussianNB()
# Train the model
bayes_model.fit(X_train, y_train)
# Make predictions on testing data
y_pred = bayes_model.predict(X_test)
# Calculate classification metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# Print classification metrics
print("Classification Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
# Predict status for unseen data
unseen_data = pd.DataFrame({
    'Graduation_Percentage': [90, 75],
    'Experience': [5, 8],
    'Written_Score': [8, 7],
    'Interview_Score': [10, 6]
})
# Make predictions for unseen data
unseen_predictions = bayes_model.predict(unseen_data)
# Save classification metrics to a .csv file
metrics_data = {
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1]
}
metrics_df = pd.DataFrame(metrics_data)
metrics_df.to_csv('classification_metrics.csv', index=False)
# Save predictions for unseen data to a .csv file
unseen_data['Selection'] = unseen_predictions
unseen_data.to_csv('unseen_predictions.csv', index=False)
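GaussianNB also exposes the class posteriors via predict_proba, which makes the unseen-data predictions easier to interpret. A short hedged add-on (assuming the fitted bayes_model and the unseen_data frame above):
# Posterior probability of each class for the unseen candidates
probabilities = bayes_model.predict_proba(unseen_data.drop('Selection', axis=1))
for i, (pred, probs) in enumerate(zip(unseen_predictions, probabilities)):
    status = 'Selected' if pred == 1 else 'Not selected'
    print(f"Candidate {i+1}: {status} (P(selected) = {probs[1]:.2f})")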
Q3
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Load the IRIS dataset
iris = load_iris()
X = iris.data
y = iris.target
# Define function to evaluate model performance
def evaluate_model(X_train, X_test, y_train, y_test):
    clf = DecisionTreeClassifier(criterion='entropy')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    return accuracy, precision, recall, f1
# Vary percentage of training data
percentages = [0.6, 0.7, 0.8, 0.9]
for percentage in percentages:
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=percentage, random_state=42)
    accuracy, precision, recall, f1 = evaluate_model(X_train, X_test, y_train, y_test)
    print(f"Percentage of training data: {percentage}")
    print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")
    print()
# Explore effect of other decision tree parameters
parameters = {'max_depth': [None, 3, 5, 10], 'min_samples_split': [2, 5, 10]}
# Note: this reuses the train/test split left over from the last iteration above (90% training data)
for max_depth in parameters['max_depth']:
    for min_samples_split in parameters['min_samples_split']:
        clf = DecisionTreeClassifier(criterion='entropy', max_depth=max_depth, min_samples_split=min_samples_split)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        print(f"Max Depth: {max_depth}, Min Samples Split: {min_samples_split}")
        print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")
        print()
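With only 150 samples, single-split scores move noticeably with random_state. A cross-validated variant (a sketch using sklearn's cross_val_score; the max_depth=3 setting is an illustrative choice, not part of the original experiment) gives a steadier estimate:
from sklearn.model_selection import cross_val_score
# Average accuracy over 5 folds instead of one fixed split
clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=42)
scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
print(f"5-fold accuracy: {scores.mean():.2f} +/- {scores.std():.2f}")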
Q4
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Load the Classified Data
classified_data = pd.read_csv('classified_data.csv')
# Prepare data
X = classified_data.drop('TARGET CLASS', axis=1)
y = classified_data['TARGET CLASS']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Define functions to evaluate models
def evaluate_knn(n_neighbors):
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1

def evaluate_nb():
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1

def evaluate_decision_tree(max_depth, min_samples_split):
    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1
# Vary parameters for each model
knn_parameters = [1, 3, 5, 7, 9]
# Naive Bayes has no hyperparameter to vary here
decision_tree_parameters = [{'max_depth': None, 'min_samples_split': 2},
                            {'max_depth': 5, 'min_samples_split': 2},
                            {'max_depth': 10, 'min_samples_split': 2}]
# Evaluate models and save results to CSV
results = []
for n_neighbors in knn_parameters:
    accuracy, precision, recall, f1 = evaluate_knn(n_neighbors)
    results.append(['KNN', n_neighbors, accuracy, precision, recall, f1])
accuracy, precision, recall, f1 = evaluate_nb()
results.append(['Naive Bayes', 'N/A', accuracy, precision, recall, f1])
for params in decision_tree_parameters:
    accuracy, precision, recall, f1 = evaluate_decision_tree(params['max_depth'], params['min_samples_split'])
    results.append(['Decision Tree', f"Max Depth: {params['max_depth']}, Min Samples Split: {params['min_samples_split']}", accuracy, precision, recall, f1])
# Create DataFrame and save to CSV
df = pd.DataFrame(results, columns=['Model', 'Parameters', 'Accuracy', 'Precision', 'Recall', 'F1 Score'])
df.to_csv('model_comparison_results.csv', index=False)
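To read off the best configuration, the saved table can be reloaded and ranked. A small follow-up sketch (assumes model_comparison_results.csv was just written by the code above):
# Rank all evaluated models by F1 score, best first
results_df = pd.read_csv('model_comparison_results.csv')
print(results_df.sort_values('F1 Score', ascending=False).to_string(index=False))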
The code below generates the classified_data.csv file required by the program above. Note that both the features and the target class are random, so all three models should score near chance (around 0.5) on this data.
import pandas as pd
import numpy as np
# Generate random data
np.random.seed(42)
data = pd.DataFrame(np.random.rand(100, 5), columns=['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5'])
# Generate random target class (0 or 1)
data['TARGET CLASS'] = np.random.randint(0, 2, size=len(data))
# Save data to CSV file
data.to_csv('classified_data.csv', index=False)
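A quick sanity check on the generated file (a sketch; the expected shape is 100 rows by 6 columns, with a roughly even class split):
check = pd.read_csv('classified_data.csv')
print(check.shape)  # expected: (100, 6)
print(check['TARGET CLASS'].value_counts())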