import pandas as pd # To play with data tables
import matplotlib.pyplot as plt # To visualize data
import numpy as np
import copy
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge
from sklearn.datasets import make_regression
from sklearn.model_selection import KFold
!gdown 1i5c01hhj04J816siI-6u3ea32vI-HToy
Downloading...
From: [Link]
To: /content/alloy-confp-train-data.csv
100% 7.22k/7.22k [00:00<00:00, 27.3MB/s]
data = pd.read_csv('/content/alloy-confp-train-data.csv')
data = data.sample(frac=1)  # shuffle the rows
data.to_csv('/content/alloy-confp-train-data.csv')
data.shape
(120, 8)
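# (not in the original notebook) The six 'C.'-prefixed composition columns and
# the 'HV' target are among the 8 columns; listing them shows what is there:
print(data.columns.tolist())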
Xcols = data.columns[data.columns.str.startswith('C.')]  # composition-fraction columns
X = data[Xcols]
X
(output: preview of X — 120 rows × 6 columns of 'C.'-prefixed composition fractions)
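# Quick sanity check (not in the original notebook): each row is one alloy's
# atomic fractions, so the six composition columns should sum to ~1.
row_sums = X.sum(axis=1)
print(row_sums.min(), row_sums.max())  # expect both ~1.0 up to rounding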
from matplotlib import pyplot as plt
X[Xcols[0]].plot(kind='hist', bins=20, title=Xcols[0])  # first composition column
plt.gca().spines[['top', 'right']].set_visible(False)
y = data['HV']
fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=1, ncols=4, figsize=(12, 3.5))
ax1.hist(y.values, bins=20)
ax2.hist(X.values[:, 0], bins=20, label=Xcols[0])
ax3.hist(X.values[:, 1], bins=20, label=Xcols[1])
ax4.hist(X.values[:, 2], bins=20, label=Xcols[2])
ax1.set_xlabel('HV', fontsize=14)
ax2.set_xlabel(Xcols[0], fontsize=14)
ax3.set_xlabel(Xcols[1], fontsize=14)
ax4.set_xlabel(Xcols[2], fontsize=14)
ax1.set_ylabel('Frequency', fontsize=14)
ax2.set_ylabel('Frequency', fontsize=14)
ax3.set_ylabel('Frequency', fontsize=14)
ax4.set_ylabel('Frequency', fontsize=14)
Text(0, 0.5, 'Frequency')
# First we will define a function to make parity plots. This will keep the code simpler.
def plot_parity(y_cv_test, y_pred_test, y_cv_train=None, y_pred_train=None, label=None, ylim=[50, 900]):
    """
    Function to make parity plots.
    """
    # Test-set metrics
    rmse_test = np.sqrt(mean_squared_error(y_cv_test, y_pred_test))
    r2_test = r2_score(y_cv_test, y_pred_test)
    if y_cv_train is None:
        fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(5, 4), sharey=True, sharex=True)
    else:
        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), sharey=True, sharex=True)
    ax1.scatter(y_cv_test, y_pred_test)
    ax1.text(0.95, 0.26, label, transform=ax1.transAxes, ha='right', fontsize=14)
    ax1.text(0.95, 0.18, "RMSE: %.2f" % rmse_test, transform=ax1.transAxes, ha='right', fontsize=14)
    ax1.text(0.95, 0.1, "R$^2$: %.2f" % r2_test, transform=ax1.transAxes, ha='right', fontsize=14)
    ax1.plot(ylim, ylim, '--k')  # y = x reference line
    ax1.set_xlabel('True y', fontsize=14)
    ax1.set_ylabel('Pred y', fontsize=14)
    ax1.set_xlim(ylim[0], ylim[1])
    ax1.set_ylim(ylim[0], ylim[1])
    if y_cv_train is not None:
        # Train-set metrics on the second panel
        rmse_train = np.sqrt(mean_squared_error(y_cv_train, y_pred_train))
        r2_train = r2_score(y_cv_train, y_pred_train)
        ax2.scatter(y_cv_train, y_pred_train, c='m')
        ax2.text(0.95, 0.26, "Train", transform=ax2.transAxes, ha='right', fontsize=14)
        ax2.text(0.95, 0.18, "RMSE: %.2f" % rmse_train, transform=ax2.transAxes, ha='right', fontsize=14)
        ax2.text(0.95, 0.1, "R$^2$: %.2f" % r2_train, transform=ax2.transAxes, ha='right', fontsize=14)
        ax2.plot(ylim, ylim, '--k')
        ax2.set_xlabel('True y', fontsize=14)
        ax2.set_xlim(ylim[0], ylim[1])
        ax2.set_ylim(ylim[0], ylim[1])
    plt.tight_layout()
    plt.show()
    return None
# Closed-form least-squares solution via the normal equation
X_trans_X_inv = np.linalg.inv(np.dot(X.T, X))
X_trans_y = np.dot(X.T, y)
w_cap_vec = np.dot(X_trans_X_inv, X_trans_y)
y_pred_manual = np.dot(X, w_cap_vec)
plot_parity(y, y_pred_manual, label="Train")
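# The cell above implements the normal equation w = (X^T X)^{-1} X^T y.
# A sketch (not from the notebook) of more numerically stable equivalents
# that avoid forming an explicit inverse:
w_solve = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y))  # solve the normal equations
w_lstsq, *_ = np.linalg.lstsq(X, y, rcond=None)            # SVD-based least squares
print(np.allclose(w_solve, w_cap_vec), np.allclose(w_lstsq, w_cap_vec))  # expect True True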
lr = LinearRegression(fit_intercept=False)
model = lr.fit(X, y)
lr_model = copy.deepcopy(model)
y_pred = lr_model.predict(X)
plot_parity(y, y_pred, label="Train")
print("Sklearn model: ", lr_model.coef_)
print("Eq. based model: ", w_cap_vec)
Sklearn model:  [1589.03703891  154.02145017  647.00169133  279.68594241  204.32826373
 -241.42532589]
Eq. based model:  [1589.03703891  154.02145017  647.00169133  279.68594241  204.32826373
 -241.42532589]
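# The two coefficient vectors agree because LinearRegression(fit_intercept=False)
# solves exactly the same no-intercept least-squares problem as the normal
# equation above. A one-line check (sketch):
print(np.allclose(lr_model.coef_, w_cap_vec))  # expect True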
y_avg_pred = [y.mean()] * len(y)  # baseline: always predict the mean of y
print("Root mean squared error: %.2f" % np.sqrt(mean_squared_error(y, y_avg_pred)))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y, y_avg_pred))
Root mean squared error: 186.35
Coefficient of determination: 0.00
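# R^2 = 0 here by construction: R^2 = 1 - SS_res/SS_tot, and for the
# mean-only predictor SS_res equals SS_tot. A sketch of the same numbers
# computed by hand:
ss_res = np.sum((y - y_avg_pred) ** 2)
ss_tot = np.sum((y - y.mean()) ** 2)
print(np.sqrt(ss_res / len(y)), 1 - ss_res / ss_tot)  # ~186.35, 0.0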
rmse_avg = []
for i in range(2, 12):
    kf = KFold(i)
    rmses = []
    for idx, (train, test) in enumerate(kf.split(X)):
        X_cv_train = X.iloc[train]
        X_cv_test = X.iloc[test]
        y_cv_train = y.iloc[train]
        y_cv_test = y.iloc[test]
        # Model fit and prediction
        model = lr.fit(X_cv_train, y_cv_train)
        y_pred_test = model.predict(X_cv_test)
        y_pred_train = model.predict(X_cv_train)
        # Computing errors
        rmse_test = np.sqrt(mean_squared_error(y_cv_test, y_pred_test))
        rmse_train = np.sqrt(mean_squared_error(y_cv_train, y_pred_train))
        r2_test = r2_score(y_cv_test, y_pred_test)
        r2_train = r2_score(y_cv_train, y_pred_train)
        # Plot Parity plot
        # plot_parity(y_cv_test, y_pred_test, y_cv_train, y_pred_train)
        # print("Root mean squared error: %.2f" % rmse_test)
        # print("Coefficient of determination: %.2f" % r2_test)
        rmses.append(rmse_test)
    rmse_avg.append(sum(rmses) / len(rmses))
rmse_avg
[90.90410023685317,
86.94150026120126,
86.89238900991143,
86.44250692124533,
87.09357006766437,
87.02679087151705,
85.82328460961962,
85.80805780460595,
86.6502684030654,
87.13891194799572]
x = range(2, 12)
plt.plot(x, rmse_avg)
plt.title('Average Validation RMSE vs. Number of Folds (k)')
plt.xlabel('Number of Folds (k)')
plt.ylabel('Average Validation RMSE')
plt.grid(True)
plt.show()
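# The averages above sit in a narrow band (~86-91), so the estimate is not
# very sensitive to k here. For reference, a sketch (not from the notebook)
# of the same k=5 estimate using sklearn's cross_val_score:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(LinearRegression(fit_intercept=False), X, y,
                         cv=KFold(5), scoring='neg_root_mean_squared_error')
print(-scores.mean())  # should match rmse_avg[3] (the k=5 entry)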
X.shape[1]
6
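# X.shape[1] = 6 features; used below to size the gradient-descent weight vector.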
num_iterations = 1000
learning_rate = 0.01
weights = np.zeros(X.shape[1])
bias = 0
def cost_function(weights, bias, X, y):
    # RMSE of the current linear model
    predictions = np.dot(X, weights) + bias
    cost = np.sqrt(np.mean((predictions - y) ** 2))
    return cost
def gradient_descent(weights, bias, X, y, learning_rate, num_iterations):
    for i in range(num_iterations):
        predictions = np.dot(X, weights) + bias
        # Gradients of the (unnormalized) squared-error loss
        gradient_weights = np.dot(X.T, (predictions - y))
        gradient_bias = np.sum(predictions - y)
        weights -= learning_rate * gradient_weights
        bias -= learning_rate * gradient_bias
        if i % 100 == 0:
            print(f"Iteration {i}: Cost = {cost_function(weights, bias, X, y)}")
    return weights, bias
weights, bias = gradient_descent(weights, bias, X, y, learning_rate, num_iterations)
print(f"Optimal weights: {weights}")
print(f"Optimal bias: {bias}")
Iteration 0: Cost = 255.67893485794872
Iteration 100: Cost = 87.65877256028017
Iteration 200: Cost = 82.03876320532915
Iteration 300: Cost = 81.60425558281177
Iteration 400: Cost = 81.55097669962204
Iteration 500: Cost = 81.54245710489937
Iteration 600: Cost = 81.54084599370569
Iteration 700: Cost = 81.5405029810298
Iteration 800: Cost = 81.54042383588254
Iteration 900: Cost = 81.54040461622849
Optimal weights: [1212.94760702 -221.99136401  270.9281074   -96.50510823 -171.89813133
 -617.39672497]
Optimal bias: 376.08439070091987
weights
array([1212.94760702, -221.99136401,  270.9281074 ,  -96.50510823,
       -171.89813133, -617.39672497])
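# The gradient-descent weights differ from the closed-form ones because this
# model also has a bias term, and the composition fractions sum to 1, so the
# bias is collinear with the features: many (weights, bias) pairs give the
# same predictions. A sketch comparing fits rather than coefficients:
y_pred_gd = np.dot(X, weights) + bias
print(np.sqrt(mean_squared_error(y, y_pred_gd)))  # ~81.5, matching the final cost above
lr_b = LinearRegression(fit_intercept=True).fit(X, y)
print(np.sqrt(mean_squared_error(y, lr_b.predict(X))))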