0% found this document useful (0 votes)
69 views

Soft Sensor Code

The document discusses building and evaluating artificial neural network (ANN) and random forest (RF) models to predict variables from datasets. It loads and preprocesses the datasets, builds the ANN and RF models using Keras and scikit-learn, evaluates them with metrics such as MSE, MAE, and R-squared, and visualizes predicted versus actual values with scatter plots and linear regression lines. Feature importances from the RF model are also examined.

Uploaded by

Marvin Martins
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
69 views

Soft Sensor Code

The document discusses building and evaluating artificial neural network (ANN) and random forest (RF) models to predict variables from datasets. It loads and preprocesses the datasets, builds the ANN and RF models using Keras and scikit-learn, evaluates them with metrics such as MSE, MAE, and R-squared, and visualizes predicted versus actual values with scatter plots and linear regression lines. Feature importances from the RF model are also examined.

Uploaded by

Marvin Martins
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

#ARTIFICIAL NEURAL NETWORK

from numpy import loadtxt


from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from keras.layers import Activation
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import matplotlib.patches as mpl_patches
from sklearn.metrics import mean_absolute_error
from math import sqrt
plt.rcParams["font.family"] = "Cambria"
from matplotlib.lines import Line2D
import time
t = time.process_time()

def mean_absolute_percentage_error(y_test, y_predicted):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are flattened to 1-D before they are compared. Without
    that, a target of shape ``(n,)`` and a column of predictions of shape
    ``(n, 1)`` broadcast to an ``(n, n)`` matrix, and the mean is taken
    over every actual/predicted pairing instead of the matching pairs.

    Args:
        y_test: Array-like of actual values.
        y_predicted: Array-like of predicted values holding the same
            number of elements as ``y_test``.

    Returns:
        ``mean(|y_test - y_predicted| / |y_test|) * 100``.

    Note:
        Entries of ``y_test`` equal to zero are not filtered out; they
        make the corresponding ratio infinite or NaN.
    """
    y_test = np.asarray(y_test).ravel()
    y_predicted = np.asarray(y_predicted).ravel()
    return np.mean(np.abs((y_test - y_predicted) / y_test)) * 100

# Load the raw table: the first five columns are the model inputs and the
# column at index 8 is the target.
dataset = loadtxt("ANN V4.csv", delimiter=',')
X = dataset[:, :5]
y = dataset[:, 8]

# Rescale every input column with a min-max scaler (0-1 by default).
# NOTE(review): the scaler is fitted on the full data set, i.e. before the
# train/validation/test split below -- confirm that this is intended.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(X)
X_scale = min_max_scaler.transform(X)

# Hold out 30 % of the rows, then cut that hold-out in half to obtain the
# validation and test sets.
X_train, X_val_and_test, y_train, y_val_and_test = train_test_split(
    X_scale,
    y,
    test_size=0.3,
    random_state=3,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_val_and_test,
    y_val_and_test,
    test_size=0.5,
    random_state=3,
)

# Print the shape of every partition on a single line.
print(*(part.shape for part in (X_train, X_val, X_test, y_train, y_val, y_test)))

# Lists that collect the R-squared, MSE, MAE and MAPE values computed after
# prediction.
rsqrddata, msedata, maedata, mapedata = [], [], [], []

# Network: 5 inputs -> Dense(32, relu) -> linear activation layer
# -> Dense(20, sigmoid) -> Dense(1, relu).
model = Sequential([
    Dense(32, input_dim=5, activation='relu'),
    Activation("linear"),
    Dense(20, activation='sigmoid'),
    Dense(1, activation='relu'),
])
model.summary()

# Train on mean squared error with Adam; MAE, MAPE and MSE are tracked too.
model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mape', 'mse'])

# NOTE(review): the NumPy seed is set after the layers were created --
# confirm whether it has any effect on weight initialisation or training.
np.random.seed(3)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=2,
    verbose=0,
)

# Evaluate on the test split; the returned values are not stored.
model.evaluate(X_test, y_test)

# Run the trained network on the test inputs.
y_predicted = model.predict(X_test)

# Compute each test-set metric and print it as soon as it is available.
Rsquared = r2_score(y_test, y_predicted)
print("Rsquared : %f" % Rsquared)

mse = mean_squared_error(y_test, y_predicted)
print("MSE : %f" % mse)

mae = mean_absolute_error(y_test, y_predicted)
print("MAE: %f" % mae)

mape = mean_absolute_percentage_error(y_test, y_predicted)
print("MAPE: %f" % mape)

# RMSE is derived from the MSE computed above.
rmse = sqrt(mse)
print("RMSE: %f" % rmse)

# Record this run's metrics in the module-level lists (RMSE is not recorded).
for _store, _value in (
    (rsqrddata, Rsquared),
    (msedata, mse),
    (maedata, mae),
    (mapedata, mape),
):
    _store.append(_value)

# Parity plot: predicted against actual values for the test split.
fig, ax = plt.subplots()
ax.scatter(y_test, y_predicted, s=10, color='mediumseagreen', linewidths=1)

# Black y = x reference line spanning the range of the full target column.
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k-', lw=1.75)
ax.set_xlabel('Actual (%)', fontsize='x-large')
ax.set_ylabel('Predicted (%)', fontsize='x-large')

# Turn both vectors into single-column 2-D arrays, then draw (in red) the
# least-squares line of predicted on actual.
y_test = y_test.reshape(-1, 1)
y_predicted = y_predicted.reshape(-1, 1)
ax.plot(
    y_test,
    LinearRegression().fit(y_test, y_predicted).predict(y_test),
    color="red",
    lw=1.75,
)
ax.set_title('CH4 Conversion')

# Legend: a red line handle for the first entry, and one shared invisible
# rectangle for the remaining ones so that only the text shows.
# NOTE(review): four handles are built but only three labels are supplied.
_blank = mpl_patches.Rectangle((0, 0), 1, 1, fc="white", ec="white",
                               lw=0, alpha=0)
lines = []
handles = [Line2D([0], [0], color='red', lw=3), _blank, _blank, _blank]
labels = [
    "R² = {0:.3g}".format(Rsquared),
    "RMSE = {0:.2g}".format(rmse),
    "MAE = {0:.2g}".format(mae),
]
ax.legend(handles, labels, loc='best', fontsize='xx-large',
          fancybox=True, framealpha=0.7)
plt.show()

#RANDOM FOREST

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.patches as mpl_patches
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
plt.rcParams["font.family"] = "Cambria"
from matplotlib.lines import Line2D
plt.rcParams["font.family"] = "Cambria"

# Read the random-forest data set.
data = pd.read_csv('RF V4.csv')

# The values returned by head()/describe() are not used here; outside an
# interactive session these calls display nothing.
data.head(4)
print ('The shape of our data is:', data.shape)
data.describe()

# One-hot encode the frame with pandas.get_dummies.
data = pd.get_dummies(data)
data.iloc[:, 1:].head(1)

# Pull the 'H2' column out as the target; every remaining column is a feature.
labels = np.array(data.pop('H2'))

# Keep the feature names for later lookups, then switch to a plain array.
data_list = data.columns.tolist()
data = np.array(data)

from sklearn.model_selection import train_test_split


# 80/20 train/test split with a fixed seed.
train_data, test_data, train_labels, test_labels = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

print('Training Data Shape:', train_data.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Data Shape:', test_data.shape)
print('Testing Labels Shape:', test_labels.shape)

# Baseline: use the 'CaO/C' feature column itself as the prediction and
# report its mean absolute deviation from the test targets.
baseline_preds = test_data[:, data_list.index('CaO/C')]
baseline_errors = np.abs(baseline_preds - test_labels)
print('Average baseline error: ', round(np.mean(baseline_errors), 4))

from sklearn.ensemble import RandomForestRegressor


# Fit the random forest on the training split.
rf = RandomForestRegressor(n_estimators=2000, min_samples_leaf=3, max_features=2,
                           max_depth=8, random_state=60)
rf.fit(train_data, train_labels)

# Predict on the held-out test rows. The metrics below and the plot further
# down read `predictions`; without this assignment the script stops with a
# NameError at the first metric.
predictions = rf.predict(test_data)

#Metrics
Rsquared = (r2_score(test_labels, predictions))
print("Rsquared : %f" %Rsquared)
_mse = mean_squared_error(test_labels, predictions)
print("MSE : %f" % _mse)
mae = mean_absolute_error(test_labels, predictions)
print ("MAE: %f" % mae)
rmse = sqrt(_mse)
print ("RMSE: %f" % rmse)

# Element-wise absolute errors; their mean is printed as a second MAE figure.
errors = abs(predictions - test_labels)
print('Mean Absolute Error:', round(np.mean(errors), 6))

# Per-sample absolute percentage error. Zero targets are not filtered out and
# would make the ratio infinite or NaN.
mape = 100 * (errors / test_labels)

# "Accuracy" is reported as 100 minus the mean absolute percentage error.
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 4), '%.')
print('MAPE:', round(np.mean(mape), 4), '%.')

# Pair every feature name with its importance from the fitted forest,
# rounded to four decimals.
importances = list(rf.feature_importances_)
feature_importances_ = [
    (feature, round(importance, 4))
    for feature, importance in zip(data_list, importances)
]

# Most important feature first.
feature_importances_ = sorted(feature_importances_, key=lambda x: x[1], reverse=True)

# Plain loop instead of a list comprehension: printing is a side effect, and
# the comprehension only built a throwaway list of None values.
for pair in feature_importances_:
    print('Variable: {:20} Importance: {}'.format(*pair))

# Parity plot: predicted against actual values for the test split.
fig, ax = plt.subplots()
ax.scatter(test_labels, predictions, s=10, color='MEDIUMSEAGREEN', linewidths=1)

# Black y = x reference line spanning the range of the full target array.
# `labels` still holds the target values here; it is rebound to the legend
# texts further down.
ax.plot([labels.min(), labels.max()], [labels.min(), labels.max()], 'k-', lw=1.75)
ax.set_xlabel('Actual (%)', fontsize='x-large')
ax.set_ylabel('Predicted (%)', fontsize='x-large')

# Turn both vectors into single-column 2-D arrays, then draw (in red) the
# least-squares line of predicted on actual.
test_labels = test_labels.reshape(-1, 1)
predictions = predictions.reshape(-1, 1)
ax.plot(
    test_labels,
    LinearRegression().fit(test_labels, predictions).predict(test_labels),
    color="red",
    lw=1.75,
)

ax.set_title('H2O Reformer (%): Actual vs Predicted')

# Legend: a red line handle for the first entry, and one shared invisible
# rectangle for the remaining ones so that only the text shows.
# NOTE(review): four handles are built but only three labels are supplied.
_blank = mpl_patches.Rectangle((0, 0), 1, 1, fc="white", ec="white",
                               lw=0, alpha=0)
lines = []
handles = [Line2D([0], [0], color='red', lw=3), _blank, _blank, _blank]
labels = [
    "R² = {0:.3g}".format(Rsquared),
    "RMSE = {0:.2g}".format(rmse),
    "MAE = {0:.2g}".format(mae),
]
ax.legend(handles, labels, loc='best', fontsize='xx-large',
          fancybox=True, framealpha=0.7)

plt.show()

You might also like