0% found this document useful (0 votes)
35 views · 2 pages

Import Pandas As PD

The document outlines a Python script for analyzing solar data, including loading multiple CSV files, encoding categorical labels, and training machine learning models like Random Forest and SVM. It evaluates model performance using accuracy scores and confusion matrices, and visualizes feature importance and fault types in both 2D and interactive 3D plots. The script utilizes libraries such as pandas, scikit-learn, and Plotly for data manipulation and visualization.

Uploaded by

20112042paren
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
35 views · 2 pages

Import Pandas As PD

The document outlines a Python script for analyzing solar data, including loading multiple CSV files, encoding categorical labels, and training machine learning models like Random Forest and SVM. It evaluates model performance using accuracy scores and confusion matrices, and visualizes feature importance and fault types in both 2D and interactive 3D plots. The script utilizes libraries such as pandas, scikit-learn, and Plotly for data manipulation and visualization.

Uploaded by

20112042paren
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd

import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from mpl_toolkits.mplot3d import Axes3D

# Set plot styles: one global font size and family for every matplotlib figure
# created below.
plt.rcParams['font.size'] = 14
plt.rcParams["font.family"] = "Times New Roman"

# ----------------------
# Load Multiple CSV Files
# ----------------------
# Declared schema of the combined table. 'Fault_Type' and 'Power_Condition'
# are labels attached below from the loop variables, not read from the files.
column_names = ['Time', 'Ipv', 'Vpv', 'Vdc', 'ia', 'ib', 'ic', 'va', 'vb', 'vc',
                'Iabc', 'If', 'Vabc', 'Vf', 'Fault_Type', 'Power_Condition']

fault_types = ['F0', 'F1', 'F5']
power_conditions = ['LPP', 'MPP']

# One CSV is read per (fault, power condition) pair. The file name is the
# fault code followed by the first letter of the power condition, e.g.
# '/F0L.csv' for ('F0', 'LPP') and '/F5M.csv' for ('F5', 'MPP').
# NOTE(review): the naming scheme and the '/' root directory are taken from
# the six file names the script listed originally -- confirm they match the
# actual data location.
frames = []
for fault in fault_types:
    for power in power_conditions:
        file_path = f"/{fault}{power[0]}.csv"
        df = pd.read_csv(file_path)
        # Tag every row with the scenario it was recorded under.
        df['Fault_Type'] = fault
        df['Power_Condition'] = power
        frames.append(df)

# Concatenate once instead of growing a DataFrame inside the loop, which
# re-copies all previously loaded rows on every iteration.
solar_data = pd.concat(frames, ignore_index=True)

# Keep the declared columns first (in declared order), followed by any
# additional columns found in the files; declared columns that no file
# provides are kept as all-missing columns.
extra_columns = [col for col in solar_data.columns if col not in column_names]
solar_data = solar_data.reindex(columns=column_names + extra_columns)

# Encode categorical labels as integers.
# Each column gets its own encoder so that the fitted class mapping of one
# column is not overwritten when the next column is fitted; `label_encoder`
# therefore keeps the Fault_Type mapping (usable with inverse_transform).
label_encoder = LabelEncoder()
solar_data['Fault_Type'] = label_encoder.fit_transform(solar_data['Fault_Type'])

power_encoder = LabelEncoder()
solar_data['Power_Condition'] = power_encoder.fit_transform(
    solar_data['Power_Condition']
)

# Drop unnecessary columns: remove 'Time' when it is present; a table without
# that column is left unchanged.
solar_data = solar_data.drop(columns=['Time'], errors='ignore')

# Define features and target: the encoded fault type is the target, every
# remaining column is a feature.
y = solar_data['Fault_Type']
X = solar_data.drop(columns='Fault_Type')

# Train-test split: 20% of the rows are held out; the seed is fixed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: the scaler is fitted on the training rows only and then
# applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ----------------------
# Train & Evaluate Models
# ----------------------
# Random forest: trained and evaluated on the unscaled features.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)
y_pred_rf = rf_model.predict(X_test)

# RBF-kernel SVM: trained and evaluated on the standardized features.
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42).fit(
    X_train_scaled, y_train
)
y_pred_svm = svm_model.predict(X_test_scaled)

# ----------------------
# Model Evaluation
# ----------------------
def evaluate_model(y_true, y_pred, model_name):
    """Print accuracy, classification report and confusion matrix.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model.
        model_name: Name shown in the heading of the printed results.
    """
    report = classification_report(y_true, y_pred)
    matrix = confusion_matrix(y_true, y_pred)

    print(f"\n{model_name} Results:")
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print(report)
    print("Confusion Matrix:\n", matrix)

# Report both classifiers against the same held-out labels, Random Forest
# first and SVM second.
for predictions, model_label in ((y_pred_rf, "Random Forest"), (y_pred_svm, "SVM")):
    evaluate_model(y_test, predictions, model_label)

# ----------------------
# Feature Importance (Random Forest)
# ----------------------
# Importances reported by the fitted forest, labelled with the feature column
# names and ordered from most to least important.
importance_scores = pd.Series(rf_model.feature_importances_, index=X.columns)
feature_importance = importance_scores.sort_values(ascending=False)

plt.figure(figsize=(10, 6))
# NOTE(review): recent seaborn releases appear to warn when `palette` is
# passed without `hue` -- confirm against the installed seaborn version.
sns.barplot(
    x=feature_importance,
    y=feature_importance.index,
    palette='viridis',
)
plt.title("Feature Importance from Random Forest")
plt.xlabel("Feature Importance Score")
plt.ylabel("Features")
plt.show()

# ----------------------
# 3D Visualization
# ----------------------
# The three columns plotted on the x, y and z axes.
feature_x, feature_y, feature_z = 'Vpv', 'Ipv', 'Vdc'

fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection="3d")

# One marker per row, coloured by the encoded fault type.
scatter = ax.scatter(
    solar_data[feature_x],
    solar_data[feature_y],
    solar_data[feature_z],
    c=solar_data["Fault_Type"],
    cmap="coolwarm",
    edgecolors="k",
)

ax.set_xlabel(feature_x)
ax.set_ylabel(feature_y)
ax.set_zlabel(feature_z)
ax.set_title("3D Visualization of Fault Types")

# Legend entries are generated from the distinct colour values of the scatter.
legend_handles, legend_labels = scatter.legend_elements()
legend = ax.legend(legend_handles, legend_labels, title="Fault Type")
ax.add_artist(legend)
plt.show()

# Interactive 3D Plot using Plotly
# Same three axes as the static plot. The encoded fault type is cast to str
# before being passed as the colour, and the "color" legend title is relabelled
# as "Fault Type".
fig = px.scatter_3d(
    solar_data,
    x=feature_x,
    y=feature_y,
    z=feature_z,
    color=solar_data["Fault_Type"].astype(str),
    title="Interactive 3D Visualization of Fault Types",
    labels={"color": "Fault Type"},
)
fig.show()

You might also like