0% found this document useful (0 votes)
16 views6 pages

Code 4

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views6 pages

Code 4

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Load the price history and index it by the 'Date' column.
# Later in this script the index is converted with format="%d-%m-%Y", which
# indicates day-first dates; dayfirst=True keeps ambiguous values such as
# 01-02-2020 from being read month-first.
# NOTE(review): confirm the date layout against the CSV itself.
df = pd.read_csv(
    '/content/drive/My Drive/Colab Notebooks/random.csv',
    parse_dates=['Date'],
    dayfirst=True,
    index_col='Date',
)

# Correlation
import matplotlib.pyplot as plt

# Draw both price columns on a single figure for a visual comparison.
plt.figure(figsize=(14, 7))
plt.plot(df['CORN'], label='Corn Prices')
plt.plot(df['Raw Sugar'], label='Raw Sugar Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Corn vs. Raw Sugar Prices Over Time')
plt.legend()
plt.show()

# Correlation coefficient between the two price columns.
correlation = df['CORN'].corr(other=df['Raw Sugar'])
print(f"Correlation between Corn and Raw Sugar prices: {correlation}")

#Test Stationarity

from statsmodels.tsa.stattools import adfuller

# Define a function to perform the ADF test

def adf_test(series):
    """Run ``adfuller`` on ``series`` and print the outcome.

    Prints the test statistic (``result[0]``), the p-value (``result[1]``)
    and every entry of the critical-value mapping (``result[4]``).

    Args:
        series: Data passed unchanged to ``adfuller``; it must not contain
            missing values the test cannot handle (callers drop NaNs first).

    Returns:
        None. The results are only printed.
    """
    result = adfuller(series)

    print(f'ADF Statistic: {result[0]}')
    print(f'p-value: {result[1]}')

    # The header belongs before the loop so it is printed once rather than
    # once per critical value.
    print('Critical Values:')
    for key, value in result[4].items():
        print(f' {key}, {value}')

# Stationarity of the price levels.
print("Corn Price Stationarity Test")
adf_test(df['CORN'])

print("\nRaw Sugar Price Stationarity Test")
adf_test(df['Raw Sugar'])

# First differences, stored next to the levels.
# No .dropna() here: assigning back into df realigns on the index, so the
# leading NaN would reappear anyway. NaNs are dropped at the point of use.
df['Corn_diff'] = df['CORN'].diff()
df['Raw_Sugar_diff'] = df['Raw Sugar'].diff()

# Stationarity of the differenced series; the headings say so explicitly so
# the output cannot be confused with the level tests above.
print("Corn Price Stationarity Test (first difference)")
adf_test(df['Corn_diff'].dropna())

print("\nRaw Sugar Price Stationarity Test (first difference)")
adf_test(df['Raw_Sugar_diff'].dropna())

# Automatic ARIMA order selection
from pmdarima import auto_arima

# Non-seasonal search on the differenced corn series, with tracing enabled.
corn_auto_model = auto_arima(
    df['Corn_diff'].dropna(),
    trace=True,
    seasonal=False,
)

# Same search on the differenced raw sugar series.
sugar_auto_model = auto_arima(
    df['Raw_Sugar_diff'].dropna(),
    trace=True,
    seasonal=False,
)

# Forecast
from statsmodels.tsa.arima.model import ARIMA

# Fit ARIMA on the differenced corn series. The (p, d, q) order comes from
# the auto_arima search run earlier in this script instead of a hard-coded
# (0, 0, 0) placeholder, so the model follows whatever the search selected.
corn_model = ARIMA(df['Corn_diff'].dropna(), order=corn_auto_model.order)
corn_fit = corn_model.fit()
print(corn_fit.summary())

# Forecast corn (differenced series), 12 steps ahead.
corn_forecast = corn_fit.forecast(steps=12)
print("Corn Price Forecast:", corn_forecast)

# Same approach for raw sugar, using its own selected order.
sugar_model = ARIMA(df['Raw_Sugar_diff'].dropna(), order=sugar_auto_model.order)
sugar_fit = sugar_model.fit()
print(sugar_fit.summary())

# Forecast raw sugar (differenced series), 100 steps ahead.
sugar_forecast = sugar_fit.forecast(steps=100)
print("Raw Sugar Price Forecast:", sugar_forecast)

# Ensure the index is datetime before doing date arithmetic on it
# (day-month-year strings are converted; an existing datetime index is kept).
df.index = pd.to_datetime(df.index, format="%d-%m-%Y")

# One future date per forecasted value, starting the day after the last
# observation. The length follows the forecast itself so the two cannot
# drift apart if the forecast horizon is changed.
forecast_dates = pd.date_range(
    df.index[-1] + pd.Timedelta(days=1),
    periods=len(sugar_forecast),
    freq='D',
)

# Plotting
plt.figure(figsize=(14, 7))

# History: the last 500 rows of the differenced raw sugar series. The ARIMA
# model was fitted on differences, so the labels describe changes, not levels.
# NOTE: corn is left out; corn_forecast has its own horizon and would need
# its own date range before it can be plotted here.
#plt.plot(df['Corn_diff'].iloc[-500:], label='Corn Prices', color='blue')
plt.plot(df['Raw_Sugar_diff'].iloc[-500:], label='Raw Sugar Price Change', color='green')

# Forecast, drawn dashed in the same colour as its history.
#plt.plot(forecast_dates, corn_forecast, label='Corn Forecast', color='blue', linestyle='--')
plt.plot(forecast_dates, sugar_forecast, label='Raw Sugar Forecast', color='green', linestyle='--')

plt.title('Raw Sugar Price Changes with Forecast')
plt.xlabel('Date')
plt.ylabel('Price Change')
plt.legend()
plt.show()

# VAR Modelling
from statsmodels.tsa.api import VAR

# Build a VAR on the two price columns and fit it with ic='aic'.
model = VAR(df[['CORN', 'Raw Sugar']])
model_fitted = model.fit(ic='aic', maxlags=None)

# Model summary, followed by the number of coefficient matrices.
print(model_fitted.summary())
print(len(model_fitted.coefs))

# Forecast the next `forecast_steps` periods, seeded with the last k_ar rows.
# NOTE(review): if k_ar is 0 the slice [-0:] passes the whole history.
forecast_steps = 100
forecast = model_fitted.forecast(
    df[['CORN', 'Raw Sugar']].values[-model_fitted.k_ar:],
    steps=forecast_steps,
)

# Wrap the forecast in a DataFrame indexed by daily dates after the last row.
forecast_index = pd.date_range(
    start=df.index[-1] + pd.Timedelta(days=1),
    periods=forecast_steps,
    freq='D',
)
forecast_df = pd.DataFrame(
    forecast,
    index=forecast_index,
    columns=['CORN', 'Raw Sugar'],
)
print(forecast_df)

# Historical raw sugar prices with the VAR forecast appended.
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Raw Sugar'], label='Historical Sugar Price', color='blue')
plt.plot(forecast_df.index, forecast_df['Raw Sugar'], label='Forecasted Sugar Price', color='red')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Sugar Price Forecast Using VAR Model')
plt.legend()
plt.show()

# Train model

# Columns used by the VAR. These are the same two columns the full-sample
# VAR in this script is built on.
# NOTE(review): the previous version also listed 'USDBRL' here, which did not
# match a two-column model; add it to this list if it should be modelled.
var_columns = ['CORN', 'Raw Sugar']

# Split data into training and testing sets (80/20 split), keeping time order.
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]

# Fit a fresh VAR on the training rows only, so the test rows stay unseen.
model_fitted_train = VAR(train[var_columns]).fit(maxlags=None, ic='aic')

# Predict over the test horizon, seeded with the last k_ar training rows.
test_forecast = model_fitted_train.forecast(
    train[var_columns].values[-model_fitted_train.k_ar:],
    steps=len(test),
)

# Compare actual vs predicted.
test_forecast_df = pd.DataFrame(test_forecast, index=test.index, columns=var_columns)

plt.figure(figsize=(12, 6))
plt.plot(test.index, test['Raw Sugar'], label='Actual Sugar Price', color='blue')
plt.plot(
    test_forecast_df.index,
    test_forecast_df['Raw Sugar'],
    label='Predicted Sugar Price',
    color='red',
)
plt.title('Actual vs Predicted Sugar Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

# Test model
from sklearn.metrics import mean_squared_error

# RMSE of the raw sugar forecast on the test rows.
# The root is taken explicitly: the `squared=False` keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, so this form works on every version.
rmse = np.sqrt(mean_squared_error(test['Raw Sugar'], test_forecast_df['Raw Sugar']))
print(f'Root Mean Squared Error (RMSE): {rmse}')

import numpy as np

# Define the function to calculate MAPE

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Args:
        y_true: Actual values (list, NumPy array or pandas Series).
        y_pred: Predicted values with the same shape as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100``, with zero actuals
        replaced by a tiny value in the denominator only.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different shapes.
    """
    # Convert to float arrays (inputs may be lists or pandas Series).
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Reject mismatched inputs instead of letting NumPy broadcast them.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {actual.shape} and {predicted.shape}"
        )

    # Avoid division by zero by substituting a small value for zero actuals.
    # Only the denominator is patched: the numerator keeps the true values so
    # a perfect prediction of 0 scores 0% rather than 100%.
    epsilon = 1e-10
    denominator = np.where(actual == 0, epsilon, actual)

    return np.mean(np.abs((actual - predicted) / denominator)) * 100

# MAPE of the raw sugar forecast against the actual test values.
mape = mean_absolute_percentage_error(
    y_true=test['Raw Sugar'],
    y_pred=test_forecast_df['Raw Sugar'],
)
print(mape)

You might also like