import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the price table and use the 'Date' column as the datetime index.
# This script treats the dates as dd-mm-YYYY, so parse them day-first; without
# it an ambiguous date such as 01-02-2020 can be read month-first (or the
# column can be left as plain strings).
df = pd.read_csv(
    '/content/drive/My Drive/Colab Notebooks/random.csv',
    parse_dates=['Date'],
    dayfirst=True,
    index_col='Date',
)
#Correlation
import matplotlib.pyplot as plt
# Draw both price series on one figure so their co-movement is visible.
plt.figure(figsize=(14, 7))
for column_name, legend_label in (
    ('CORN', 'Corn Prices'),
    ('Raw Sugar', 'Raw Sugar Prices'),
):
    plt.plot(df[column_name], label=legend_label)
plt.title('Corn vs. Raw Sugar Prices Over Time')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

# Correlation between the two price columns (pandas Series.corr default method).
corn_prices = df['CORN']
sugar_prices = df['Raw Sugar']
correlation = corn_prices.corr(sugar_prices)
print(f"Correlation between Corn and Raw Sugar prices: {correlation}")
#Test Stationarity
from statsmodels.tsa.stattools import adfuller
# Define a function to perform the ADF test
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on *series* and print the outcome.

    Prints the test statistic, the p-value and every critical value found in
    the result returned by ``adfuller``.

    Args:
        series: The observations to test; passed to ``adfuller`` unchanged.
    """
    result = adfuller(series)
    print(f'ADF Statistic: {result[0]}')
    print(f'p-value: {result[1]}')
    # Heading goes once above the list, not once per critical value.
    print('Critical Values:')
    for key, value in result[4].items():
        print(f' {key}, {value}')
# ADF test on the price levels.
print("Corn Price Stationarity Test")
adf_test(df['CORN'])
print("\nRaw Sugar Price Stationarity Test")
adf_test(df['Raw Sugar'])

# First-difference both series. No dropna() here: assigning to a column
# re-aligns on the index, so the leading NaN comes back regardless. The NaN
# is dropped at each point of use instead.
df['Corn_diff'] = df['CORN'].diff()
df['Raw_Sugar_diff'] = df['Raw Sugar'].diff()

# ADF test on the differenced series; headings say so to keep the two sets
# of results distinguishable in the output.
print("Corn Price Stationarity Test (first difference)")
adf_test(df['Corn_diff'].dropna())
print("\nRaw Sugar Price Stationarity Test (first difference)")
adf_test(df['Raw_Sugar_diff'].dropna())
# Select ARIMA orders automatically (auto-ARIMA)
from pmdarima import auto_arima
# Non-seasonal auto-ARIMA search on each differenced series; trace=True is
# passed so the search progress is reported.
corn_diff_series = df['Corn_diff'].dropna()
corn_auto_model = auto_arima(corn_diff_series, seasonal=False, trace=True)

sugar_diff_series = df['Raw_Sugar_diff'].dropna()
sugar_auto_model = auto_arima(sugar_diff_series, seasonal=False, trace=True)
#Forecast
from statsmodels.tsa.arima.model import ARIMA
# --- Corn: ARIMA on the differenced series -------------------------------
# NOTE(review): order (0, 0, 0) has no AR or MA terms and looks like a
# placeholder; swap in the order picked by the auto-ARIMA search if intended.
corn_training_series = df['Corn_diff'].dropna()
corn_model = ARIMA(corn_training_series, order=(0, 0, 0))
corn_fit = corn_model.fit()
print(corn_fit.summary())

# 12-step-ahead forecast of the differenced corn series.
corn_forecast = corn_fit.forecast(steps=12)
print("Corn Price Forecast:", corn_forecast)

# --- Raw sugar: same recipe ----------------------------------------------
sugar_training_series = df['Raw_Sugar_diff'].dropna()
sugar_model = ARIMA(sugar_training_series, order=(0, 0, 0))
sugar_fit = sugar_model.fit()
print(sugar_fit.summary())

# 100-step-ahead forecast of the differenced sugar series.
# NOTE(review): horizon differs from the 12 steps used for corn -- confirm.
sugar_forecast = sugar_fit.forecast(steps=100)
print("Raw Sugar Price Forecast:", sugar_forecast)
# Make sure the index is a DatetimeIndex; string dates are read as dd-mm-YYYY.
df.index = pd.to_datetime(df.index, format="%d-%m-%Y")

# Dates for the forecast horizon, starting the day after the last observation.
# The length is taken from the forecast itself so the x and y arrays handed to
# plt.plot always match, whatever horizon was requested.
# NOTE(review): freq='D' assumes daily observations -- confirm against the data.
forecast_dates = pd.date_range(
    df.index[-1] + pd.Timedelta(days=1),
    periods=len(sugar_forecast),
    freq='D',
)

plt.figure(figsize=(14, 7))
# Last 500 rows of the differenced sugar series.
#plt.plot(df['Corn_diff'].iloc[-500:], label='Corn Prices', color='blue')
plt.plot(df['Raw_Sugar_diff'].iloc[-500:], label='Raw Sugar Prices', color='green')
# Forecast of the differenced sugar series, dashed.
#plt.plot(forecast_dates, corn_forecast, label='Corn Forecast', color='blue', linestyle='--')
plt.plot(forecast_dates, sugar_forecast, label='Raw Sugar Forecast', color='green', linestyle='--')
# NOTE(review): title and axis label say "Prices", but the plotted series are
# first differences and only sugar is drawn.
plt.title('Corn and Raw Sugar Prices with Forecast')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()
# VAR Modelling
from statsmodels.tsa.api import VAR
# Two-variable VAR on the price levels; lag order chosen by AIC.
var_columns = ['CORN', 'Raw Sugar']
model = VAR(df[var_columns])
model_fitted = model.fit(maxlags=None, ic='aic')

print(model_fitted.summary())
# Number of lag coefficient matrices in the fitted model.
print(len(model_fitted.coefs))

# Forecast `forecast_steps` periods ahead from the most recent k_ar rows.
# NOTE(review): if k_ar is 0 the slice [-0:] yields the whole array -- verify
# the forecast call copes with a zero-lag fit.
forecast_steps = 100
lag_order = model_fitted.k_ar
recent_observations = df[var_columns].values[-lag_order:]
forecast = model_fitted.forecast(recent_observations, steps=forecast_steps)

# Wrap the forecast array in a DataFrame indexed by the following calendar days.
first_forecast_date = df.index[-1] + pd.Timedelta(days=1)
forecast_index = pd.date_range(
    start=first_forecast_date,
    periods=forecast_steps,
    freq='D',
)
forecast_df = pd.DataFrame(forecast, index=forecast_index, columns=var_columns)
print(forecast_df)

# History in blue, forecast in red.
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Raw Sugar'], label='Historical Sugar Price', color='blue')
plt.plot(
    forecast_df.index,
    forecast_df['Raw Sugar'],
    label='Forecasted Sugar Price',
    color='red',
)
plt.title('Sugar Price Forecast Using VAR Model')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()
#train model
# Chronological 80/20 split: the first 80% of rows train the model, the
# remaining rows are held out for testing.
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]

# One column list for the model, its lagged input and the output frame, so
# their shapes always agree. These are the two series the VAR in this script
# is built on.
var_columns = ['CORN', 'Raw Sugar']

# Fit a fresh VAR on the training rows only, so the held-out period is never
# seen during estimation.
model_fitted_train = VAR(train[var_columns]).fit(maxlags=None, ic='aic')

# Forecast across the whole test window, seeded with the last k_ar training rows.
test_forecast = model_fitted_train.forecast(
    train[var_columns].values[-model_fitted_train.k_ar:],
    steps=len(test),
)

# Align the forecast with the test dates for plotting and scoring.
test_forecast_df = pd.DataFrame(test_forecast, index=test.index, columns=var_columns)

plt.figure(figsize=(12, 6))
plt.plot(test.index, test['Raw Sugar'], label='Actual Sugar Price', color='blue')
plt.plot(
    test_forecast_df.index,
    test_forecast_df['Raw Sugar'],
    label='Predicted Sugar Price',
    color='red',
)
plt.title('Actual vs Predicted Sugar Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()
#test model
from sklearn.metrics import mean_squared_error
# RMSE of the sugar forecast over the test rows. The square root is taken
# explicitly because the `squared=False` shortcut of mean_squared_error is
# deprecated and no longer accepted by recent scikit-learn releases.
rmse = np.sqrt(mean_squared_error(test['Raw Sugar'], test_forecast_df['Raw Sugar']))
print(f'Root Mean Squared Error (RMSE): {rmse}')
import numpy as np
# Define the function to calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error between two sequences.

    Args:
        y_true: Observed values (list, NumPy array or pandas Series).
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        The mean of ``|(y_true - y_pred) / y_true|`` expressed in percent.
        Entries of ``y_true`` equal to zero are replaced by ``1e-10`` before
        the calculation so the division is always defined.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    # Swap exact zeros for a tiny stand-in; the stand-in is used in both the
    # numerator and the denominator.
    tiny = 1e-10
    safe_actual = np.where(actual == 0, tiny, actual)

    relative_errors = np.abs((safe_actual - predicted) / safe_actual)
    return np.mean(relative_errors) * 100
# MAPE (in percent) of the sugar forecast over the test rows.
actual_sugar = test['Raw Sugar']
predicted_sugar = test_forecast_df['Raw Sugar']
mape = mean_absolute_percentage_error(actual_sugar, predicted_sugar)
print(mape)