query = """
SELECT *
FROM `stockprediction-434721.stock_data.amzn_prices`
WHERE CAST(Date AS DATE) >= DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR)
"""
amzn_df = client.query(query).to_dataframe()
amzn_df.head(10)
| | Date | Open | High | Low | Close | Adj Close | Volume | Ticker |
|---|---|---|---|---|---|---|---|---|
| 0 | 2019-12-03 00:00:00+00:00 | 88.00 | 88.643501 | 87.361504 | 88.498001 | 88.498001 | 67618000 | AMZN |
| 1 | 2022-09-23 00:00:00+00:00 | 116.00 | 116.050003 | 112.059998 | 113.779999 | 113.779999 | 65126700 | AMZN |
| 2 | 2022-10-26 00:00:00+00:00 | 116.00 | 119.349998 | 114.760002 | 115.660004 | 115.660004 | 68802300 | AMZN |
| 3 | 2024-04-04 00:00:00+00:00 | 184.00 | 185.100006 | 180.000000 | 180.000000 | 180.000000 | 41624300 | AMZN |
| 4 | 2024-07-10 00:00:00+00:00 | 200.00 | 200.110001 | 197.690002 | 199.789993 | 199.789993 | 32883800 | AMZN |
| 5 | 2023-07-06 00:00:00+00:00 | 128.25 | 128.729996 | 127.370003 | 128.360001 | 128.360001 | 40639900 | AMZN |
| 6 | 2020-12-23 00:00:00+00:00 | 160.25 | 160.506500 | 159.208496 | 159.263504 | 159.263504 | 41876000 | AMZN |
| 7 | 2019-12-13 00:00:00+00:00 | 88.25 | 88.449501 | 87.750000 | 88.046997 | 88.046997 | 54914000 | AMZN |
| 8 | 2020-10-09 00:00:00+00:00 | 160.50 | 164.449493 | 159.891495 | 164.332504 | 164.332504 | 98158000 | AMZN |
| 9 | 2021-11-04 00:00:00+00:00 | 168.50 | 174.931503 | 168.250000 | 173.850006 | 173.850006 | 107060000 | AMZN |
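As a side note, the ticker filter can be passed as a query parameter instead of being baked into the SQL string. A minimal sketch, assuming the same `client` object; the `@ticker` parameter and this variant of the query are illustrative, not from the original notebook:
# Parameterized variant of the query above
from google.cloud import bigquery
job_config = bigquery.QueryJobConfig(
    query_parameters=[bigquery.ScalarQueryParameter("ticker", "STRING", "AMZN")]
)
param_query = """
SELECT *
FROM `stockprediction-434721.stock_data.amzn_prices`
WHERE Ticker = @ticker
  AND CAST(Date AS DATE) >= DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR)
"""
amzn_df = client.query(param_query, job_config=job_config).to_dataframe()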
CLEANING AND PREPROCESSING:
# Check for any missing or null values
print(amzn_df.isnull().sum())
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
Ticker       0
dtype: int64
import pandas as pd
# Ensure that Date is in datetime format
amzn_df['Date'] = pd.to_datetime(amzn_df['Date'])
# Drop columns that are not necessary for modeling
# Adjust this based on your needs
amzn_df = amzn_df.drop(columns=['Adj Close'])
# Sort data by Date in ascending order
amzn_df = amzn_df.sort_values(by='Date', ascending=True)
# Preview updated dataframes
print(amzn_df.head())
                          Date       Open       High        Low      Close  \
337  2019-09-16 00:00:00+00:00  91.200996  91.284500  90.010002  90.391998
1226 2019-09-17 00:00:00+00:00  90.353996  91.199501  90.205002  91.127502
288  2019-09-18 00:00:00+00:00  90.851997  91.102997  89.775002  90.873001
833  2019-09-19 00:00:00+00:00  91.051003  91.628502  90.894997  91.074997
1168 2019-09-20 00:00:00+00:00  91.085503  91.531502  89.045998  89.708000

         Volume Ticker
337    73510000   AMZN
1226   39648000   AMZN
288    50112000   AMZN
833    40630000   AMZN
1168  106838000   AMZN
FEATURE ENGINEERING:
1. Compute rolling statistics: moving averages (e.g., 7-day, 30-day) smooth out stock prices and reveal trends, while volatility (the standard deviation of returns) indicates price variability.
2. Create lag features: the previous day's prices (and prices from a few days back) help the model see short-term trends.
3. Compute returns: daily returns are the percentage change from one day to the next.
# Feature Engineering for Amazon
# 1. Moving Averages
amzn_df['7_day_MA'] = amzn_df['Close'].rolling(window=7).mean()
amzn_df['30_day_MA'] = amzn_df['Close'].rolling(window=30).mean()
# 2. Volatility (Standard deviation of daily returns over 7 and 30 days)
amzn_df['7_day_volatility'] = amzn_df['Close'].pct_change().rolling(window=7).std()
amzn_df['30_day_volatility'] = amzn_df['Close'].pct_change().rolling(window=30).std()
# 3. Lag Features (Previous day's price and volume)
amzn_df['Previous_Close'] = amzn_df['Close'].shift(1)
amzn_df['Previous_Volume'] = amzn_df['Volume'].shift(1)
# 4. Daily Returns
amzn_df['Daily_Return'] = amzn_df['Close'].pct_change()
# Preview updated dataframe for Amazon
print(amzn_df.head())
                          Date       Open       High        Low      Close  \
337  2019-09-16 00:00:00+00:00  91.200996  91.284500  90.010002  90.391998
1226 2019-09-17 00:00:00+00:00  90.353996  91.199501  90.205002  91.127502
288  2019-09-18 00:00:00+00:00  90.851997  91.102997  89.775002  90.873001
833  2019-09-19 00:00:00+00:00  91.051003  91.628502  90.894997  91.074997
1168 2019-09-20 00:00:00+00:00  91.085503  91.531502  89.045998  89.708000

         Volume Ticker  7_day_MA  30_day_MA  7_day_volatility  \
337    73510000   AMZN       NaN        NaN               NaN
1226   39648000   AMZN       NaN        NaN               NaN
288    50112000   AMZN       NaN        NaN               NaN
833    40630000   AMZN       NaN        NaN               NaN
1168  106838000   AMZN       NaN        NaN               NaN

      30_day_volatility  Previous_Close  Previous_Volume  Daily_Return
337                 NaN             NaN             <NA>           NaN
1226                NaN       90.391998         73510000      0.008137
288                 NaN       91.127502         39648000     -0.002793
833                 NaN       90.873001         50112000      0.002223
1168                NaN       91.074997         40630000     -0.015010
# Check for missing values in each column for Amazon
print(amzn_df.isna().sum())
# Visualize where NaNs occur in Amazon data
import matplotlib.pyplot as plt
import seaborn as sns
plt.figure(figsize=(10, 6))
sns.heatmap(amzn_df.isna(), cbar=False, cmap="viridis")
plt.title('Amazon Data Missing Values')
plt.show()
Date                  0
Open                  0
High                  0
Low                   0
Close                 0
Volume                0
Ticker                0
7_day_MA              6
30_day_MA            29
7_day_volatility      7
30_day_volatility    30
Previous_Close        1
Previous_Volume       1
Daily_Return          1
dtype: int64
Based on the heatmap and the summary above, the missing values are concentrated at the beginning of the dataset, specifically in the moving-average and volatility columns. Since these are rolling calculations, NaN values at the start are expected.
It is appropriate to drop these rows: they sit at the very start of the series and carry no information for future predictions.
# Drop rows with NaN values in the Amazon dataframe
amzn_df_cleaned = amzn_df.dropna()
# Preview the cleaned Amazon dataframe
print(amzn_df_cleaned.head())
                          Date       Open       High        Low      Close  \
1039 2019-10-28 00:00:00+00:00  87.403000  88.934998  87.125000  88.853996
524  2019-10-29 00:00:00+00:00  88.740501  88.849998  87.790497  88.135498
947  2019-10-30 00:00:00+00:00  88.012001  89.119003  87.956001  88.999496
143  2019-10-31 00:00:00+00:00  88.799500  89.599998  88.573997  88.833000
1024 2019-11-01 00:00:00+00:00  89.400497  89.872498  89.260498  89.571999

        Volume Ticker   7_day_MA  30_day_MA  7_day_volatility  \
1039  74178000   AMZN  88.501856  87.893966          0.012734
524   45538000   AMZN  88.538998  87.794233          0.011253
947   48988000   AMZN  88.498498  87.731783          0.009998
143   55624000   AMZN  88.576570  87.657050          0.008888
1024  55808000   AMZN  88.785641  87.652516          0.009174

      30_day_volatility  Previous_Close  Previous_Volume  Daily_Return
1039           0.011103       88.066498        192528000      0.008942
524            0.011062       88.853996         74178000     -0.008086
947            0.011232       88.135498         45538000      0.009803
143            0.011221       88.999496         48988000     -0.001871
1024           0.011006       88.833000         55624000      0.008319
print(amzn_df_cleaned.shape)
(1221, 14)
Exporting to CSV to update BigQuery table:
# Export the cleaned and feature-engineered dataframe to a CSV file
amzn_df_cleaned.to_csv('amzn_cleaned_feature_engineered.csv', index=False)
print("Dataframe exported to CSV.")
Dataframe exported to CSV.
from google.colab import files
# Download the Amazon CSV file to your local machine
files.download('amzn_cleaned_feature_engineered.csv')
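The CSV round trip through `files.download` works, but the cleaned dataframe can also be written back to BigQuery directly. A minimal sketch, assuming the same `client` and a hypothetical destination table ID (adjust to your project and dataset):
# Load the engineered dataframe straight into a BigQuery table
# (destination table ID below is a placeholder)
table_id = 'stockprediction-434721.stock_data.amzn_prices_features'
load_job = client.load_table_from_dataframe(amzn_df_cleaned, table_id)
load_job.result()  # Block until the load job completes
print(f"Loaded {load_job.output_rows} rows into {table_id}")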
MODEL TRAINING:
Step 1: Split the Data into Training and Testing Sets
from sklearn.model_selection import train_test_split
# Define features and target variable
X_amzn = amzn_df_cleaned[['7_day_MA', '30_day_MA', '7_day_volatility', '30_day_volatility', 'Previous_Close', 'Previous_Volume', 'Daily_Return']]
y_amzn = amzn_df_cleaned['Close']
# Split the data
X_train_amzn, X_test_amzn, y_train_amzn, y_test_amzn = train_test_split(X_amzn, y_amzn, test_size=0.2, random_state=42)
# Preview the shapes
print(X_train_amzn.shape, X_test_amzn.shape, y_train_amzn.shape, y_test_amzn.shape)
(976, 7) (245, 7) (976,) (245,)
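One caveat: `train_test_split` with `random_state=42` shuffles the rows, so test days end up interleaved with training days. Because the features include lagged and rolling values, this makes the evaluation optimistic relative to true forecasting. A stricter chronological split, sketched here as an alternative (not what the metrics below use):
# Chronological split: train on the first 80% of days, test on the last 20%
split_idx = int(len(X_amzn) * 0.8)
X_train_ts, X_test_ts = X_amzn.iloc[:split_idx], X_amzn.iloc[split_idx:]
y_train_ts, y_test_ts = y_amzn.iloc[:split_idx], y_amzn.iloc[split_idx:]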
Step 2: Select Machine Learning Models
LINEAR REGRESSION MODEL:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the model
model_amzn = LinearRegression()
# Train the model on the training data
model_amzn.fit(X_train_amzn, y_train_amzn)
# Predict on the test data
y_pred_amzn = model_amzn.predict(X_test_amzn)
# Evaluate the model
mse_amzn = mean_squared_error(y_test_amzn, y_pred_amzn)
r2_amzn = r2_score(y_test_amzn, y_pred_amzn)
print("Amazon Linear Regression Performance:")
print(f"Mean Squared Error: {mse_amzn}")
print(f"R-squared: {r2_amzn}")
Amazon Linear Regression Performance:
Mean Squared Error: 0.4787777728733036
R-squared: 0.9994514430344438
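Since MSE is in squared price units, its square root reads more naturally in dollars. A quick, optional conversion (the value follows from the MSE printed above):
import numpy as np
# RMSE in the same units as Close (~0.69 here)
rmse_amzn = np.sqrt(mse_amzn)
print(f"Root Mean Squared Error: {rmse_amzn:.4f}")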
import matplotlib.pyplot as plt
import numpy as np
# Define the cyberpunk theme colors
cyberpunk_blue = '#00FFFF'
cyberpunk_red = '#FF007F'
cyberpunk_background = '#0D0D0D'
# Customize the plot style
plt.style.use('dark_background')
# Plot for Amazon stock
plt.figure(figsize=(10, 6))
plt.plot(np.arange(len(y_test_amzn)), y_test_amzn, color=cyberpunk_blue, label='Actual Price', linewidth=2)
plt.plot(np.arange(len(y_pred_amzn)), y_pred_amzn, color=cyberpunk_red, linestyle='--', label='Predicted Price', linewidth=2)
plt.title('Amazon Stock Price - Actual vs Predicted', fontsize=16, color=cyberpunk_blue)
plt.xlabel('Test Sample Index', fontsize=12, color='white')
plt.ylabel('Price', fontsize=12, color='white')
plt.legend(loc='upper left', fontsize=10)
plt.grid(True, color='#333333')
plt.gca().set_facecolor(cyberpunk_background)
plt.show()
RANDOM FOREST MODEL:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the model for Amazon
rf_amzn = RandomForestRegressor(n_estimators=100, random_state=42)
# Train the model on the training data
rf_amzn.fit(X_train_amzn, y_train_amzn)
# Predict on the test data
y_pred_rf_amzn = rf_amzn.predict(X_test_amzn)
# Evaluate the model
mse_rf_amzn = mean_squared_error(y_test_amzn, y_pred_rf_amzn)
r2_rf_amzn = r2_score(y_test_amzn, y_pred_rf_amzn)
print("Amazon Random Forest Performance:")
print(f"Mean Squared Error: {mse_rf_amzn}")
print(f"R-squared: {r2_rf_amzn}")
Amazon Random Forest Performance:
Mean Squared Error: 3.0215685259436027
R-squared: 0.9965380546973505
# Visualization for Random Forest - Amazon
plt.figure(figsize=(10, 6))
plt.plot(y_test_amzn[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_rf_amzn[:250], 'm--', label="Predicted Price")
plt.title("Amazon Stock Price - Actual vs Predicted (Random Forest)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
FEATURE IMPORTANCE ANALYSIS:
Explore which features (7-day MA, 30-day volatility, etc.) had the most influence on the stock price predictions.
# Get feature importance from the Random Forest model
importances_amzn = rf_amzn.feature_importances_
# Create a dataframe for the features and their importance
feature_names_amzn = X_train_amzn.columns
importance_df_amzn = pd.DataFrame({
    'Feature': feature_names_amzn,
    'Importance': importances_amzn
})
# Sort the dataframe by importance
importance_df_amzn = importance_df_amzn.sort_values(by='Importance', ascending=False)
# Plot the feature importance
plt.figure(figsize=(10, 6))
plt.barh(importance_df_amzn['Feature'], importance_df_amzn['Importance'], color='cyan')
plt.xlabel('Feature Importance', color='cyan')
plt.ylabel('Features', color='cyan')
plt.title('Amazon Stock Feature Importance (Random Forest)', color='cyan')
plt.gca().set_facecolor('black')
plt.gca().spines['bottom'].set_color('cyan')
plt.gca().spines['top'].set_color('cyan')
plt.gca().spines['left'].set_color('cyan')
plt.gca().spines['right'].set_color('cyan')
plt.show()
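Impurity-based importances from Random Forests can overstate correlated features, and Previous_Close, the moving averages, and Close are all strongly correlated here. Permutation importance on the test set is a common cross-check; a minimal sketch (not part of the original analysis):
from sklearn.inspection import permutation_importance
# Score drop when each feature is independently shuffled
perm_amzn = permutation_importance(rf_amzn, X_test_amzn, y_test_amzn,
                                   n_repeats=10, random_state=42)
perm_df_amzn = pd.DataFrame({
    'Feature': X_test_amzn.columns,
    'Importance': perm_amzn.importances_mean
}).sort_values(by='Importance', ascending=False)
print(perm_df_amzn)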
GRADIENT BOOSTING REGRESSOR:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the Gradient Boosting model for Amazon
gb_amzn = GradientBoostingRegressor(n_estimators=100, random_state=42)
# Train the model on the training data
gb_amzn.fit(X_train_amzn, y_train_amzn)
# Predict on the test data
y_pred_gb_amzn = gb_amzn.predict(X_test_amzn)
# Evaluate the model
mse_gb_amzn = mean_squared_error(y_test_amzn, y_pred_gb_amzn)
r2_gb_amzn = r2_score(y_test_amzn, y_pred_gb_amzn)
print("Amazon Gradient Boosting Performance:")
print(f"Mean Squared Error: {mse_gb_amzn}")
print(f"R-squared: {r2_gb_amzn}")
Amazon Gradient Boosting Performance:
Mean Squared Error: 1.3739593743462717
R-squared: 0.99842579370244
# Visualization for Gradient Boosting - Amazon
plt.figure(figsize=(10, 6))
plt.plot(y_test_amzn[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_gb_amzn[:250], 'm--', label="Predicted Price")
plt.title("Amazon Stock Price - Actual vs Predicted (Gradient Boosting)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
HYPERPARAMETER TUNING:
from sklearn.model_selection import GridSearchCV
# Define the parameter grid for Gradient Boosting
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
# Initialize the model
gb_amzn = GradientBoostingRegressor(random_state=42)
# Initialize GridSearchCV
grid_search_amzn = GridSearchCV(estimator=gb_amzn, param_grid=param_grid,
                                cv=5, n_jobs=-1, verbose=2)
# Fit the model to the training data
grid_search_amzn.fit(X_train_amzn, y_train_amzn)
# Get the best parameters
best_params_amzn = grid_search_amzn.best_params_
print("Best parameters for Amazon:", best_params_amzn)
# Evaluate the model with the best parameters
best_gb_amzn = grid_search_amzn.best_estimator_
# Use distinct names so the linear regression results (y_pred_amzn, mse_amzn,
# r2_amzn) are not overwritten; they are reused in the dashboard below
y_pred_best_gb_amzn = best_gb_amzn.predict(X_test_amzn)
mse_best_gb_amzn = mean_squared_error(y_test_amzn, y_pred_best_gb_amzn)
r2_best_gb_amzn = r2_score(y_test_amzn, y_pred_best_gb_amzn)
print("Amazon Gradient Boosting Performance (Tuned):")
print(f"Mean Squared Error: {mse_best_gb_amzn}")
print(f"R-squared: {r2_best_gb_amzn}")
Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Best parameters for Amazon: {'learning_rate': 0.2, 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 300}
Amazon Gradient Boosting Performance (Tuned):
Mean Squared Error: 1.1142366645223887
R-squared: 0.9987233695500656
The hyperparameter tuning for Amazon finished successfully, and the best parameters have been found. Performance improved after tuning, with the mean squared error dropping from 1.3740 (untuned) to 1.1142.
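For reference, the grid above already costs 243 candidate fits per fold; if it grows further, randomized search samples a fixed budget of combinations instead of exhausting the grid. A sketch of that alternative, reusing `param_grid` (the `n_iter=50` budget is an arbitrary assumption):
from sklearn.model_selection import RandomizedSearchCV
# Sample 50 random parameter combinations instead of all 243
random_search_amzn = RandomizedSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_distributions=param_grid,
    n_iter=50, cv=5, n_jobs=-1, random_state=42, verbose=2
)
random_search_amzn.fit(X_train_amzn, y_train_amzn)
print("Best parameters:", random_search_amzn.best_params_)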
SAVING THE TUNED MODEL:
import joblib
joblib.dump(best_gb_amzn, 'best_gb_amzn_model.pkl')
['best_gb_amzn_model.pkl']
(FOR FUTURE REFERENCE): LOAD MODEL
# import joblib
# Load the saved Amazon Gradient Boosting model
# best_gb_amzn_loaded = joblib.load('best_gb_amzn_model.pkl')
# Predict on new data (assuming `new_data_amzn` is available)
# new_data_amzn should be in the same format as the training data (features)
# future_predictions_amzn = best_gb_amzn_loaded.predict(new_data_amzn)
# Print the predictions
# print(future_predictions_amzn)
Feedforward Neural Network for stock price prediction (note: the model below uses only Dense layers, not LSTM layers; a sequence-based LSTM sketch follows at the end of this section):
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
# Feature scaling
scaler = StandardScaler()
X_train_amzn_scaled = scaler.fit_transform(X_train_amzn)
X_test_amzn_scaled = scaler.transform(X_test_amzn)
# Define the neural network model; name it nn_amzn so the linear regression
# model stored earlier in model_amzn is not overwritten
nn_amzn = Sequential([
    Input(shape=(X_train_amzn.shape[1],)),  # Input layer, as newer Keras versions recommend
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer
])
# Compile the model
nn_amzn.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
history_amzn = nn_amzn.fit(X_train_amzn_scaled, y_train_amzn, validation_split=0.2, epochs=50, batch_size=32)
# Predict on the test set
y_pred_nn_amzn = nn_amzn.predict(X_test_amzn_scaled)
# Evaluate the performance
mse_amzn_nn = mean_squared_error(y_test_amzn, y_pred_nn_amzn)
r2_amzn_nn = r2_score(y_test_amzn, y_pred_nn_amzn)
print(f"Amazon Neural Network Performance:")
print(f"Mean Squared Error: {mse_amzn_nn}")
print(f"R-squared: {r2_amzn_nn}")
Epoch 1/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - loss: 20321.0449 - val_loss: 20512.3633
... (epochs 2-49 omitted; loss falls steadily from ~20000 to under 100) ...
Epoch 50/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 59.7413 - val_loss: 78.1540
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
Amazon Neural Network Performance:
Mean Squared Error: 86.66687340067999
R-squared: 0.9007019127024065
Notes: The number of neurons, layers, epochs, and batch size can all be adjusted to improve the model. StandardScaler puts every feature on the same scale, which matters for neural networks. The model is trained for 50 epochs here; adjust the count based on validation performance.
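One such adjustment: instead of hard-coding 50 epochs, early stopping can watch the validation loss and halt when it plateaus. A sketch of how the fit call above could be modified (the patience value is an assumption):
from tensorflow.keras.callbacks import EarlyStopping
# Stop once val_loss has not improved for 10 epochs; keep the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history_amzn = nn_amzn.fit(X_train_amzn_scaled, y_train_amzn,
                           validation_split=0.2, epochs=200,
                           batch_size=32, callbacks=[early_stop])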
# Amazon Neural Network Predictions Visualization
plt.figure(figsize=(10, 6))
plt.plot(y_test_amzn[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_nn_amzn[:250], 'm--', label="Predicted Price")
plt.title("Amazon Stock Price - Actual vs Predicted (Neural Network)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle="--", alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
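For completeness: a true LSTM consumes windows of consecutive days rather than one row of engineered features per sample. A minimal sketch of that setup under illustrative assumptions (30-day window, 64 units; the scaler is fit on all rows purely for brevity, which leaks information and should be fit on the training portion only in practice):
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense
from sklearn.preprocessing import StandardScaler

def make_windows(features, targets, window=30):
    # Build (samples, window, n_features) sequences with next-day targets
    X_seq, y_seq = [], []
    for i in range(window, len(features)):
        X_seq.append(features[i - window:i])
        y_seq.append(targets[i])
    return np.array(X_seq), np.array(y_seq)

seq_scaler = StandardScaler()
X_seq_amzn, y_seq_amzn = make_windows(
    seq_scaler.fit_transform(X_amzn), y_amzn.values, window=30)

lstm_amzn = Sequential([
    Input(shape=(30, X_amzn.shape[1])),
    LSTM(64),
    Dense(1)
])
lstm_amzn.compile(optimizer='adam', loss='mean_squared_error')
lstm_amzn.summary()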
# Save the Neural Network model for Amazon in the native Keras format
nn_amzn.save('best_nn_amzn_model_tuned.keras')
joblib.dump(model_amzn, 'linear_reg_amzn_model.pkl')
['linear_reg_amzn_model.pkl']
joblib.dump(rf_amzn, 'random_forest_amzn_model.pkl')
['random_forest_amzn_model.pkl']
joblib.dump(best_gb_amzn, 'gradient_boost_amzn_model.pkl')
['gradient_boost_amzn_model.pkl']
# Load all Models for Amazon:
from tensorflow.keras.models import load_model
# Load Linear Regression model
linear_reg_amzn_model = joblib.load('linear_reg_amzn_model.pkl')
# Load Random Forest model
random_forest_amzn_model = joblib.load('random_forest_amzn_model.pkl')
# Load Gradient Boosting model
gradient_boost_amzn_model = joblib.load('gradient_boost_amzn_model.pkl')
# Load Neural Network model for Amazon
best_nn_amzn_model = load_model('best_nn_amzn_model_tuned.keras')
from google.colab import files
# Downloading Amazon models
files.download('linear_reg_amzn_model.pkl')
files.download('random_forest_amzn_model.pkl')
files.download('gradient_boost_amzn_model.pkl')
files.download('best_nn_amzn_model_tuned.keras')
# Download other models as needed
Dashboard:
import matplotlib.pyplot as plt
import numpy as np
# Define colors
cyberpunk_blue = '#00FFFF'
cyberpunk_pink = '#FF1493'  # Pink for Gradient Boosting
cyberpunk_background = '#000D0D'
random_forest_color = '#FF00FF'  # Magenta for Random Forest
nn_color = '#FFFF00'  # Yellow for the neural network
# Create subplots: 2 rows, 2 columns
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
fig.subplots_adjust(hspace=0.6, top=0.70) # Adjusting space between the charts and shifting top margin for title
# Title for the entire figure
fig.suptitle('Amazon Stock Price Prediction - Model Comparison', fontsize=18, color='white')
# Table with model performance metrics, replacing MSE for Neural Network with '--'
table_data = [
    ["Model", "R-squared", "Mean Squared Error"],
    ["Linear Regression", 0.9995, 0.4788],
    ["Random Forest", 0.9965, 3.0216],
    ["Gradient Boosting", 0.9987, 1.1142],
    ["Neural Network (Dense)", 0.9007, "--"]
]
# Add the table without extra space
ax_table = fig.add_axes([0.1, 0.78, 0.8, 0.12]) # Shifting the table slightly lower
ax_table.axis('off')
table = ax_table.table(cellText=table_data, colWidths=[0.3]*3, loc='center', cellLoc='center')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)
# Set table background to black and text to white
for key, cell in table.get_celld().items():
    cell.set_edgecolor('white')
    cell.set_text_props(color='white')
    cell.set_facecolor('black')
# Plot 1: Linear Regression
axs[0, 0].plot(np.arange(len(y_test_amzn[:250])), y_test_amzn[:250], color=cyberpunk_blue, label='Actual Price')
axs[0, 0].plot(np.arange(len(y_pred_amzn[:250])), y_pred_amzn[:250], 'm--', label='Predicted Price (LR)', alpha=0.7)
axs[0, 0].set_title('Linear Regression', fontsize=12, color='white')
axs[0, 0].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[0, 0].set_ylabel('Price', fontsize=10, color='white')
axs[0, 0].legend(loc='upper left')
axs[0, 0].grid(True, linestyle='--', alpha=0.7)
axs[0, 0].set_facecolor(cyberpunk_background)
# Plot 2: Random Forest (Magenta)
axs[0, 1].plot(np.arange(len(y_test_amzn[:250])), y_test_amzn[:250], color=cyberpunk_blue, label='Actual Price')
axs[0, 1].plot(np.arange(len(y_pred_rf_amzn[:250])), y_pred_rf_amzn[:250], color=random_forest_color, label='Predicted Price (RF)', alpha=0.7)
axs[0, 1].set_title('Random Forest', fontsize=12, color='white')
axs[0, 1].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[0, 1].set_ylabel('Price', fontsize=10, color='white')
axs[0, 1].legend(loc='upper left')
axs[0, 1].grid(True, linestyle='--', alpha=0.7)
axs[0, 1].set_facecolor(cyberpunk_background)
# Plot 3: Gradient Boosting (Pink)
axs[1, 0].plot(np.arange(len(y_test_amzn[:250])), y_test_amzn[:250], color=cyberpunk_blue, label='Actual Price')
axs[1, 0].plot(np.arange(len(y_pred_gb_amzn[:250])), y_pred_gb_amzn[:250], color=cyberpunk_pink, label='Predicted Price (GB)', alpha=0.7) # Pink color
axs[1, 0].set_title('Gradient Boosting', fontsize=12, color='white')
axs[1, 0].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[1, 0].set_ylabel('Price', fontsize=10, color='white')
axs[1, 0].legend(loc='upper left')
axs[1, 0].grid(True, linestyle='--', alpha=0.7)
axs[1, 0].set_facecolor(cyberpunk_background)
# Plot 4: Neural Network (Yellow)
axs[1, 1].plot(np.arange(len(y_test_amzn[:250])), y_test_amzn[:250], color=cyberpunk_blue, label='Actual Price')
axs[1, 1].plot(np.arange(len(y_pred_nn_amzn[:250])), y_pred_nn_amzn[:250], color=nn_color, label='Predicted Price (NN)', alpha=0.7)
axs[1, 1].set_title('Neural Network (Dense)', fontsize=12, color='white')
axs[1, 1].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[1, 1].set_ylabel('Price', fontsize=10, color='white')
axs[1, 1].legend(loc='upper left')
axs[1, 1].grid(True, linestyle='--', alpha=0.7)
axs[1, 1].set_facecolor(cyberpunk_background)
# Display the final dashboard
plt.show()