query = """
SELECT *
FROM `stockprediction-434721.stock_data.google_prices`
WHERE CAST(Date AS DATE) >= DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR)
"""
google_df = client.query(query).to_dataframe()
google_df.head(10)
| | Date | Open | High | Low | Close | Adj Close | Volume | Ticker |
|---|---|---|---|---|---|---|---|---|
0 | 2023-03-02 00:00:00+00:00 | 89.660004 | 92.279999 | 89.589996 | 92.0000 | 91.894531 | 32204400 | GOOGL |
1 | 2020-11-05 00:00:00+00:00 | 88.522499 | 89.591003 | 87.051498 | 88.1250 | 88.023979 | 39758000 | GOOGL |
2 | 2023-11-20 00:00:00+00:00 | 133.690002 | 136.660004 | 133.619995 | 136.2500 | 136.093811 | 27815500 | GOOGL |
3 | 2023-10-23 00:00:00+00:00 | 135.039993 | 137.660004 | 133.949997 | 136.5000 | 136.343521 | 26317900 | GOOGL |
4 | 2024-04-05 00:00:00+00:00 | 150.029999 | 153.419998 | 149.600006 | 152.5000 | 152.325180 | 23449300 | GOOGL |
5 | 2021-03-08 00:00:00+00:00 | 104.203003 | 105.686996 | 100.259499 | 100.3750 | 100.259933 | 36868000 | GOOGL |
6 | 2021-02-11 00:00:00+00:00 | 104.550003 | 104.699997 | 103.443001 | 104.4375 | 104.317772 | 20480000 | GOOGL |
7 | 2023-04-18 00:00:00+00:00 | 106.489998 | 106.540001 | 104.070000 | 104.5000 | 104.380203 | 26596400 | GOOGL |
8 | 2023-10-19 00:00:00+00:00 | 138.500000 | 139.660004 | 137.380005 | 137.7500 | 137.592087 | 26066000 | GOOGL |
9 | 2022-09-14 00:00:00+00:00 | 104.570000 | 105.279999 | 103.660004 | 105.0000 | 104.879631 | 26395100 | GOOGL |
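The `client` object used in the query above is assumed to have been created earlier in the notebook; a minimal sketch of that setup (Colab authentication plus a BigQuery client for the project referenced in the query) would be:
# Assumed setup (sketch): authenticate the Colab session and create the
# BigQuery client referenced as `client` above.
from google.colab import auth
from google.cloud import bigquery
auth.authenticate_user()
client = bigquery.Client(project="stockprediction-434721")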
CLEANING AND PREPROCESSING:
# Check for any missing or null values
print(google_df.isnull().sum())
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
Ticker       0
dtype: int64
import pandas as pd
# Ensure that Date is in datetime format
google_df['Date'] = pd.to_datetime(google_df['Date'])
# Drop columns that are not necessary for modeling
# Adjust this based on your needs
google_df = google_df.drop(columns=['Adj Close'])
# Sort data by Date in ascending order
google_df = google_df.sort_values(by='Date', ascending=True)
# Preview updated dataframes
print(google_df.head())
                          Date       Open       High        Low      Close    Volume Ticker
847  2019-09-16 00:00:00+00:00  61.521999  61.951000  61.259499  61.581501  28218000  GOOGL
503  2019-09-17 00:00:00+00:00  61.581501  61.750000  61.167500  61.493999  20806000  GOOGL
1101 2019-09-18 00:00:00+00:00  61.512001  61.812000  60.823502  61.632500  17440000  GOOGL
30   2019-09-19 00:00:00+00:00  61.616001  62.216999  61.615501  61.937500  16554000  GOOGL
1247 2019-09-20 00:00:00+00:00  61.681999  62.139999  61.179001  61.492001  37692000  GOOGL
FEATURE ENGINEERING:
1. Create Rolling Statistics: Moving averages (e.g., 7-day, 30-day) smooth out stock prices and reveal trends; volatility (the standard deviation of returns) indicates stock price variability.
2. Create Lag Features: Previous days' prices and volume help the model see short-term trends.
3. Compute Returns: Daily returns measure the percentage change from one day to the next.
# Feature Engineering for google
# 1. Moving Averages
google_df['7_day_MA'] = google_df['Close'].rolling(window=7).mean()
google_df['30_day_MA'] = google_df['Close'].rolling(window=30).mean()
# 2. Volatility (Standard deviation of daily returns over 7 and 30 days)
google_df['7_day_volatility'] = google_df['Close'].pct_change().rolling(window=7).std()
google_df['30_day_volatility'] = google_df['Close'].pct_change().rolling(window=30).std()
# 3. Lag Features (Previous day's price and volume)
google_df['Previous_Close'] = google_df['Close'].shift(1)
google_df['Previous_Volume'] = google_df['Volume'].shift(1)
# 4. Daily Returns
google_df['Daily_Return'] = google_df['Close'].pct_change()
# Preview updated dataframe for google
print(google_df.head())
                          Date       Open       High        Low      Close  \
847  2019-09-16 00:00:00+00:00  61.521999  61.951000  61.259499  61.581501
503  2019-09-17 00:00:00+00:00  61.581501  61.750000  61.167500  61.493999
1101 2019-09-18 00:00:00+00:00  61.512001  61.812000  60.823502  61.632500
30   2019-09-19 00:00:00+00:00  61.616001  62.216999  61.615501  61.937500
1247 2019-09-20 00:00:00+00:00  61.681999  62.139999  61.179001  61.492001

        Volume Ticker  7_day_MA  30_day_MA  7_day_volatility  \
847   28218000  GOOGL       NaN        NaN               NaN
503   20806000  GOOGL       NaN        NaN               NaN
1101  17440000  GOOGL       NaN        NaN               NaN
30    16554000  GOOGL       NaN        NaN               NaN
1247  37692000  GOOGL       NaN        NaN               NaN

      30_day_volatility  Previous_Close  Previous_Volume  Daily_Return
847                 NaN             NaN             <NA>           NaN
503                 NaN       61.581501         28218000     -0.001421
1101                NaN       61.493999         20806000      0.002252
30                  NaN       61.632500         17440000      0.004949
1247                NaN       61.937500         16554000     -0.007193
# Check for missing values in each column for google
print(google_df.isna().sum())
# Visualize where NaNs occur in google data
import matplotlib.pyplot as plt
import seaborn as sns
plt.figure(figsize=(10, 6))
sns.heatmap(google_df.isna(), cbar=False, cmap="viridis")
plt.title('Google Data Missing Values')
plt.show()
Date                  0
Open                  0
High                  0
Low                   0
Close                 0
Volume                0
Ticker                0
7_day_MA              6
30_day_MA            29
7_day_volatility      7
30_day_volatility    30
Previous_Close        1
Previous_Volume       1
Daily_Return          1
dtype: int64
Based on the heatmap and summary, the missing values are concentrated at the beginning of the dataset, specifically in the moving-average and volatility columns. Since these are rolling calculations, NaN values at the start are expected.
It is therefore appropriate to drop the rows containing these NaN values: they occur only at the beginning of the series and are not informative for future predictions.
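As a quick sanity check (a small sketch, not part of the original notebook), one can confirm that every row with a missing value falls within the earliest trading days of the series:
# Sanity check (sketch): all rows containing NaNs should sit at the very start
# of the date range, where the 30-day rolling windows are still incomplete.
nan_rows = google_df[google_df.isna().any(axis=1)]
print(nan_rows['Date'].min(), nan_rows['Date'].max())
print(f"{len(nan_rows)} affected rows out of {len(google_df)}")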
# Drop rows with NaN values in the google dataframe
google_df_cleaned = google_df.dropna()
# Preview the cleaned google dataframe
print(google_df_cleaned.head())
                          Date       Open       High        Low      Close  \
587  2019-10-28 00:00:00+00:00  63.750000  64.961998  63.615501  64.448997
494  2019-10-29 00:00:00+00:00  63.799999  64.040001  62.780499  63.033001
1187 2019-10-30 00:00:00+00:00  62.757500  63.401001  62.532501  63.035000
1153 2019-10-31 00:00:00+00:00  63.000000  63.326000  62.473999  62.939999
525  2019-11-01 00:00:00+00:00  63.290001  63.650002  62.985500  63.612499

        Volume Ticker   7_day_MA  30_day_MA  7_day_volatility  \
587   65428000  GOOGL  62.856500  61.475750          0.009195
494   52654000  GOOGL  62.972572  61.527050          0.013149
1187  29204000  GOOGL  63.089857  61.573800          0.013146
1153  31904000  GOOGL  63.215571  61.607217          0.013097
525   28812000  GOOGL  63.320000  61.677900          0.012766

      30_day_volatility  Previous_Close  Previous_Volume  Daily_Return
587            0.011020       63.215000         27104000      0.019521
494            0.011822       64.448997         65428000     -0.021971
1187           0.011820       63.033001         52654000      0.000032
1153           0.011801       63.035000         29204000     -0.001507
525            0.011845       62.939999         31904000      0.010685
print(google_df_cleaned.shape)
(1221, 14)
Exporting to CSV to update BigQuery table:
# Define the filename for the google dataframe
google_csv_filename = "google_cleaned_feature_engineered.csv"
# Export the cleaned google dataframe to CSV
google_df_cleaned.to_csv(google_csv_filename, index=False)
print(f"Dataframe exported to CSV: {google_csv_filename}")
Dataframe exported to CSV: google_cleaned_feature_engineered.csv
from google.colab import files
# Download the google CSV file to your local machine
files.download('google_cleaned_feature_engineered.csv')
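The heading above mentions updating the BigQuery table, but the notebook only exports and downloads the CSV. One way to push the cleaned dataframe back to BigQuery directly (a sketch; the destination table name is an assumption, not part of the original project) is:
# Sketch: load the cleaned, feature-engineered dataframe into BigQuery.
# The destination table name below is assumed, not taken from the notebook.
from google.cloud import bigquery
destination = "stockprediction-434721.stock_data.google_prices_cleaned"
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
load_job = client.load_table_from_dataframe(google_df_cleaned, destination, job_config=job_config)
load_job.result()  # Wait for the load job to complete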
MODEL TRAINING:
Step 1: Split the Data into Training and Testing Sets
from sklearn.model_selection import train_test_split
# Define features and target variable
X_google = google_df_cleaned[['7_day_MA', '30_day_MA', '7_day_volatility', '30_day_volatility', 'Previous_Close', 'Previous_Volume', 'Daily_Return']]
y_google = google_df_cleaned['Close']
# Split the data
X_train_google, X_test_google, y_train_google, y_test_google = train_test_split(X_google, y_google, test_size=0.2, random_state=42)
# Preview the shapes
print(X_train_google.shape, X_test_google.shape, y_train_google.shape, y_test_google.shape)
(976, 7) (245, 7) (976,) (245,)
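Note that `train_test_split` shuffles the rows, so the test set is a random sample of trading days rather than a final hold-out period, and the prediction plots below therefore show test samples in arbitrary order rather than a date sequence. Lag features such as `Previous_Close` also make very high R-squared values expected for this setup. For a stricter time-series evaluation, a chronological split could be used instead; a minimal sketch (the `_ts` variables are new names, not used elsewhere in the notebook):
# Sketch only: a chronological hold-out split (last 20% of trading days as the
# test set), so the model never trains on days that come after the test period.
split_idx = int(len(X_google) * 0.8)
X_train_ts, X_test_ts = X_google.iloc[:split_idx], X_google.iloc[split_idx:]
y_train_ts, y_test_ts = y_google.iloc[:split_idx], y_google.iloc[split_idx:]
print(X_train_ts.shape, X_test_ts.shape)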
Step 2: Select Machine Learning Models
LINEAR REGRESSION MODEL:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the model
model_google = LinearRegression()
# Train the model on the training data
model_google.fit(X_train_google, y_train_google)
# Predict on the test data
y_pred_google = model_google.predict(X_test_google)
# Evaluate the model
mse_google = mean_squared_error(y_test_google, y_pred_google)
r2_google = r2_score(y_test_google, y_pred_google)
print("google Linear Regression Performance:")
print(f"Mean Squared Error: {mse_google}")
print(f"R-squared: {r2_google}")
Google Linear Regression Performance:
Mean Squared Error: 0.3042198531078174
R-squared: 0.9996599043157406
import matplotlib.pyplot as plt
import numpy as np
# Define the cyberpunk theme colors
cyberpunk_blue = '#00FFFF'
cyberpunk_red = '#FF007F'
cyberpunk_background = '#0D0D0D'
# Customize the plot style
plt.style.use('dark_background')
# Plot for google stock
plt.figure(figsize=(10, 6))
plt.plot(np.arange(len(y_test_google)), y_test_google, color=cyberpunk_blue, label='Actual Price', linewidth=2)
plt.plot(np.arange(len(y_pred_google)), y_pred_google, color=cyberpunk_red, linestyle='--', label='Predicted Price', linewidth=2)
plt.title('Google Stock Price - Actual vs Predicted', fontsize=16, color=cyberpunk_blue)
plt.xlabel('Test Sample Index', fontsize=12, color='white')
plt.ylabel('Price', fontsize=12, color='white')
plt.legend(loc='upper left', fontsize=10)
plt.grid(True, color='#333333')
plt.gca().set_facecolor(cyberpunk_background)
plt.show()
RANDOM FOREST MODEL:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the model
rf_google = RandomForestRegressor(n_estimators=100, random_state=42)
# Train the model on the training data
rf_google.fit(X_train_google, y_train_google)
# Predict on the test data
y_pred_rf_google = rf_google.predict(X_test_google)
# Evaluate the model
mse_rf_google = mean_squared_error(y_test_google, y_pred_rf_google)
r2_rf_google = r2_score(y_test_google, y_pred_rf_google)
print("google Random Forest Performance:")
print(f"Mean Squared Error: {mse_rf_google}")
print(f"R-squared: {r2_rf_google}")
Google Random Forest Performance:
Mean Squared Error: 0.8180990764843861
R-squared: 0.9990854246941263
# Visualization for Random Forest - google
plt.figure(figsize=(10, 6))
plt.plot(y_test_google[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_rf_google[:250], 'm--', label="Predicted Price")
plt.title("google Stock Price - Actual vs Predicted (Random Forest)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
FEATURE IMPORTANCE ANALYSIS:
Explore which features (7-day MA, 30-day volatility, etc.) had the most influence on the stock price predictions.
# Get feature importance from the Random Forest model
importances_google = rf_google.feature_importances_
# Create a dataframe for the features and their importance
feature_names_google = X_train_google.columns
importance_df_google = pd.DataFrame({
'Feature': feature_names_google,
'Importance': importances_google
})
# Sort the dataframe by importance
importance_df_google = importance_df_google.sort_values(by='Importance', ascending=False)
# Plot the feature importance
plt.figure(figsize=(10, 6))
plt.barh(importance_df_google['Feature'], importance_df_google['Importance'], color='cyan')
plt.xlabel('Feature Importance', color='cyan')
plt.ylabel('Features', color='cyan')
plt.title('Google Stock Feature Importance (Random Forest)', color='cyan')
plt.gca().set_facecolor('black')
plt.gca().spines['bottom'].set_color('cyan')
plt.gca().spines['top'].set_color('cyan')
plt.gca().spines['left'].set_color('cyan')
plt.gca().spines['right'].set_color('cyan')
plt.show()
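Impurity-based importances from tree ensembles tend to favour features with many distinct values, so as a cross-check (a sketch using scikit-learn's `permutation_importance`, not part of the original analysis) the importances can also be measured on the held-out test set:
# Sketch: permutation importance on the test set as a cross-check against the
# impurity-based importances plotted above.
from sklearn.inspection import permutation_importance
perm_google = permutation_importance(
    rf_google, X_test_google, y_test_google, n_repeats=10, random_state=42
)
ranked = sorted(zip(X_test_google.columns, perm_google.importances_mean),
                key=lambda item: item[1], reverse=True)
for name, score in ranked:
    print(f"{name}: {score:.4f}")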
GRADIENT BOOSTING REGRESSOR:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the Gradient Boosting model
gb_google = GradientBoostingRegressor(n_estimators=100, random_state=42)
# Train the model on the training data
gb_google.fit(X_train_google, y_train_google)
# Predict on the test data
y_pred_gb_google = gb_google.predict(X_test_google)
# Evaluate the model
mse_gb_google = mean_squared_error(y_test_google, y_pred_gb_google)
r2_gb_google = r2_score(y_test_google, y_pred_gb_google)
print("google Gradient Boosting Performance:")
print(f"Mean Squared Error: {mse_gb_google}")
print(f"R-squared: {r2_gb_google}")
Google Gradient Boosting Performance:
Mean Squared Error: 0.807097784485413
R-squared: 0.9990977233389777
# Visualization for Gradient Boosting - google
plt.figure(figsize=(10, 6))
plt.plot(y_test_google[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_gb_google[:250], 'm--', label="Predicted Price")
plt.title("google Stock Price - Actual vs Predicted (Gradient Boosting)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
HYPERPARAMETER TUNING:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Define the parameter grid for Gradient Boosting
param_grid = {
'n_estimators': [100, 200, 300], # Number of boosting stages to be run
'learning_rate': [0.01, 0.1, 0.2], # Step size shrinkage used in update
'max_depth': [3, 5, 7], # Maximum depth of the individual regression estimators
'min_samples_split': [2, 5, 10], # Minimum number of samples required to split an internal node
'min_samples_leaf': [1, 2, 4] # Minimum number of samples required to be at a leaf node
}
# Initialize the Gradient Boosting Regressor for google
gb_google = GradientBoostingRegressor(random_state=42)
# Initialize GridSearchCV to find the best parameters
grid_search_google = GridSearchCV(estimator=gb_google, param_grid=param_grid,
cv=5, n_jobs=-1, verbose=2)
# Fit the model to the google training data
grid_search_google.fit(X_train_google, y_train_google)
# Get the best parameters from GridSearch
best_params_google = grid_search_google.best_params_
print("Best parameters for google:", best_params_google)
# Evaluate the model with the best parameters
best_gb_google = grid_search_google.best_estimator_
y_pred_gb_google = best_gb_google.predict(X_test_google)
mse_google = mean_squared_error(y_test_google, y_pred_gb_google)
r2_google = r2_score(y_test_google, y_pred_gb_google)
print("google Gradient Boosting Performance (Tuned):")
print(f"Mean Squared Error: {mse_google}")
print(f"R-squared: {r2_google}")
Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Best parameters for Google: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 300}
Google Gradient Boosting Performance (Tuned):
Mean Squared Error: 0.6612336977634583
R-squared: 0.9992607887861398
SAVING THE TUNED MODEL:
import joblib
joblib.dump(best_gb_google, 'best_gb_google_model.pkl')
['best_gb_google_model.pkl']
(FOR FUTURE REFERENCE): LOAD MODEL
#import joblib
# best_gb_google_loaded = joblib.load('best_gb_google_model.pkl')
# future_predictions = best_gb_google_loaded.predict(new_data)
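For the commented-out example above to work, `new_data` must contain the same seven feature columns used during training. A minimal sketch with placeholder values (the numbers are illustrative only, not real market data):
# Sketch (placeholder values only): the saved model expects the seven training features.
# import joblib
# import pandas as pd
# best_gb_google_loaded = joblib.load('best_gb_google_model.pkl')
# new_data = pd.DataFrame([{
#     '7_day_MA': 150.0, '30_day_MA': 148.0,
#     '7_day_volatility': 0.012, '30_day_volatility': 0.015,
#     'Previous_Close': 151.0, 'Previous_Volume': 25000000,
#     'Daily_Return': 0.004
# }])
# print(best_gb_google_loaded.predict(new_data))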
Feedforward Neural Network (Keras) for stock price prediction:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
# Feature scaling
scaler = StandardScaler()
X_train_google_scaled = scaler.fit_transform(X_train_google)
X_test_google_scaled = scaler.transform(X_test_google)
# Define the neural network model
nn_google = Sequential([
    Input(shape=(X_train_google.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer
])
# Compile the model
nn_google.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
history_google = nn_google.fit(X_train_google_scaled, y_train_google,
validation_split=0.2, epochs=50, batch_size=32)
# Predict on the test set
y_pred_nn_google = nn_google.predict(X_test_google_scaled)
# Evaluate the performance
mse_google_nn = mean_squared_error(y_test_google, y_pred_nn_google)
r2_google_nn = r2_score(y_test_google, y_pred_nn_google)
print("google Neural Network Performance:")
print(f"Mean Squared Error: {mse_google_nn}")
print(f"R-squared: {r2_google_nn}")
Epoch 1/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - loss: 13658.7598 - val_loss: 13657.1328
...
Epoch 50/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 35.2651 - val_loss: 37.9729
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
Google Neural Network Performance:
Mean Squared Error: 35.39515297011495
R-squared: 0.9604307915940967
Notes: You can adjust the number of neurons, layers, epochs, and batch size to optimize the model. The StandardScaler ensures that all features are on the same scale, which is important for neural networks. The model is trained for 50 epochs here, but the number of epochs can be adjusted based on validation performance.
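The network above is a feedforward model trained on the engineered features rather than a recurrent one. An actual LSTM would instead consume a rolling window of past observations as 3-D input (samples, timesteps, features); below is a minimal sketch with an arbitrary 30-day window of closing prices and untuned layer sizes, separate from the model evaluated above.
# Sketch only: an LSTM trained on 30-day windows of closing prices. The window
# length, layer sizes, and epochs are arbitrary assumptions, and this model is
# not the feedforward network evaluated above.
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense
window = 30
closes = google_df_cleaned['Close'].values
X_seq = np.array([closes[i:i + window] for i in range(len(closes) - window)])
y_seq = closes[window:]
X_seq = X_seq.reshape(-1, window, 1)  # (samples, timesteps, features)
lstm_google = Sequential([
    Input(shape=(window, 1)),
    LSTM(32),
    Dense(1)
])
lstm_google.compile(optimizer='adam', loss='mean_squared_error')
# lstm_google.fit(X_seq, y_seq, validation_split=0.2, epochs=20, batch_size=32)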
# google Neural Network Predictions Visualization
plt.figure(figsize=(10, 6))
plt.plot(y_test_google[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_nn_google[:250], 'm--', label="Predicted Price")
plt.title("Google Stock Price - Actual vs Predicted (Neural Network)", color="cyan")
plt.xlabel("Test Sample Index", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle="--", alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
import joblib
# Save Linear Regression model
joblib.dump(model_google, 'linear_reg_google_model.pkl')
# Save Random Forest model (make sure the correct variable name is used when training the model)
joblib.dump(rf_google, 'random_forest_google_model.pkl')
# Save Neural Network model
nn_google.save('best_nn_google_model_tuned.keras')
import joblib
from tensorflow.keras.models import load_model
# Load Linear Regression model
linear_reg_google_model = joblib.load('linear_reg_google_model.pkl')
# Load Random Forest model
rf_google_model = joblib.load('random_forest_google_model.pkl')
# Load Gradient Boosting model
best_gb_google_model = joblib.load('best_gb_google_model.pkl')
# Load Neural Network model
best_nn_google_model = load_model('best_nn_google_model_tuned.keras')
from google.colab import files
# Downloading google models
files.download('linear_reg_google_model.pkl')
files.download('random_forest_google_model.pkl')
files.download('best_gb_google_model.pkl')
files.download('best_nn_google_model_tuned.keras')
Dashboard:
import matplotlib.pyplot as plt
import numpy as np
# Define colors
cyberpunk_blue = '#00FFFF'
cyberpunk_pink = '#FF1493' # This is the pink color for Gradient Boosting
cyberpunk_background = '#000D0D'
random_forest_color = '#FF00FF' # Magenta for Random Forest
nn_color = '#FFFF00' # Yellow for the neural network
# Create subplots: 2 rows, 2 columns
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
fig.subplots_adjust(hspace=0.6, top=0.70) # Adjusting space between the charts and shifting top margin for title
# Title for the entire figure
fig.suptitle('Google Stock Price Prediction - Model Comparison', fontsize=18, color='white')
# Table with model performance metrics
table_data = [
["Model", "R-squared", "Mean Squared Error"],
["Linear Regression", 0.9997, 0.3042],
["Random Forest", 0.9991, 0.8181],
["Gradient Boosting", 0.9993, 0.6612],
["LSTM Neural Network", 0.9604, 35.3952]
]
# Add the table without extra space
ax_table = fig.add_axes([0.1, 0.78, 0.8, 0.12]) # Shifting the table slightly lower
ax_table.axis('off')
table = ax_table.table(cellText=table_data, colWidths=[0.3]*3, loc='center', cellLoc='center')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)
# Set table background to black and text to white
for key, cell in table.get_celld().items():
cell.set_edgecolor('white')
cell.set_text_props(color='white')
cell.set_facecolor('black')
# Plot 1: Linear Regression
axs[0, 0].plot(np.arange(len(y_test_google[:250])), y_test_google[:250], color=cyberpunk_blue, label='Actual Price')
axs[0, 0].plot(np.arange(len(y_pred_google[:250])), y_pred_google[:250], 'm--', label='Predicted Price (LR)', alpha=0.7)
axs[0, 0].set_title('Linear Regression', fontsize=12, color='white')
axs[0, 0].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[0, 0].set_ylabel('Price', fontsize=10, color='white')
axs[0, 0].legend(loc='upper left')
axs[0, 0].grid(True, linestyle='--', alpha=0.7)
axs[0, 0].set_facecolor(cyberpunk_background)
# Plot 2: Random Forest (Magenta)
axs[0, 1].plot(np.arange(len(y_test_google[:250])), y_test_google[:250], color=cyberpunk_blue, label='Actual Price')
axs[0, 1].plot(np.arange(len(y_pred_rf_google[:250])), y_pred_rf_google[:250], color=random_forest_color, label='Predicted Price (RF)', alpha=0.7)
axs[0, 1].set_title('Random Forest', fontsize=12, color='white')
axs[0, 1].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[0, 1].set_ylabel('Price', fontsize=10, color='white')
axs[0, 1].legend(loc='upper left')
axs[0, 1].grid(True, linestyle='--', alpha=0.7)
axs[0, 1].set_facecolor(cyberpunk_background)
# Plot 3: Gradient Boosting (Pink)
axs[1, 0].plot(np.arange(len(y_test_google[:250])), y_test_google[:250], color=cyberpunk_blue, label='Actual Price')
axs[1, 0].plot(np.arange(len(y_pred_gb_google[:250])), y_pred_gb_google[:250], color=cyberpunk_pink, label='Predicted Price (GB)', alpha=0.7) # Pink color
axs[1, 0].set_title('Gradient Boosting', fontsize=12, color='white')
axs[1, 0].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[1, 0].set_ylabel('Price', fontsize=10, color='white')
axs[1, 0].legend(loc='upper left')
axs[1, 0].grid(True, linestyle='--', alpha=0.7)
axs[1, 0].set_facecolor(cyberpunk_background)
# Plot 4: Feedforward Neural Network (Yellow)
axs[1, 1].plot(np.arange(len(y_test_google[:250])), y_test_google[:250], color=cyberpunk_blue, label='Actual Price')
axs[1, 1].plot(np.arange(len(y_pred_nn_google[:250])), y_pred_nn_google[:250], color=nn_color, label='Predicted Price (NN)', alpha=0.7)
axs[1, 1].set_title('Feedforward Neural Network', fontsize=12, color='white')
axs[1, 1].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[1, 1].set_ylabel('Price', fontsize=10, color='white')
axs[1, 1].legend(loc='upper left')
axs[1, 1].grid(True, linestyle='--', alpha=0.7)
axs[1, 1].set_facecolor(cyberpunk_background)
# Display the final dashboard
plt.show()