query = """
SELECT *
FROM `stockprediction-434721.stock_data.meta_prices`
WHERE CAST(Date AS DATE) >= DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR)
"""
meta_df = client.query(query).to_dataframe()
meta_df.head(10)
| | Date | Open | High | Low | Close | Adj Close | Volume | Ticker |
|---|---|---|---|---|---|---|---|---|
| 0 | 2022-06-09 00:00:00+00:00 | 194.279999 | 199.449997 | 183.679993 | 184.00 | 183.622665 | 23501600 | META |
| 1 | 2022-02-09 00:00:00+00:00 | 224.199997 | 233.369995 | 222.210007 | 232.00 | 231.524231 | 86563300 | META |
| 2 | 2021-03-30 00:00:00+00:00 | 289.829987 | 292.470001 | 286.700012 | 288.00 | 287.409393 | 17474500 | META |
| 3 | 2020-04-16 00:00:00+00:00 | 177.949997 | 178.050003 | 172.509995 | 176.25 | 175.888565 | 23593200 | META |
| 4 | 2021-06-15 00:00:00+00:00 | 336.989990 | 339.920013 | 335.029999 | 336.75 | 336.059448 | 15795200 | META |
| 5 | 2020-06-15 00:00:00+00:00 | 225.089996 | 233.770004 | 224.800003 | 232.50 | 232.023209 | 15340300 | META |
| 6 | 2021-11-23 00:00:00+00:00 | 338.929993 | 341.399994 | 333.500000 | 337.25 | 336.558380 | 17225000 | META |
| 7 | 2021-03-15 00:00:00+00:00 | 269.079987 | 275.959991 | 268.500000 | 273.75 | 273.188629 | 16844800 | META |
| 8 | 2022-06-30 00:00:00+00:00 | 162.149994 | 165.229996 | 158.490005 | 161.25 | 160.919312 | 35250600 | META |
| 9 | 2023-03-03 00:00:00+00:00 | 178.919998 | 186.619995 | 177.050003 | 185.25 | 184.870102 | 45877700 | META |
Note that the rows come back in arbitrary order because the query has no ORDER BY clause; the data is sorted by date during preprocessing below.
CLEANING AND PREPROCESSING:
# Check for any missing or null values
print(meta_df.isnull().sum())
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
Ticker       0
dtype: int64
import pandas as pd
# Ensure that Date is in datetime format
meta_df['Date'] = pd.to_datetime(meta_df['Date'])
# Drop columns that are not necessary for modeling
# Adjust this based on your needs
meta_df = meta_df.drop(columns=['Adj Close'])
# Sort data by Date in ascending order
meta_df = meta_df.sort_values(by='Date', ascending=True)
# Preview updated dataframes
print(meta_df.head())
                          Date        Open        High         Low  \
339  2019-09-16 00:00:00+00:00  186.929993  187.789993  185.770004
528  2019-09-17 00:00:00+00:00  186.660004  188.320007  185.539993
1169 2019-09-18 00:00:00+00:00  188.089996  189.080002  186.009995
1189 2019-09-19 00:00:00+00:00  188.660004  191.100006  188.149994
738  2019-09-20 00:00:00+00:00  190.660004  193.100006  188.119995

          Close    Volume Ticker
339  186.220001   8444800   META
528  188.080002   9671100   META
1169 188.139999   9681900   META
1189 190.139999  10392700   META
738  189.929993  19934200   META
FEATURE ENGINEERING:
1. Compute Rolling Statistics: Moving averages (e.g., 7-day, 30-day) smooth out stock prices and reveal trends; volatility (the standard deviation of returns) indicates price variability.
2. Create Lag Features: Prices from the previous day(s) help the model pick up short-term trends.
3. Compute Returns: Daily returns measure the percentage change from one day to the next.
# Feature Engineering for meta
# 1. Moving Averages
meta_df['7_day_MA'] = meta_df['Close'].rolling(window=7).mean()
meta_df['30_day_MA'] = meta_df['Close'].rolling(window=30).mean()
# 2. Volatility (Standard deviation of daily returns over 7 and 30 days)
meta_df['7_day_volatility'] = meta_df['Close'].pct_change().rolling(window=7).std()
meta_df['30_day_volatility'] = meta_df['Close'].pct_change().rolling(window=30).std()
# 3. Lag Features (Previous day's price and volume)
meta_df['Previous_Close'] = meta_df['Close'].shift(1)
meta_df['Previous_Volume'] = meta_df['Volume'].shift(1)
# 4. Daily Returns
meta_df['Daily_Return'] = meta_df['Close'].pct_change()
# Preview updated dataframe for meta
print(meta_df.head())
                          Date        Open        High         Low  \
339  2019-09-16 00:00:00+00:00  186.929993  187.789993  185.770004
528  2019-09-17 00:00:00+00:00  186.660004  188.320007  185.539993
1169 2019-09-18 00:00:00+00:00  188.089996  189.080002  186.009995
1189 2019-09-19 00:00:00+00:00  188.660004  191.100006  188.149994
738  2019-09-20 00:00:00+00:00  190.660004  193.100006  188.119995

          Close    Volume Ticker  7_day_MA  30_day_MA  7_day_volatility  \
339  186.220001   8444800   META       NaN        NaN               NaN
528  188.080002   9671100   META       NaN        NaN               NaN
1169 188.139999   9681900   META       NaN        NaN               NaN
1189 190.139999  10392700   META       NaN        NaN               NaN
738  189.929993  19934200   META       NaN        NaN               NaN

      30_day_volatility  Previous_Close  Previous_Volume  Daily_Return
339                 NaN             NaN             <NA>           NaN
528                 NaN      186.220001          8444800      0.009988
1169                NaN      188.080002          9671100      0.000319
1189                NaN      188.139999          9681900      0.010630
738                 NaN      190.139999         10392700     -0.001104
# Check for missing values in each column for meta
print(meta_df.isna().sum())
# Visualize where NaNs occur in meta data
import matplotlib.pyplot as plt
import seaborn as sns
plt.figure(figsize=(10, 6))
sns.heatmap(meta_df.isna(), cbar=False, cmap="viridis")
plt.title('meta Data Missing Values')
plt.show()
Date                  0
Open                  0
High                  0
Low                   0
Close                 0
Volume                0
Ticker                0
7_day_MA              6
30_day_MA            29
7_day_volatility      7
30_day_volatility    30
Previous_Close        1
Previous_Volume       1
Daily_Return          1
dtype: int64
Based on the heatmap and the summary above, the missing values are concentrated at the beginning of the dataset, specifically in the moving-average and volatility columns. Since these are rolling calculations, NaN values at the start are expected.
It is therefore appropriate to drop the rows containing NaNs: they occur only at the beginning of the series and carry no information for future predictions.
# Drop rows with NaN values in the meta dataframe
meta_df_cleaned = meta_df.dropna()
# Preview the cleaned meta dataframe
print(meta_df_cleaned.head())
                          Date        Open        High         Low  \
1023 2019-10-28 00:00:00+00:00  187.199997  189.529999  185.080002
783  2019-10-29 00:00:00+00:00  191.690002  192.529999  188.470001
33   2019-10-30 00:00:00+00:00  189.559998  190.449997  185.979996
1041 2019-10-31 00:00:00+00:00  196.699997  198.089996  188.250000
126  2019-11-01 00:00:00+00:00  192.850006  194.110001  189.910004

          Close    Volume Ticker    7_day_MA   30_day_MA  7_day_volatility  \
1023 189.399994  13657900   META  186.824284  183.803334          0.022755
783  189.309998  13574900   META  187.318569  183.844333          0.020349
33   188.250000  28734600   META  187.102855  183.848000          0.018806
1041 191.649994  42286500   META  188.432855  183.898333          0.009707
126  193.619995  21711800   META  189.499998  184.021333          0.007860

      30_day_volatility  Previous_Close  Previous_Volume  Daily_Return
1023           0.016451      187.889999          8061200      0.008037
783            0.016358      189.399994         13657900     -0.000475
33             0.016394      189.309998         13574900     -0.005599
1041           0.016612      188.250000         28734600      0.018061
126            0.016707      191.649994         42286500      0.010279
print(meta_df_cleaned.shape)
(1221, 14)
Exporting to CSV to update BigQuery table:
# Define the filename for the Meta dataframe
meta_csv_filename = "meta_cleaned_feature_engineered.csv"
# Export the cleaned Meta dataframe to CSV
meta_df_cleaned.to_csv(meta_csv_filename, index=False)
print(f"Dataframe exported to CSV: {meta_csv_filename}")
Dataframe exported to CSV: meta_cleaned_feature_engineered.csv
from google.colab import files
# Download the Meta CSV file to your local machine
files.download('meta_cleaned_feature_engineered.csv')
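Downloading the CSV covers the manual path; alternatively, the engineered dataframe can be loaded back into BigQuery directly with the same client used for the query. A minimal sketch, assuming a hypothetical destination table name in the same dataset:
from google.cloud import bigquery
# Hypothetical destination table; adjust to your own project/dataset naming.
destination = "stockprediction-434721.stock_data.meta_prices_features"
# WRITE_TRUNCATE replaces any existing table contents with the cleaned data.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
load_job = client.load_table_from_dataframe(meta_df_cleaned, destination, job_config=job_config)
load_job.result()  # Block until the load job completes
print(f"Loaded {load_job.output_rows} rows into {destination}")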
MODEL TRAINING:
Step 1: Split the Data into Training and Testing Sets
from sklearn.model_selection import train_test_split
# Define features and target variable
X_meta = meta_df_cleaned[['7_day_MA', '30_day_MA', '7_day_volatility', '30_day_volatility', 'Previous_Close', 'Previous_Volume', 'Daily_Return']]
y_meta = meta_df_cleaned['Close']
# Split the data
X_train_meta, X_test_meta, y_train_meta, y_test_meta = train_test_split(X_meta, y_meta, test_size=0.2, random_state=42)
# Preview the shapes
print(X_train_meta.shape, X_test_meta.shape, y_train_meta.shape, y_test_meta.shape)
(976, 7) (245, 7) (976,) (245,)
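One caveat worth flagging before modeling: train_test_split shuffles, so past and future rows are mixed, and since Close = Previous_Close × (1 + Daily_Return) exactly, near-perfect scores are expected from these features. For a stricter forecasting-style evaluation, a chronological hold-out is a small change; a sketch, relying on meta_df_cleaned already being sorted by date:
# Chronological hold-out: train on the first 80% of days, test on the last 20%.
split_idx = int(len(X_meta) * 0.8)
X_train_ts, X_test_ts = X_meta.iloc[:split_idx], X_meta.iloc[split_idx:]
y_train_ts, y_test_ts = y_meta.iloc[:split_idx], y_meta.iloc[split_idx:]
print(X_train_ts.shape, X_test_ts.shape, y_train_ts.shape, y_test_ts.shape)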
Step 2: Select Machine Learning Models
LINEAR REGRESSION MODEL:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the model
model_meta = LinearRegression()
# Train the model on the training data
model_meta.fit(X_train_meta, y_train_meta)
# Predict on the test data
y_pred_meta = model_meta.predict(X_test_meta)
# Evaluate the model
mse_meta = mean_squared_error(y_test_meta, y_pred_meta)
r2_meta = r2_score(y_test_meta, y_pred_meta)
print("meta Linear Regression Performance:")
print(f"Mean Squared Error: {mse_meta}")
print(f"R-squared: {r2_meta}")
meta Linear Regression Performance:
Mean Squared Error: 5.595268726973693
R-squared: 0.9994097795713779
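An MSE of about 5.6 is easier to read as its square root, which is in the same units as the price. A quick conversion:
import numpy as np
# RMSE is in USD, like the Close price itself.
rmse_meta = np.sqrt(mse_meta)
print(f"Root Mean Squared Error: ${rmse_meta:.2f}")  # ≈ $2.37 for MSE ≈ 5.595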
import matplotlib.pyplot as plt
import numpy as np
# Define the cyberpunk theme colors
cyberpunk_blue = '#00FFFF'
cyberpunk_red = '#FF007F'
cyberpunk_background = '#0D0D0D'
# Customize the plot style
plt.style.use('dark_background')
# Plot for meta stock
plt.figure(figsize=(10, 6))
plt.plot(np.arange(len(y_test_meta)), y_test_meta, color=cyberpunk_blue, label='Actual Price', linewidth=2)
plt.plot(np.arange(len(y_pred_meta)), y_pred_meta, color=cyberpunk_red, linestyle='--', label='Predicted Price', linewidth=2)
plt.title('meta Stock Price - Actual vs Predicted', fontsize=16, color=cyberpunk_blue)
plt.xlabel('Test Sample Index', fontsize=12, color='white')
plt.ylabel('Price', fontsize=12, color='white')
plt.legend(loc='upper left', fontsize=10)
plt.grid(True, color='#333333')
plt.gca().set_facecolor(cyberpunk_background)
plt.show()
RANDOM FOREST MODEL:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the model for meta
rf_meta = RandomForestRegressor(n_estimators=100, random_state=42)
# Train the model on the training data
rf_meta.fit(X_train_meta, y_train_meta)
# Predict on the test data
y_pred_rf_meta = rf_meta.predict(X_test_meta)
# Evaluate the model
mse_rf_meta = mean_squared_error(y_test_meta, y_pred_rf_meta)
r2_rf_meta = r2_score(y_test_meta, y_pred_rf_meta)
print("meta Random Forest Performance:")
print(f"Mean Squared Error: {mse_rf_meta}")
print(f"R-squared: {r2_rf_meta}")
meta Random Forest Performance:
Mean Squared Error: 13.626392908303787
R-squared: 0.9985626114034272
# Visualization for Random Forest - meta
plt.figure(figsize=(10, 6))
plt.plot(y_test_meta[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_rf_meta[:250], 'm--', label="Predicted Price")
plt.title("meta Stock Price - Actual vs Predicted (Random Forest)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
FEATURE IMPORTANCE ANALYSIS:
Explore which features (7-day MA, 30-day volatility, etc.) had the most influence on the stock price predictions.
# Get feature importance from the Random Forest model
importances_meta = rf_meta.feature_importances_
# Create a dataframe for the features and their importance
feature_names_meta = X_train_meta.columns
importance_df_meta = pd.DataFrame({
'Feature': feature_names_meta,
'Importance': importances_meta
})
# Sort the dataframe by importance
importance_df_meta = importance_df_meta.sort_values(by='Importance', ascending=False)
# Plot the feature importance
plt.figure(figsize=(10, 6))
plt.barh(importance_df_meta['Feature'], importance_df_meta['Importance'], color='cyan')
plt.xlabel('Feature Importance', color='cyan')
plt.ylabel('Features', color='cyan')
plt.title('meta Stock Feature Importance (Random Forest)', color='cyan')
plt.gca().set_facecolor('black')
plt.gca().spines['bottom'].set_color('cyan')
plt.gca().spines['top'].set_color('cyan')
plt.gca().spines['left'].set_color('cyan')
plt.gca().spines['right'].set_color('cyan')
plt.show()
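Impurity-based importances from tree ensembles can overstate correlated features, and most of these features track the price level. Permutation importance on the held-out set is a useful cross-check; a sketch using scikit-learn's inspection module:
from sklearn.inspection import permutation_importance
# Shuffle one feature at a time on the test set and measure the drop in R².
perm_meta = permutation_importance(rf_meta, X_test_meta, y_test_meta,
                                   n_repeats=10, random_state=42)
perm_df_meta = pd.DataFrame({
    'Feature': X_test_meta.columns,
    'Importance': perm_meta.importances_mean
}).sort_values(by='Importance', ascending=False)
print(perm_df_meta)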
GRADIENT BOOSTING REGRESSOR:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the Gradient Boosting model for meta
gb_meta = GradientBoostingRegressor(n_estimators=100, random_state=42)
# Train the model on the training data
gb_meta.fit(X_train_meta, y_train_meta)
# Predict on the test data
y_pred_gb_meta = gb_meta.predict(X_test_meta)
# Evaluate the model
mse_gb_meta = mean_squared_error(y_test_meta, y_pred_gb_meta)
r2_gb_meta = r2_score(y_test_meta, y_pred_gb_meta)
print("meta Gradient Boosting Performance:")
print(f"Mean Squared Error: {mse_gb_meta}")
print(f"R-squared: {r2_gb_meta}")
meta Gradient Boosting Performance:
Mean Squared Error: 9.74746205654308
R-squared: 0.99897178285553
# Visualization for Gradient Boosting - meta
plt.figure(figsize=(10, 6))
plt.plot(y_test_meta[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_gb_meta[:250], 'm--', label="Predicted Price")
plt.title("meta Stock Price - Actual vs Predicted (Gradient Boosting)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
HYPERPARAMETER TUNING:
from sklearn.model_selection import GridSearchCV
# Define the parameter grid for Gradient Boosting
param_grid = {
'n_estimators': [100, 200, 300],
'learning_rate': [0.01, 0.1, 0.2],
'max_depth': [3, 5, 7],
'min_samples_split': [2, 5, 10],
'min_samples_leaf': [1, 2, 4]
}
# Initialize the model
gb_meta = GradientBoostingRegressor(random_state=42)
# Initialize GridSearchCV
grid_search_meta = GridSearchCV(estimator=gb_meta, param_grid=param_grid,
cv=5, n_jobs=-1, verbose=2)
# Fit the model to the training data
grid_search_meta.fit(X_train_meta, y_train_meta)
# Get the best parameters
best_params_meta = grid_search_meta.best_params_
print("Best parameters for meta:", best_params_meta)
# Evaluate the tuned model under distinct names, so the Linear Regression
# predictions (y_pred_meta) from earlier are not overwritten
best_gb_meta = grid_search_meta.best_estimator_
y_pred_gb_tuned_meta = best_gb_meta.predict(X_test_meta)
mse_gb_tuned_meta = mean_squared_error(y_test_meta, y_pred_gb_tuned_meta)
r2_gb_tuned_meta = r2_score(y_test_meta, y_pred_gb_tuned_meta)
print("meta Gradient Boosting Performance (Tuned):")
print(f"Mean Squared Error: {mse_gb_tuned_meta}")
print(f"R-squared: {r2_gb_tuned_meta}")
Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Best parameters for meta: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 300}
meta Gradient Boosting Performance (Tuned):
Mean Squared Error: 6.432226768944422
R-squared: 0.9993214925277387
The hyperparameter tuning for meta finished successfully and the best parameters were identified. Performance improved over the untuned Gradient Boosting model, with the mean squared error (MSE) dropping from about 9.75 to 6.43.
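If the 1,215 fits above are too expensive, a randomized search over the same grid usually lands on comparable parameters with a fraction of the budget. A sketch with a hypothetical 50-candidate budget:
from sklearn.model_selection import RandomizedSearchCV
# Sample 50 of the 243 combinations instead of trying all of them.
random_search_meta = RandomizedSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_distributions=param_grid,
    n_iter=50, cv=5, n_jobs=-1, random_state=42, verbose=2
)
random_search_meta.fit(X_train_meta, y_train_meta)
print("Best parameters for meta:", random_search_meta.best_params_)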
SAVING THE TUNED MODEL:
import joblib
joblib.dump(best_gb_meta, 'best_gb_meta_model.pkl')
['best_gb_meta_model.pkl']
Neural Network for stock price prediction:
Note that the model below is a feedforward (Dense) network rather than a true LSTM: the features are flat rows, not sequences. A sequence-based LSTM sketch follows the training notes further down.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
# Feature scaling
scaler = StandardScaler()
X_train_meta_scaled = scaler.fit_transform(X_train_meta)
X_test_meta_scaled = scaler.transform(X_test_meta)
# Define the neural network model (named nn_model_meta so it does not
# overwrite the Linear Regression model_meta saved later)
nn_model_meta = Sequential([
    Input(shape=(X_train_meta.shape[1],)),  # Preferred over input_dim in recent Keras
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer
])
# Compile the model
nn_model_meta.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
history_meta = nn_model_meta.fit(X_train_meta_scaled, y_train_meta, validation_split=0.2, epochs=50, batch_size=32)
# Predict on the test set
y_pred_nn_meta = nn_model_meta.predict(X_test_meta_scaled)
# Evaluate the performance
mse_meta_nn = mean_squared_error(y_test_meta, y_pred_nn_meta)
r2_meta_nn = r2_score(y_test_meta, y_pred_nn_meta)
print(f"meta Neural Network Performance:")
print(f"Mean Squared Error: {mse_meta_nn}")
print(f"R-squared: {r2_meta_nn}")
Epoch 1/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 2s 21ms/step - loss: 87530.1875 - val_loss: 90179.1016
Epoch 2/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 89911.2656 - val_loss: 88511.7031
Epoch 3/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 84588.5312 - val_loss: 85129.1406
...
Epoch 49/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 780.4986 - val_loss: 599.1946
Epoch 50/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 628.9259 - val_loss: 566.5221
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
meta Neural Network Performance:
Mean Squared Error: 580.1011301453515
R-squared: 0.9388076686955183
Notes: You can adjust the number of neurons, layers, epochs, and batch size to optimize the model. The StandardScaler puts all features on the same scale, which is important for neural networks. The model is trained for 50 epochs; since both losses were still falling at epoch 50, training longer (or adding an early-stopping callback) may improve the fit.
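For completeness, a true LSTM consumes windows of consecutive rows rather than independent samples, so the features must be reshaped to (samples, timesteps, features) and the ordering must stay chronological. A minimal sketch under those assumptions (the 10-day window and 32 units are arbitrary choices, and the scaler is refit on all rows for brevity; in practice fit it on the training window only):
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense

def make_windows(features, target, window=10):
    # Stack `window` consecutive rows as one sample; the label is the next Close.
    X_seq, y_seq = [], []
    for i in range(window, len(features)):
        X_seq.append(features[i - window:i])
        y_seq.append(target[i])
    return np.array(X_seq), np.array(y_seq)

# Chronological data only: no shuffling, otherwise the windows are meaningless.
X_all_scaled = scaler.fit_transform(X_meta)  # refit on all rows for brevity; see note above
X_seq, y_seq = make_windows(X_all_scaled, y_meta.to_numpy(), window=10)

lstm_meta = Sequential([
    Input(shape=(X_seq.shape[1], X_seq.shape[2])),  # (timesteps, features)
    LSTM(32),
    Dense(1)
])
lstm_meta.compile(optimizer='adam', loss='mean_squared_error')
# validation_split takes the last 20% of samples, i.e. the most recent windows.
lstm_meta.fit(X_seq, y_seq, validation_split=0.2, epochs=50, batch_size=32)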
# meta Neural Network Predictions Visualization
plt.figure(figsize=(10, 6))
plt.plot(y_test_meta[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_nn_meta[:250], 'm--', label="Predicted Price")
plt.title("meta Stock Price - Actual vs Predicted (Neural Network)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle="--", alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
# Save the Neural Network model for meta in the native Keras format
nn_model_meta.save('best_nn_meta_model_tuned.keras')
# Save the Linear Regression model for meta
joblib.dump(model_meta, 'linear_reg_meta_model.pkl')
['linear_reg_meta_model.pkl']
joblib.dump(rf_meta, 'random_forest_meta_model.pkl')
['random_forest_meta_model.pkl']
joblib.dump(best_gb_meta, 'gradient_boost_meta_model.pkl')
['gradient_boost_meta_model.pkl']
# Load all Models for meta:
from tensorflow.keras.models import load_model
# Load Linear Regression model
linear_reg_meta_model = joblib.load('linear_reg_meta_model.pkl')
# Load Random Forest model
random_forest_meta_model = joblib.load('random_forest_meta_model.pkl')
# Load Gradient Boosting model
gradient_boost_meta_model = joblib.load('gradient_boost_meta_model.pkl')
# Load Neural Network model for meta
best_nn_meta_model = load_model('best_nn_meta_model_tuned.keras')
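A quick round-trip check confirms the reloaded artifacts reproduce the in-memory predictions (the neural network expects the same scaled inputs it was trained on):
import numpy as np
# The reloaded Random Forest should match the in-memory predictions exactly.
assert np.allclose(random_forest_meta_model.predict(X_test_meta), y_pred_rf_meta)
# The reloaded Keras model likewise reproduces the scaled-input predictions.
assert np.allclose(best_nn_meta_model.predict(X_test_meta_scaled), y_pred_nn_meta)
print("Reloaded models verified against in-memory predictions.")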
from google.colab import files
# Download the saved models to your local machine
files.download('linear_reg_meta_model.pkl')
files.download('random_forest_meta_model.pkl')
files.download('gradient_boost_meta_model.pkl')
files.download('best_nn_meta_model_tuned.keras')
Dashboard:
import matplotlib.pyplot as plt
import numpy as np
# Define colors
cyberpunk_blue = '#00FFFF'
cyberpunk_pink = '#FF1493' # Pink color for Gradient Boosting
cyberpunk_background = '#000D0D'
random_forest_color = '#FF00FF' # Magenta for Random Forest
nn_color = '#FFFF00' # Yellow for the Neural Network
# Create subplots: 2 rows, 2 columns
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
fig.subplots_adjust(hspace=0.6, top=0.70) # Adjusting space between the charts and shifting top margin for title
# Title for the entire figure
fig.suptitle('Meta Stock Price Prediction - Model Comparison', fontsize=18, color='white')
# Table with model performance metrics
table_data = [
["Model", "R-squared", "Mean Squared Error"],
["Linear Regression", 0.9994, 5.5953],
["Random Forest", 0.9986, 13.6263],
["Gradient Boosting (Tuned)", 0.9993, 6.4322],
["Neural Network (Dense)", 0.9388, 580.1011]
]
# Add the table without extra space
ax_table = fig.add_axes([0.1, 0.78, 0.8, 0.12]) # Shifting the table slightly lower
ax_table.axis('off')
table = ax_table.table(cellText=table_data, colWidths=[0.3]*3, loc='center', cellLoc='center')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)
# Set table background to black and text to white
for key, cell in table.get_celld().items():
cell.set_edgecolor('white')
cell.set_text_props(color='white')
cell.set_facecolor('black')
# Plot 1: Linear Regression
axs[0, 0].plot(np.arange(len(y_test_meta[:250])), y_test_meta[:250], color=cyberpunk_blue, label='Actual Price')
axs[0, 0].plot(np.arange(len(y_pred_meta[:250])), y_pred_meta[:250], 'm--', label='Predicted Price (LR)', alpha=0.7)
axs[0, 0].set_title('Linear Regression', fontsize=12, color='white')
axs[0, 0].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[0, 0].set_ylabel('Price', fontsize=10, color='white')
axs[0, 0].legend(loc='upper left')
axs[0, 0].grid(True, linestyle='--', alpha=0.7)
axs[0, 0].set_facecolor(cyberpunk_background)
# Plot 2: Random Forest (Magenta)
axs[0, 1].plot(np.arange(len(y_test_meta[:250])), y_test_meta[:250], color=cyberpunk_blue, label='Actual Price')
axs[0, 1].plot(np.arange(len(y_pred_rf_meta[:250])), y_pred_rf_meta[:250], color=random_forest_color, label='Predicted Price (RF)', alpha=0.7)
axs[0, 1].set_title('Random Forest', fontsize=12, color='white')
axs[0, 1].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[0, 1].set_ylabel('Price', fontsize=10, color='white')
axs[0, 1].legend(loc='upper left')
axs[0, 1].grid(True, linestyle='--', alpha=0.7)
axs[0, 1].set_facecolor(cyberpunk_background)
# Plot 3: Gradient Boosting (Pink)
axs[1, 0].plot(np.arange(len(y_test_meta[:250])), y_test_meta[:250], color=cyberpunk_blue, label='Actual Price')
axs[1, 0].plot(np.arange(len(y_pred_gb_meta[:250])), y_pred_gb_meta[:250], color=cyberpunk_pink, label='Predicted Price (GB)', alpha=0.7) # Pink color
axs[1, 0].set_title('Gradient Boosting', fontsize=12, color='white')
axs[1, 0].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[1, 0].set_ylabel('Price', fontsize=10, color='white')
axs[1, 0].legend(loc='upper left')
axs[1, 0].grid(True, linestyle='--', alpha=0.7)
axs[1, 0].set_facecolor(cyberpunk_background)
# Plot 4: Neural Network (Yellow)
axs[1, 1].plot(np.arange(len(y_test_meta[:250])), y_test_meta[:250], color=cyberpunk_blue, label='Actual Price')
axs[1, 1].plot(np.arange(len(y_pred_nn_meta[:250])), y_pred_nn_meta[:250], color=nn_color, label='Predicted Price (NN)', alpha=0.7)
axs[1, 1].set_title('Neural Network (Dense)', fontsize=12, color='white')
axs[1, 1].set_xlabel('Test Sample Index', fontsize=10, color='white')
axs[1, 1].set_ylabel('Price', fontsize=10, color='white')
axs[1, 1].legend(loc='upper left')
axs[1, 1].grid(True, linestyle='--', alpha=0.7)
axs[1, 1].set_facecolor(cyberpunk_background)
# Display the final dashboard
plt.show()
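To keep an image of the dashboard alongside the exported models (the filename here is an assumption), the figure object can be written out and downloaded the same way as the CSVs:
# Save the dashboard as a PNG, preserving the dark background, then download it.
fig.savefig('meta_model_dashboard.png', dpi=150, facecolor=cyberpunk_background)
files.download('meta_model_dashboard.png')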