query = """
SELECT *
FROM `stockprediction-434721.stock_data.nflx_prices`
WHERE CAST(Date AS DATE) >= DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR)
"""
nflx_df = client.query(query).to_dataframe()
nflx_df.head(10)
Date | Open | High | Low | Close | Adj Close | Volume | Ticker | |
---|---|---|---|---|---|---|---|---|
0 | 2019-11-04 00:00:00+00:00 | 288.00 | 295.390015 | 287.160004 | 292.859985 | 292.859985 | 5566200 | NFLX |
1 | 2022-12-12 00:00:00+00:00 | 320.00 | 323.179993 | 308.850006 | 315.179993 | 315.179993 | 10148600 | NFLX |
2 | 2022-03-04 00:00:00+00:00 | 368.00 | 374.820007 | 357.170013 | 361.730011 | 361.730011 | 5325500 | NFLX |
3 | 2020-06-08 00:00:00+00:00 | 416.00 | 420.799988 | 406.500000 | 419.489990 | 419.489990 | 5851500 | NFLX |
4 | 2023-08-29 00:00:00+00:00 | 416.00 | 432.170013 | 414.500000 | 429.989990 | 429.989990 | 4486700 | NFLX |
5 | 2020-11-16 00:00:00+00:00 | 480.00 | 485.579987 | 477.299988 | 479.100006 | 479.100006 | 3953600 | NFLX |
6 | 2024-04-01 00:00:00+00:00 | 608.00 | 615.109985 | 605.570007 | 614.309998 | 614.309998 | 2115900 | NFLX |
7 | 2022-02-02 00:00:00+00:00 | 448.25 | 451.980011 | 426.480011 | 429.480011 | 429.480011 | 14346000 | NFLX |
8 | 2021-03-12 00:00:00+00:00 | 512.50 | 526.510010 | 506.589996 | 518.020020 | 518.020020 | 3981700 | NFLX |
9 | 2021-08-04 00:00:00+00:00 | 513.00 | 517.979980 | 510.369995 | 517.349976 | 517.349976 | 2039400 | NFLX |
CLEANING AND PREPROCESSING:
# Check for any missing or null values
print(nflx_df.isnull().sum())
Date 0 Open 0 High 0 Low 0 Close 0 Adj Close 0 Volume 0 Ticker 0 dtype: int64
import pandas as pd
# Ensure that Date is in datetime format
nflx_df['Date'] = pd.to_datetime(nflx_df['Date'])
# Drop columns that are not necessary for modeling
# Adjust this based on your needs
nflx_df = nflx_df.drop(columns=['Adj Close'])
# Sort data by Date in ascending order
nflx_df = nflx_df.sort_values(by='Date', ascending=True)
# Preview updated dataframes
print(nflx_df.head())
Date Open High Low \ 420 2019-09-16 00:00:00+00:00 294.230011 297.429993 289.779999 101 2019-09-17 00:00:00+00:00 294.500000 299.149994 291.790009 307 2019-09-18 00:00:00+00:00 294.989990 296.049988 287.450012 1102 2019-09-19 00:00:00+00:00 291.559998 293.809998 283.399994 252 2019-09-20 00:00:00+00:00 280.260010 282.500000 266.000000 Close Volume Ticker 420 294.290009 5307400 NFLX 101 298.600006 4777100 NFLX 307 291.559998 7811100 NFLX 1102 286.600006 8461300 NFLX 252 270.750000 23832800 NFLX
FEATURE ENGINEERING:
Moving Averages (e.g., 7-day, 30-day): These smooth out stock prices and reveal trends. Volatility (Standard deviation of returns): Indicates stock price variability. 2. Create Lag Features: Previous day's prices: Prices from a few days ago can help the model see short-term trends. 3. Compute Returns: Daily returns: Percentage change from one day to the next.
# Feature Engineering for netflix
# 1. Moving Averages
nflx_df['7_day_MA'] = nflx_df['Close'].rolling(window=7).mean()
nflx_df['30_day_MA'] = nflx_df['Close'].rolling(window=30).mean()
# 2. Volatility (Standard deviation of daily returns over 7 and 30 days)
nflx_df['7_day_volatility'] = nflx_df['Close'].pct_change().rolling(window=7).std()
nflx_df['30_day_volatility'] = nflx_df['Close'].pct_change().rolling(window=30).std()
# 3. Lag Features (Previous day's price and volume)
nflx_df['Previous_Close'] = nflx_df['Close'].shift(1)
nflx_df['Previous_Volume'] = nflx_df['Volume'].shift(1)
# 4. Daily Returns
nflx_df['Daily_Return'] = nflx_df['Close'].pct_change()
# Preview updated dataframe for netflix
print(nflx_df.head())
Date Open High Low \ 420 2019-09-16 00:00:00+00:00 294.230011 297.429993 289.779999 101 2019-09-17 00:00:00+00:00 294.500000 299.149994 291.790009 307 2019-09-18 00:00:00+00:00 294.989990 296.049988 287.450012 1102 2019-09-19 00:00:00+00:00 291.559998 293.809998 283.399994 252 2019-09-20 00:00:00+00:00 280.260010 282.500000 266.000000 Close Volume Ticker 7_day_MA 30_day_MA 7_day_volatility \ 420 294.290009 5307400 NFLX NaN NaN NaN 101 298.600006 4777100 NFLX NaN NaN NaN 307 291.559998 7811100 NFLX NaN NaN NaN 1102 286.600006 8461300 NFLX NaN NaN NaN 252 270.750000 23832800 NFLX NaN NaN NaN 30_day_volatility Previous_Close Previous_Volume Daily_Return 420 NaN NaN <NA> NaN 101 NaN 294.290009 5307400 0.014645 307 NaN 298.600006 4777100 -0.023577 1102 NaN 291.559998 7811100 -0.017012 252 NaN 286.600006 8461300 -0.055304
# Check for missing values in each column for netflix
print(nflx_df.isna().sum())
# Visualize where NaNs occur in netflix data
import matplotlib.pyplot as plt
import seaborn as sns
plt.figure(figsize=(10, 6))
sns.heatmap(nflx_df.isna(), cbar=False, cmap="viridis")
plt.title('netflix Data Missing Values')
plt.show()
Date 0 Open 0 High 0 Low 0 Close 0 Volume 0 Ticker 0 7_day_MA 6 30_day_MA 29 7_day_volatility 7 30_day_volatility 30 Previous_Close 1 Previous_Volume 1 Daily_Return 1 dtype: int64
Based on the heatmaps and summary, it looks like the missing values are indeed concentrated at the beginning of each dataset, particularly in the moving averages and volatility columns. Since these are rolling calculations, it's normal to see NaN values at the start.
It would be appropriate to drop the rows that contain these NaN values since they appear at the beginning and are not informative for future predictions.
# Drop rows with NaN values in the netflix dataframe
nflx_df_cleaned = nflx_df.dropna()
# Preview the cleaned netflix dataframe
print(nflx_df_cleaned.head())
Date Open High Low \ 1061 2019-10-28 00:00:00+00:00 278.049988 285.750000 277.350006 852 2019-10-29 00:00:00+00:00 281.869995 284.410004 277.549988 940 2019-10-30 00:00:00+00:00 284.339996 293.489990 283.000000 44 2019-10-31 00:00:00+00:00 291.000000 291.450012 284.779999 483 2019-11-01 00:00:00+00:00 288.700012 289.119995 283.019989 Close Volume Ticker 7_day_MA 30_day_MA 7_day_volatility \ 1061 281.859985 6248400 NFLX 274.498566 275.078332 0.032596 852 281.209991 4356200 NFLX 275.342852 274.498664 0.021261 940 291.450012 9345600 NFLX 277.257141 274.494998 0.024724 44 287.410004 5090000 NFLX 280.217141 274.521998 0.016872 483 286.809998 5594300 NFLX 282.437143 275.057331 0.017238 30_day_volatility Previous_Close Previous_Volume Daily_Return 1061 0.025349 276.820007 4747800 0.018207 852 0.025173 281.859985 6248400 -0.002306 940 0.025751 281.209991 4356200 0.036414 44 0.025684 291.450012 9345600 -0.013862 483 0.023443 287.410004 5090000 -0.002088
print(nflx_df_cleaned.shape)
(1221, 14)
Exporting to CSV to update BigQuery table:
# Define the filename for the NFLX dataframe
nflx_csv_filename = "nflx_cleaned_feature_engineered.csv"
# Export the cleaned NFLX dataframe to CSV
nflx_df_cleaned.to_csv(nflx_csv_filename, index=False)
print(f"Dataframe exported to CSV: {nflx_csv_filename}")
Dataframe exported to CSV: nflx_cleaned_feature_engineered.csv
from google.colab import files
# Download the Netflix CSV file to your local machine
files.download('nflx_cleaned_feature_engineered.csv')
MODEL TRAINING:
Step 1: Split the Data into Training and Testing Sets
from sklearn.model_selection import train_test_split
# Define features and target variable
X_nflx = nflx_df_cleaned[['7_day_MA', '30_day_MA', '7_day_volatility', '30_day_volatility', 'Previous_Close', 'Previous_Volume', 'Daily_Return']]
y_nflx = nflx_df_cleaned['Close']
# Split the data
X_train_nflx, X_test_nflx, y_train_nflx, y_test_nflx = train_test_split(X_nflx, y_nflx, test_size=0.2, random_state=42)
# Preview the shapes
print(X_train_nflx.shape, X_test_nflx.shape, y_train_nflx.shape, y_test_nflx.shape)
(976, 7) (245, 7) (976,) (245,)
Step 2: Select Machine Learning Models
LINEAR REGRESSION MODEL:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the model
model_nflx = LinearRegression()
# Train the model on the training data
model_nflx.fit(X_train_nflx, y_train_nflx)
# Predict on the test data
y_pred_nflx = model_nflx.predict(X_test_nflx)
# Evaluate the model
mse_nflx = mean_squared_error(y_test_nflx, y_pred_nflx)
r2_nflx = r2_score(y_test_nflx, y_pred_nflx)
print("netflix Linear Regression Performance:")
print(f"Mean Squared Error: {mse_nflx}")
print(f"R-squared: {r2_nflx}")
netflix Linear Regression Performance: Mean Squared Error: 10.327358375173077 R-squared: 0.9993540336227533
import matplotlib.pyplot as plt
import numpy as np
# Define the cyberpunk theme colors
cyberpunk_blue = '#00FFFF'
cyberpunk_red = '#FF007F'
cyberpunk_background = '#0D0D0D'
# Customize the plot style
plt.style.use('dark_background')
# Plot for netflix stock
plt.figure(figsize=(10, 6))
plt.plot(np.arange(len(y_test_nflx)), y_test_nflx, color=cyberpunk_blue, label='Actual Price', linewidth=2)
plt.plot(np.arange(len(y_pred_nflx)), y_pred_nflx, color=cyberpunk_red, linestyle='--', label='Predicted Price', linewidth=2)
plt.title('netflix Stock Price - Actual vs Predicted', fontsize=16, color=cyberpunk_blue)
plt.xlabel('Date', fontsize=12, color='white')
plt.ylabel('Price', fontsize=12, color='white')
plt.legend(loc='upper left', fontsize=10)
plt.grid(True, color='#333333')
plt.gca().set_facecolor(cyberpunk_background)
plt.show()
RANDOM FOREST MODEL:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the model for netflix
rf_nflx = RandomForestRegressor(n_estimators=100, random_state=42)
# Train the model on the training data
rf_nflx.fit(X_train_nflx, y_train_nflx)
# Predict on the test data
y_pred_rf_nflx = rf_nflx.predict(X_test_nflx)
# Evaluate the model
mse_rf_nflx = mean_squared_error(y_test_nflx, y_pred_rf_nflx)
r2_rf_nflx = r2_score(y_test_nflx, y_pred_rf_nflx)
print("netflix Random Forest Performance:")
print(f"Mean Squared Error: {mse_rf_nflx}")
print(f"R-squared: {r2_rf_nflx}")
netflix Random Forest Performance: Mean Squared Error: 25.232584603717342 R-squared: 0.9984217259948858
# Visualization for Random Forest - netflix
plt.figure(figsize=(10, 6))
plt.plot(y_test_nflx[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_rf_nflx[:250], 'm--', label="Predicted Price")
plt.title("netflix Stock Price - Actual vs Predicted (Random Forest)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
FEATURE IMPORTANCE ANALYSIS:
explore which features (7-day MA, 30-day volatility, etc.) had the most influence on the stock price predictions.
# Get feature importance from the Random Forest model
importances_nflx = rf_nflx.feature_importances_
# Create a dataframe for the features and their importance
feature_names_nflx = X_train_nflx.columns
importance_df_nflx = pd.DataFrame({
'Feature': feature_names_nflx,
'Importance': importances_nflx
})
# Sort the dataframe by importance
importance_df_nflx = importance_df_nflx.sort_values(by='Importance', ascending=False)
# Plot the feature importance
plt.figure(figsize=(10, 6))
plt.barh(importance_df_nflx['Feature'], importance_df_nflx['Importance'], color='cyan')
plt.xlabel('Feature Importance', color='cyan')
plt.ylabel('Features', color='cyan')
plt.title('netflix Stock Feature Importance (Random Forest)', color='cyan')
plt.gca().set_facecolor('black')
plt.gca().spines['bottom'].set_color('cyan')
plt.gca().spines['top'].set_color('cyan')
plt.gca().spines['left'].set_color('cyan')
plt.gca().spines['right'].set_color('cyan')
plt.show()
GRADIENT BOOSTING REGRESSOR:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Initialize the Gradient Boosting model for netflix
gb_nflx = GradientBoostingRegressor(n_estimators=100, random_state=42)
# Train the model on the training data
gb_nflx.fit(X_train_nflx, y_train_nflx)
# Predict on the test data
y_pred_gb_nflx = gb_nflx.predict(X_test_nflx)
# Evaluate the model
mse_gb_nflx = mean_squared_error(y_test_nflx, y_pred_gb_nflx)
r2_gb_nflx = r2_score(y_test_nflx, y_pred_gb_nflx)
print("netflix Gradient Boosting Performance:")
print(f"Mean Squared Error: {mse_gb_nflx}")
print(f"R-squared: {r2_gb_nflx}")
netflix Gradient Boosting Performance: Mean Squared Error: 14.971628008497358 R-squared: 0.9990635390043805
# Visualization for Gradient Boosting - netflix
plt.figure(figsize=(10, 6))
plt.plot(y_test_nflx[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_gb_nflx[:250], 'm--', label="Predicted Price")
plt.title("netflix Stock Price - Actual vs Predicted (Gradient Boosting)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
HYPERPARAMTER TUNING:
from sklearn.model_selection import GridSearchCV
# Define the parameter grid for Gradient Boosting
param_grid = {
'n_estimators': [100, 200, 300],
'learning_rate': [0.01, 0.1, 0.2],
'max_depth': [3, 5, 7],
'min_samples_split': [2, 5, 10],
'min_samples_leaf': [1, 2, 4]
}
# Initialize the model
gb_nflx = GradientBoostingRegressor(random_state=42)
# Initialize GridSearchCV
grid_search_nflx = GridSearchCV(estimator=gb_nflx, param_grid=param_grid,
cv=5, n_jobs=-1, verbose=2)
# Fit the model to the training data
grid_search_nflx.fit(X_train_nflx, y_train_nflx)
# Get the best parameters
best_params_nflx = grid_search_nflx.best_params_
print("Best parameters for netflix:", best_params_nflx)
# Evaluate the model with the best parameters
best_gb_nflx = grid_search_nflx.best_estimator_
y_pred_nflx = best_gb_nflx.predict(X_test_nflx)
mse_nflx = mean_squared_error(y_test_nflx, y_pred_nflx)
r2_nflx = r2_score(y_test_nflx, y_pred_nflx)
print(f"netflix Gradient Boosting Performance (Tuned):")
print(f"Mean Squared Error: {mse_nflx}")
print(f"R-squared: {r2_nflx}")
Fitting 5 folds for each of 243 candidates, totalling 1215 fits Best parameters for netflix: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300} netflix Gradient Boosting Performance (Tuned): Mean Squared Error: 10.105235803043964 R-squared: 0.9993679271769431
the hyperparameter tuning for netflix has finished successfully, and the best parameters have been found. The performance has improved with a lower mean squared error (MSE) after tuning.
SAVING THE TUNED MODEL:
import joblib
joblib.dump(best_gb_nflx, 'best_gb_nflx_model.pkl')
['best_gb_nflx_model.pkl']
Long Short-Term Memory (LSTM) Neural Network for stock price prediction:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
# Feature scaling
scaler = StandardScaler()
X_train_nflx_scaled = scaler.fit_transform(X_train_nflx)
X_test_nflx_scaled = scaler.transform(X_test_nflx)
# Define the neural network model
model_nflx = Sequential([
Dense(64, input_dim=X_train_nflx.shape[1], activation='relu'),
Dense(32, activation='relu'),
Dense(1) # Output layer
])
# Compile the model
model_nflx.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
history_nflx = model_nflx.fit(X_train_nflx_scaled, y_train_nflx, validation_split=0.2, epochs=50, batch_size=32)
# Predict on the test set
y_pred_nn_nflx = model_nflx.predict(X_test_nflx_scaled)
# Evaluate the performance
mse_nflx_nn = mean_squared_error(y_test_nflx, y_pred_nn_nflx)
r2_nflx_nn = r2_score(y_test_nflx, y_pred_nn_nflx)
print(f"netflix Neural Network Performance:")
print(f"Mean Squared Error: {mse_nflx_nn}")
print(f"R-squared: {r2_nflx_nn}")
Epoch 1/50
/usr/local/lib/python3.10/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
25/25 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - loss: 216253.6562 - val_loss: 208144.9375 Epoch 2/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 209883.8125 - val_loss: 205257.5781 Epoch 3/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 210636.1406 - val_loss: 199611.3281 Epoch 4/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 210627.8125 - val_loss: 189762.2500 Epoch 5/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 192534.0156 - val_loss: 174328.0000 Epoch 6/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 170550.0156 - val_loss: 152279.5938 Epoch 7/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 146364.9375 - val_loss: 124636.0078 Epoch 8/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 120195.6875 - val_loss: 94882.2109 Epoch 9/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 89684.1172 - val_loss: 67454.3984 Epoch 10/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 65154.5469 - val_loss: 48020.7188 Epoch 11/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 47875.6992 - val_loss: 36701.2305 Epoch 12/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 40972.8906 - val_loss: 31700.3594 Epoch 13/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 33666.8750 - val_loss: 29274.8594 Epoch 14/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 30109.2363 - val_loss: 27351.1855 Epoch 15/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 27447.8691 - val_loss: 25704.6250 Epoch 16/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 26988.6113 - val_loss: 24029.6758 Epoch 17/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 23974.8398 - val_loss: 22322.5586 Epoch 18/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 23558.0820 - val_loss: 20492.2305 Epoch 19/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 21343.6816 - val_loss: 18795.2930 Epoch 20/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 19022.3711 - val_loss: 17136.1270 Epoch 21/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 17743.8926 - val_loss: 15673.5693 Epoch 22/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 16038.2686 - val_loss: 14233.0918 Epoch 23/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 14016.5605 - val_loss: 12953.1543 Epoch 24/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 13622.3691 - val_loss: 11739.5625 Epoch 25/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 12038.0537 - val_loss: 10665.9180 Epoch 26/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 10580.6299 - val_loss: 9711.5078 Epoch 27/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 9776.0381 - val_loss: 8859.5576 Epoch 28/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 9056.3379 - val_loss: 8208.1045 Epoch 29/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 9131.7012 - val_loss: 7682.9126 Epoch 30/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 7173.3511 - val_loss: 7094.7402 Epoch 31/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 6377.7529 - val_loss: 6564.9277 Epoch 32/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 5973.6875 - val_loss: 6235.0234 Epoch 33/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 5304.8926 - val_loss: 5877.6377 Epoch 34/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 5260.2261 - val_loss: 5473.6201 Epoch 35/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 4812.6338 - val_loss: 5191.5259 Epoch 36/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 3749.5957 - val_loss: 4930.2109 Epoch 37/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 4377.6655 - val_loss: 4524.2007 Epoch 38/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 3792.4695 - val_loss: 4301.7954 Epoch 39/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 4283.6621 - val_loss: 4102.6704 Epoch 40/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 4053.4292 - val_loss: 3863.5554 Epoch 41/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 3557.2209 - val_loss: 3669.3376 Epoch 42/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 2909.4841 - val_loss: 3516.3633 Epoch 43/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 3009.7461 - val_loss: 3333.2598 Epoch 44/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 4017.2004 - val_loss: 3151.9636 Epoch 45/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 2539.9971 - val_loss: 2987.6809 Epoch 46/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 2814.1870 - val_loss: 2852.9426 Epoch 47/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 2359.8613 - val_loss: 2700.8086 Epoch 48/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 1990.2694 - val_loss: 2596.3496 Epoch 49/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 1941.8756 - val_loss: 2430.3418 Epoch 50/50 25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 1693.8661 - val_loss: 2328.1714 8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step netflix Neural Network Performance: Mean Squared Error: 1488.0385173761342 R-squared: 0.9069246156322229
Notes: You can adjust the number of neurons, layers, epochs, and batch size to optimize the model. The StandardScaler ensures that all features are on the same scale, which is important for neural networks. The models are trained for 50 epochs, but you can adjust the number of epochs based on the performance.
# netflix Neural Network Predictions Visualization
plt.figure(figsize=(10, 6))
plt.plot(y_test_nflx[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_nflx[:250], 'm--', label="Predicted Price")
plt.title("netflix Stock Price - Actual vs Predicted (Neural Network)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle="--", alpha=0.7)
plt.gca().set_facecolor("black")
plt.gca().spines["bottom"].set_color("cyan")
plt.gca().spines["top"].set_color("cyan")
plt.gca().spines["left"].set_color("cyan")
plt.gca().spines["right"].set_color("cyan")
plt.show()
# Save the Neural Network model for netflix in the native Keras format
model_nflx.save('best_nn_nflx_model_tuned.keras')
joblib.dump(model_nflx, 'linear_reg_nflx_model.pkl')
['linear_reg_nflx_model.pkl']
joblib.dump(rf_nflx, 'random_forest_nflx_model.pkl')
['random_forest_nflx_model.pkl']
joblib.dump(best_gb_nflx, 'gradient_boost_nflx_model.pkl')
['gradient_boost_nflx_model.pkl']
# Load all Models for netflix:
from tensorflow.keras.models import load_model
# Load Linear Regression model
linear_reg_nflx_model = joblib.load('linear_reg_nflx_model.pkl')
# Load Random Forest model
random_forest_nflx_model = joblib.load('random_forest_nflx_model.pkl')
# Load Gradient Boosting model
gradient_boost_nflx_model = joblib.load('gradient_boost_nflx_model.pkl')
# Load Neural Network model for netflix
best_nn_nflx_model = load_model('best_nn_nflx_model_tuned.keras')
from google.colab import files
# Download the saved models to your local machine
files.download('linear_reg_nflx_model.pkl')
files.download('random_forest_nflx_model.pkl')
files.download('gradient_boost_nflx_model.pkl')
files.download('best_nn_nflx_model_tuned.keras')
Dashboard:
import matplotlib.pyplot as plt
import numpy as np
# Define colors
cyberpunk_blue = '#00FFFF'
cyberpunk_pink = '#FF1493' # Pink color for Gradient Boosting
cyberpunk_background = '#000D0D'
random_forest_color = '#FF00FF' # Magenta for Random Forest
lstm_color = '#FFFF00' # Yellow for LSTM
# Create subplots: 2 rows, 2 columns
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
fig.subplots_adjust(hspace=0.6, top=0.70) # Adjusting space between the charts and shifting top margin for title
# Title for the entire figure
fig.suptitle('Netflix Stock Price Prediction - Model Comparison', fontsize=18, color='white')
# Table with model performance metrics
table_data = [
["Model", "R-squared", "Mean Squared Error"],
["Linear Regression", round(0.9994, 4), round(10.3274, 4)],
["Random Forest", round(0.9984, 4), round(25.2326, 4)],
["Gradient Boosting", round(0.9994, 4), round(10.1052, 4)],
["LSTM Neural Network", round(0.9069, 4), "--"] # Replacing the LSTM MSE with "--"
]
# Add the table without extra space
ax_table = fig.add_axes([0.1, 0.78, 0.8, 0.12]) # Shifting the table slightly lower
ax_table.axis('off')
table = ax_table.table(cellText=table_data, colWidths=[0.3]*3, loc='center', cellLoc='center')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)
# Set table background to black and text to white
for key, cell in table.get_celld().items():
cell.set_edgecolor('white')
cell.set_text_props(color='white')
cell.set_facecolor('black')
# Plot 1: Linear Regression
axs[0, 0].plot(np.arange(len(y_test_nflx[:250])), y_test_nflx[:250], color=cyberpunk_blue, label='Actual Price')
axs[0, 0].plot(np.arange(len(y_pred_nflx[:250])), y_pred_nflx[:250], 'm--', label='Predicted Price (LR)', alpha=0.7)
axs[0, 0].set_title('Linear Regression', fontsize=12, color='white')
axs[0, 0].set_xlabel('Date', fontsize=10, color='white')
axs[0, 0].set_ylabel('Price', fontsize=10, color='white')
axs[0, 0].legend(loc='upper left')
axs[0, 0].grid(True, linestyle='--', alpha=0.7)
axs[0, 0].set_facecolor(cyberpunk_background)
# Plot 2: Random Forest (Magenta)
axs[0, 1].plot(np.arange(len(y_test_nflx[:250])), y_test_nflx[:250], color=cyberpunk_blue, label='Actual Price')
axs[0, 1].plot(np.arange(len(y_pred_rf_nflx[:250])), y_pred_rf_nflx[:250], color=random_forest_color, label='Predicted Price (RF)', alpha=0.7)
axs[0, 1].set_title('Random Forest', fontsize=12, color='white')
axs[0, 1].set_xlabel('Date', fontsize=10, color='white')
axs[0, 1].set_ylabel('Price', fontsize=10, color='white')
axs[0, 1].legend(loc='upper left')
axs[0, 1].grid(True, linestyle='--', alpha=0.7)
axs[0, 1].set_facecolor(cyberpunk_background)
# Plot 3: Gradient Boosting (Pink)
axs[1, 0].plot(np.arange(len(y_test_nflx[:250])), y_test_nflx[:250], color=cyberpunk_blue, label='Actual Price')
axs[1, 0].plot(np.arange(len(y_pred_gb_nflx[:250])), y_pred_gb_nflx[:250], color=cyberpunk_pink, label='Predicted Price (GB)', alpha=0.7) # Pink color
axs[1, 0].set_title('Gradient Boosting', fontsize=12, color='white')
axs[1, 0].set_xlabel('Date', fontsize=10, color='white')
axs[1, 0].set_ylabel('Price', fontsize=10, color='white')
axs[1, 0].legend(loc='upper left')
axs[1, 0].grid(True, linestyle='--', alpha=0.7)
axs[1, 0].set_facecolor(cyberpunk_background)
# Plot 4: LSTM Neural Network (Yellow)
axs[1, 1].plot(np.arange(len(y_test_nflx[:250])), y_test_nflx[:250], color=cyberpunk_blue, label='Actual Price')
axs[1, 1].plot(np.arange(len(y_pred_nn_nflx[:250])), y_pred_nn_nflx[:250], color=lstm_color, label='Predicted Price (NN)', alpha=0.7)
axs[1, 1].set_title('LSTM Neural Network', fontsize=12, color='white')
axs[1, 1].set_xlabel('Date', fontsize=10, color='white')
axs[1, 1].set_ylabel('Price', fontsize=10, color='white')
axs[1, 1].legend(loc='upper left')
axs[1, 1].grid(True, linestyle='--', alpha=0.7)
axs[1, 1].set_facecolor(cyberpunk_background)
# Display the final dashboard
plt.show()