# ---------------------------------------------------------------------------
# Amazon (AMZN) closing-price modelling workflow (notebook export).
#
# Steps: pull five years of prices from BigQuery, engineer rolling / lag
# features, fit Linear Regression, Random Forest, Gradient Boosting (plain and
# grid-searched) and a dense neural network, then save, reload, download and
# plot the results.
#
# `client` is used below but is not created in this section; it must already
# exist (presumably a BigQuery client -- confirm against the earlier cells).
# ---------------------------------------------------------------------------
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from google.colab import files
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential, load_model

# Upper bound on the number of test points drawn in the per-model charts.
MAX_PLOT_POINTS = 250

# Model inputs, in the column order the estimators are trained on.
FEATURE_COLUMNS = [
    '7_day_MA',
    '30_day_MA',
    '7_day_volatility',
    '30_day_volatility',
    'Previous_Close',
    'Previous_Volume',
    'Daily_Return',
]

# Features that are computed from the same row's Close (the prediction
# target). They are lagged by one row before modelling; see "Modelling frame".
SAME_DAY_COLUMNS = [
    '7_day_MA',
    '30_day_MA',
    '7_day_volatility',
    '30_day_volatility',
    'Daily_Return',
]


def _report_metrics(header, y_true, y_pred):
    """Print MSE and R-squared under ``header`` and return ``(mse, r2)``."""
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(header)
    print(f"Mean Squared Error: {mse}")
    print(f"R-squared: {r2}")
    return mse, r2


def _apply_cyan_frame(ax):
    """Give ``ax`` a black background and cyan spines."""
    ax.set_facecolor("black")
    for side in ("bottom", "top", "left", "right"):
        ax.spines[side].set_color("cyan")


def _plot_actual_vs_predicted(dates, y_true, y_pred, title,
                              max_points=MAX_PLOT_POINTS):
    """Draw actual vs. predicted prices against their dates and show the figure.

    Args:
        dates: pandas Series of dates aligned row-for-row with ``y_true``.
        y_true: pandas Series of actual prices.
        y_pred: array-like of predictions; a column vector is flattened.
        title: figure title.
        max_points: only the first ``max_points`` rows are drawn.
    """
    shown_dates = dates.iloc[:max_points]
    plt.figure(figsize=(10, 6))
    plt.plot(shown_dates, y_true.iloc[:max_points].to_numpy(),
             color="cyan", label="Actual Price")
    plt.plot(shown_dates, np.ravel(y_pred)[:max_points], 'm--',
             label="Predicted Price")
    plt.title(title, color="cyan")
    plt.xlabel("Date", color="cyan")
    plt.ylabel("Price", color="cyan")
    plt.legend(loc="best")
    plt.grid(True, linestyle='--', alpha=0.7)
    _apply_cyan_frame(plt.gca())
    plt.show()


# --- Load data --------------------------------------------------------------
query = """
SELECT *
FROM `stockprediction-434721.stock_data.amzn_prices`
WHERE CAST(Date AS DATE) >= DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR)
"""

amzn_df = client.query(query).to_dataframe()
print(amzn_df.head(10))

# Check for any missing or null values
print(amzn_df.isnull().sum())

# Ensure that Date is in datetime format
amzn_df['Date'] = pd.to_datetime(amzn_df['Date'])

# Drop columns that are not necessary for modeling (adjust to your needs)
amzn_df = amzn_df.drop(columns=['Adj Close'])

# Sort chronologically: every rolling / shift operation below and the
# chronological train/test split rely on this ordering.
amzn_df = amzn_df.sort_values(by='Date', ascending=True)
print(amzn_df.head())

# --- Feature engineering ----------------------------------------------------
close_amzn = amzn_df['Close']
daily_return_amzn = close_amzn.pct_change()

# 1. Moving averages
amzn_df['7_day_MA'] = close_amzn.rolling(window=7).mean()
amzn_df['30_day_MA'] = close_amzn.rolling(window=30).mean()

# 2. Volatility (standard deviation of daily returns over 7 and 30 days)
amzn_df['7_day_volatility'] = daily_return_amzn.rolling(window=7).std()
amzn_df['30_day_volatility'] = daily_return_amzn.rolling(window=30).std()

# 3. Lag features (previous day's price and volume)
amzn_df['Previous_Close'] = close_amzn.shift(1)
amzn_df['Previous_Volume'] = amzn_df['Volume'].shift(1)

# 4. Daily returns
amzn_df['Daily_Return'] = daily_return_amzn

print(amzn_df.head())

# Check for missing values in each column
print(amzn_df.isna().sum())

# Visualize where NaNs occur
plt.figure(figsize=(10, 6))
sns.heatmap(amzn_df.isna(), cbar=False, cmap="viridis")
plt.title('Amazon Data Missing Values')
plt.show()

# Drop rows with NaN values (the rolling windows leave NaNs at the start)
amzn_df_cleaned = amzn_df.dropna()
print(amzn_df_cleaned.head())
print(amzn_df_cleaned.shape)

# Export the cleaned, feature-engineered dataframe and download it
amzn_df_cleaned.to_csv('amzn_cleaned_feature_engineered.csv', index=False)
print("Dataframe exported to CSV.")
files.download('amzn_cleaned_feature_engineered.csv')

# --- Modelling frame --------------------------------------------------------
# Daily_Return, the moving averages and the volatilities are all computed from
# the same row's Close, which is also the target: Previous_Close *
# (1 + Daily_Return) reproduces Close exactly. Lag those columns by one row so
# each row's inputs only use prices up to the previous trading day. The
# exported CSV above keeps the original, un-lagged values.
model_df_amzn = amzn_df.copy()
model_df_amzn[SAME_DAY_COLUMNS] = model_df_amzn[SAME_DAY_COLUMNS].shift(1)
model_df_amzn = model_df_amzn.dropna()

X_amzn = model_df_amzn[FEATURE_COLUMNS]
y_amzn = model_df_amzn['Close']

# Chronological split: the final 20% of rows form the test set. A shuffled
# split would train on days that come after the ones being predicted.
X_train_amzn, X_test_amzn, y_train_amzn, y_test_amzn = train_test_split(
    X_amzn, y_amzn, test_size=0.2, shuffle=False
)
print(X_train_amzn.shape, X_test_amzn.shape, y_train_amzn.shape, y_test_amzn.shape)

# Dates of the test rows, so the charts' "Date" axis really shows dates.
test_dates_amzn = model_df_amzn.loc[y_test_amzn.index, 'Date']

# --- Linear Regression ------------------------------------------------------
# Kept under its own names: `model_amzn` and `y_pred_amzn` are re-used further
# down for the neural network and the tuned Gradient Boosting predictions.
lr_amzn = LinearRegression()
lr_amzn.fit(X_train_amzn, y_train_amzn)
y_pred_lr_amzn = lr_amzn.predict(X_test_amzn)
mse_lr_amzn, r2_lr_amzn = _report_metrics(
    "Amazon Linear Regression Performance:", y_test_amzn, y_pred_lr_amzn
)

# Define the cyberpunk theme colors
cyberpunk_blue = '#00FFFF'
cyberpunk_red = '#FF007F'
cyberpunk_background = '#0D0D0D'

# Applies to every figure created from here on
plt.style.use('dark_background')

plt.figure(figsize=(10, 6))
plt.plot(test_dates_amzn, y_test_amzn.to_numpy(), color=cyberpunk_blue,
         label='Actual Price', linewidth=2)
plt.plot(test_dates_amzn, y_pred_lr_amzn, color=cyberpunk_red, linestyle='--',
         label='Predicted Price', linewidth=2)
plt.title('Amazon Stock Price - Actual vs Predicted', fontsize=16, color=cyberpunk_blue)
plt.xlabel('Date', fontsize=12, color='white')
plt.ylabel('Price', fontsize=12, color='white')
plt.legend(loc='upper left', fontsize=10)
plt.grid(True, color='#333333')
plt.gca().set_facecolor(cyberpunk_background)
plt.show()

# --- Random Forest ----------------------------------------------------------
rf_amzn = RandomForestRegressor(n_estimators=100, random_state=42)
rf_amzn.fit(X_train_amzn, y_train_amzn)
y_pred_rf_amzn = rf_amzn.predict(X_test_amzn)
mse_rf_amzn, r2_rf_amzn = _report_metrics(
    "Amazon Random Forest Performance:", y_test_amzn, y_pred_rf_amzn
)
_plot_actual_vs_predicted(
    test_dates_amzn, y_test_amzn, y_pred_rf_amzn,
    "Amazon Stock Price - Actual vs Predicted (Random Forest)",
)

# Feature importance from the Random Forest model
importances_amzn = rf_amzn.feature_importances_
feature_names_amzn = X_train_amzn.columns
importance_df_amzn = pd.DataFrame({
    'Feature': feature_names_amzn,
    'Importance': importances_amzn,
})
importance_df_amzn = importance_df_amzn.sort_values(by='Importance', ascending=False)

plt.figure(figsize=(10, 6))
plt.barh(importance_df_amzn['Feature'], importance_df_amzn['Importance'], color='cyan')
plt.xlabel('Feature Importance', color='cyan')
plt.ylabel('Features', color='cyan')
plt.title('Amazon Stock Feature Importance (Random Forest)', color='cyan')
_apply_cyan_frame(plt.gca())
plt.show()

# --- Gradient Boosting (default hyper-parameters) ---------------------------
gb_amzn = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_amzn.fit(X_train_amzn, y_train_amzn)
y_pred_gb_amzn = gb_amzn.predict(X_test_amzn)
mse_gb_amzn, r2_gb_amzn = _report_metrics(
    "Amazon Gradient Boosting Performance:", y_test_amzn, y_pred_gb_amzn
)
_plot_actual_vs_predicted(
    test_dates_amzn, y_test_amzn, y_pred_gb_amzn,
    "Amazon Stock Price - Actual vs Predicted (Gradient Boosting)",
)

# --- Gradient Boosting (grid search) ----------------------------------------
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# NOTE: this rebinds `gb_amzn` to the estimator handed to the grid search; the
# predictions of the model fitted above remain available as `y_pred_gb_amzn`.
gb_amzn = GradientBoostingRegressor(random_state=42)

# TimeSeriesSplit keeps each validation fold after its training fold, matching
# the chronological train/test split used above.
grid_search_amzn = GridSearchCV(
    estimator=gb_amzn,
    param_grid=param_grid,
    cv=TimeSeriesSplit(n_splits=5),
    n_jobs=-1,
    verbose=2,
)
grid_search_amzn.fit(X_train_amzn, y_train_amzn)

best_params_amzn = grid_search_amzn.best_params_
print("Best parameters for Amazon:", best_params_amzn)

# From here on `y_pred_amzn`, `mse_amzn` and `r2_amzn` hold the *tuned*
# Gradient Boosting results.
best_gb_amzn = grid_search_amzn.best_estimator_
y_pred_amzn = best_gb_amzn.predict(X_test_amzn)
mse_amzn, r2_amzn = _report_metrics(
    "Amazon Gradient Boosting Performance (Tuned):", y_test_amzn, y_pred_amzn
)

joblib.dump(best_gb_amzn, 'best_gb_amzn_model.pkl')

# Example: re-using the saved Gradient Boosting model on new data.
# `new_data_amzn` must have the same feature columns as the training data.
#. best_gb_amzn_loaded = joblib.load('best_gb_amzn_model.pkl')
#. future_predictions_amzn = best_gb_amzn_loaded.predict(new_data_amzn)
#. print(future_predictions_amzn)

# --- Neural network (dense feed-forward) ------------------------------------
# Fit the scaler on the training rows only, then apply it to the test rows.
scaler = StandardScaler()
X_train_amzn_scaled = scaler.fit_transform(X_train_amzn)
X_test_amzn_scaled = scaler.transform(X_test_amzn)

model_amzn = Sequential([
    Dense(64, input_dim=X_train_amzn.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(1),  # Output layer
])
model_amzn.compile(optimizer='adam', loss='mean_squared_error')

history_amzn = model_amzn.fit(
    X_train_amzn_scaled,
    y_train_amzn,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
)

y_pred_nn_amzn = model_amzn.predict(X_test_amzn_scaled)
mse_amzn_nn, r2_amzn_nn = _report_metrics(
    "Amazon Neural Network Performance:", y_test_amzn, y_pred_nn_amzn
)

# Plot the network's own predictions (not `y_pred_amzn`, which holds the tuned
# Gradient Boosting output).
_plot_actual_vs_predicted(
    test_dates_amzn, y_test_amzn, y_pred_nn_amzn,
    "Amazon Stock Price - Actual vs Predicted (Neural Network)",
)

# --- Persist, reload and download the models --------------------------------
# Save the neural network in the native Keras format
model_amzn.save('best_nn_amzn_model_tuned.keras')

# `lr_amzn` is the LinearRegression; `model_amzn` is the Keras network by now.
joblib.dump(lr_amzn, 'linear_reg_amzn_model.pkl')
joblib.dump(rf_amzn, 'random_forest_amzn_model.pkl')
joblib.dump(best_gb_amzn, 'gradient_boost_amzn_model.pkl')

linear_reg_amzn_model = joblib.load('linear_reg_amzn_model.pkl')
random_forest_amzn_model = joblib.load('random_forest_amzn_model.pkl')
gradient_boost_amzn_model = joblib.load('gradient_boost_amzn_model.pkl')
best_nn_amzn_model = load_model('best_nn_amzn_model_tuned.keras')

files.download('linear_reg_amzn_model.pkl')
files.download('random_forest_amzn_model.pkl')
files.download('gradient_boost_amzn_model.pkl')
files.download('best_nn_amzn_model_tuned.keras')
# Download other models as needed

# --- Model comparison dashboard ---------------------------------------------
cyberpunk_blue = '#00FFFF'
cyberpunk_pink = '#FF1493'  # Pink for Gradient Boosting
cyberpunk_background = '#000D0D'
random_forest_color = '#FF00FF'  # Magenta for Random Forest
lstm_color = '#FFFF00'  # Yellow for the neural network (name kept from the original)

# 2 x 2 grid of charts; the top margin leaves room for the title and table
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
fig.subplots_adjust(hspace=0.6, top=0.70)
fig.suptitle('Amazon Stock Price Prediction - Model Comparison', fontsize=18, color='white')

# Metrics come from this run. The query window moves with CURRENT_DATE(), so
# hard-coded numbers would not match the models plotted below.
table_data = [
    ["Model", "R-squared", "Mean Squared Error"],
    ["Linear Regression", f"{r2_lr_amzn:.4f}", f"{mse_lr_amzn:.4f}"],
    ["Random Forest", f"{r2_rf_amzn:.4f}", f"{mse_rf_amzn:.4f}"],
    ["Gradient Boosting", f"{r2_gb_amzn:.4f}", f"{mse_gb_amzn:.4f}"],
    ["Neural Network", f"{r2_amzn_nn:.4f}", f"{mse_amzn_nn:.4f}"],
]

ax_table = fig.add_axes([0.1, 0.78, 0.8, 0.12])
ax_table.axis('off')
table = ax_table.table(cellText=table_data, colWidths=[0.3] * 3,
                       loc='center', cellLoc='center')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)

# Black cells with white text and borders
for cell in table.get_celld().values():
    cell.set_edgecolor('white')
    cell.set_text_props(color='white')
    cell.set_facecolor('black')

dashboard_dates = test_dates_amzn.iloc[:MAX_PLOT_POINTS]
dashboard_actual = y_test_amzn.iloc[:MAX_PLOT_POINTS].to_numpy()

# (axes, predictions, panel title, legend label, line style of the prediction)
dashboard_panels = [
    (axs[0, 0], y_pred_lr_amzn, 'Linear Regression', 'Predicted Price (LR)',
     {'color': 'm', 'linestyle': '--'}),
    (axs[0, 1], y_pred_rf_amzn, 'Random Forest', 'Predicted Price (RF)',
     {'color': random_forest_color}),
    (axs[1, 0], y_pred_gb_amzn, 'Gradient Boosting', 'Predicted Price (GB)',
     {'color': cyberpunk_pink}),
    (axs[1, 1], y_pred_nn_amzn, 'Neural Network', 'Predicted Price (NN)',
     {'color': lstm_color}),
]

for ax, predictions, panel_title, predicted_label, line_style in dashboard_panels:
    ax.plot(dashboard_dates, dashboard_actual, color=cyberpunk_blue, label='Actual Price')
    ax.plot(dashboard_dates, np.ravel(predictions)[:MAX_PLOT_POINTS],
            label=predicted_label, alpha=0.7, **line_style)
    ax.set_title(panel_title, fontsize=12, color='white')
    ax.set_xlabel('Date', fontsize=10, color='white')
    ax.set_ylabel('Price', fontsize=10, color='white')
    ax.tick_params(axis='x', rotation=30)  # keep date tick labels from overlapping
    ax.legend(loc='upper left')
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.set_facecolor(cyberpunk_background)

# Display the final dashboard
plt.show()