# Pull the most recent five years of AMZN rows from BigQuery.
# NOTE(review): `client` is not created in this part of the script; it is
# presumably a BigQuery client built in an earlier cell -- confirm.
query = """
SELECT *
FROM `stockprediction-434721.stock_data.amzn_prices`
WHERE CAST(Date AS DATE) >= DATE_SUB(CURRENT_DATE(), INTERVAL 5 YEAR)
"""

# Submit the query first, then materialise its result set as a DataFrame.
query_job = client.query(query)
amzn_df = query_job.to_dataframe()

# Notebook-style preview of the first ten rows.
amzn_df.head(10)


# Report how many null values each raw column contains.
null_counts = amzn_df.isnull().sum()
print(null_counts)

import pandas as pd

# One pipeline for the basic clean-up:
#   1. parse Date into datetime values,
#   2. drop 'Adj Close', which is not used for modeling (adjust as needed),
#   3. order rows chronologically (oldest first) so that the rolling/shift
#      features computed later look backwards in time.
amzn_df = (
    amzn_df
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .drop(columns=['Adj Close'])
    .sort_values('Date')
)

# Preview the cleaned dataframe
print(amzn_df.head())

# Feature engineering for Amazon.
# All features are derived from the Close and Volume columns; the trailing
# windows and the one-row shift rely on the rows being in date order.
# NOTE(review): the moving averages, volatilities and Daily_Return all
# include the current row's Close, so they are not strictly "past-only".
close_prices = amzn_df['Close']
daily_returns = close_prices.pct_change()  # day-over-day fractional change
trailing_windows = (7, 30)

# 1. Moving averages of the closing price
for window in trailing_windows:
    amzn_df[f'{window}_day_MA'] = close_prices.rolling(window=window).mean()

# 2. Volatility: standard deviation of daily returns over each window
for window in trailing_windows:
    amzn_df[f'{window}_day_volatility'] = daily_returns.rolling(window=window).std()

# 3. Lag features: the previous row's close and volume
amzn_df['Previous_Close'] = close_prices.shift(1)
amzn_df['Previous_Volume'] = amzn_df['Volume'].shift(1)

# 4. Daily returns
amzn_df['Daily_Return'] = daily_returns

# Preview the engineered columns
print(amzn_df.head())

# The leading rows of every rolling/shifted column are NaN; count them
print(amzn_df.isna().sum())

# Visualize where NaNs occur in Amazon data
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the boolean NaN mask: one row per observation, one column per
# dataframe column, so gaps show up as contrasting cells.
fig_missing, ax_missing = plt.subplots(figsize=(10, 6))
sns.heatmap(amzn_df.isna(), cbar=False, cmap="viridis", ax=ax_missing)
ax_missing.set_title('Amazon Data Missing Values')
plt.show()

# Keep only rows in which every column is populated (this removes the
# warm-up rows of the rolling/shifted features).
amzn_df_cleaned = amzn_df.dropna(axis=0, how='any')

# Preview the cleaned dataframe and report its dimensions
print(amzn_df_cleaned.head())
print(amzn_df_cleaned.shape)

# Persist the cleaned, feature-engineered data without the index column
cleaned_csv_path = 'amzn_cleaned_feature_engineered.csv'
amzn_df_cleaned.to_csv(cleaned_csv_path, index=False)
print("Dataframe exported to CSV.")

from google.colab import files

# Hand the CSV to the Colab download helper so it lands on the local machine
files.download(cleaned_csv_path)

from sklearn.model_selection import train_test_split

# Define features and target variable.
# NOTE(review): several of these features (the moving averages and
# Daily_Return) are computed from the same day's Close that is being
# predicted, so they leak the target -- consider lagging them by one row.
feature_columns_amzn = [
    '7_day_MA',
    '30_day_MA',
    '7_day_volatility',
    '30_day_volatility',
    'Previous_Close',
    'Previous_Volume',
    'Daily_Return',
]
X_amzn = amzn_df_cleaned[feature_columns_amzn]
y_amzn = amzn_df_cleaned['Close']

# Split chronologically: the dataframe is sorted by Date, so with
# shuffle=False the first 80% of rows (oldest) are used for training and the
# last 20% (most recent) for testing. A shuffled split would mix future rows
# into the training set and would also scramble the order of the test
# samples that the later "Actual vs Predicted" charts plot along a Date axis.
X_train_amzn, X_test_amzn, y_train_amzn, y_test_amzn = train_test_split(
    X_amzn, y_amzn, test_size=0.2, shuffle=False
)

# Preview the shapes
print(X_train_amzn.shape, X_test_amzn.shape, y_train_amzn.shape, y_test_amzn.shape)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Baseline model: ordinary least squares on the engineered features.
# fit() returns the estimator itself, so construction and training chain.
model_amzn = LinearRegression().fit(X_train_amzn, y_train_amzn)

# Score the held-out rows
y_pred_amzn = model_amzn.predict(X_test_amzn)

# Error metrics on the test set
mse_amzn = mean_squared_error(y_true=y_test_amzn, y_pred=y_pred_amzn)
r2_amzn = r2_score(y_true=y_test_amzn, y_pred=y_pred_amzn)

print("Amazon Linear Regression Performance:")
print(f"Mean Squared Error: {mse_amzn}")
print(f"R-squared: {r2_amzn}")

import matplotlib.pyplot as plt
import numpy as np

# Colour palette for the "cyberpunk" look
cyberpunk_blue = '#00FFFF'
cyberpunk_red = '#FF007F'
cyberpunk_background = '#0D0D0D'

# Switch Matplotlib to its dark theme (this affects every later figure too)
plt.style.use('dark_background')

# Actual vs predicted closing price for the linear regression model.
# The x axis is the position of each sample inside the test set.
fig_lr, ax_lr = plt.subplots(figsize=(10, 6))
ax_lr.plot(
    np.arange(len(y_test_amzn)),
    y_test_amzn,
    color=cyberpunk_blue,
    label='Actual Price',
    linewidth=2,
)
ax_lr.plot(
    np.arange(len(y_pred_amzn)),
    y_pred_amzn,
    color=cyberpunk_red,
    linestyle='--',
    label='Predicted Price',
    linewidth=2,
)
ax_lr.set_title('Amazon Stock Price - Actual vs Predicted', fontsize=16, color=cyberpunk_blue)
ax_lr.set_xlabel('Date', fontsize=12, color='white')
ax_lr.set_ylabel('Price', fontsize=12, color='white')
ax_lr.legend(loc='upper left', fontsize=10)
ax_lr.grid(True, color='#333333')
ax_lr.set_facecolor(cyberpunk_background)
plt.show()

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Random forest with 100 trees; the fixed seed keeps runs repeatable.
# fit() returns the estimator itself, so construction and training chain.
rf_amzn = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_amzn, y_train_amzn
)

# Score the held-out rows
y_pred_rf_amzn = rf_amzn.predict(X_test_amzn)

# Error metrics on the test set
mse_rf_amzn = mean_squared_error(y_true=y_test_amzn, y_pred=y_pred_rf_amzn)
r2_rf_amzn = r2_score(y_true=y_test_amzn, y_pred=y_pred_rf_amzn)

print("Amazon Random Forest Performance:")
print(f"Mean Squared Error: {mse_rf_amzn}")
print(f"R-squared: {r2_rf_amzn}")

# Random Forest - Amazon: actual vs predicted for the first 250 test samples
fig_rf, ax_rf = plt.subplots(figsize=(10, 6))
ax_rf.plot(y_test_amzn[:250].values, color="cyan", label="Actual Price")
ax_rf.plot(y_pred_rf_amzn[:250], 'm--', label="Predicted Price")
ax_rf.set_title("Amazon Stock Price - Actual vs Predicted (Random Forest)", color="cyan")
ax_rf.set_xlabel("Date", color="cyan")
ax_rf.set_ylabel("Price", color="cyan")
ax_rf.legend(loc="best")
ax_rf.grid(True, linestyle='--', alpha=0.7)
ax_rf.set_facecolor("black")
# Give the plot frame the same cyan accent on all four sides
for side in ("bottom", "top", "left", "right"):
    ax_rf.spines[side].set_color("cyan")
plt.show()

# Pair every training column with the importance score the fitted random
# forest assigned to it.
feature_names_amzn = X_train_amzn.columns
importances_amzn = rf_amzn.feature_importances_

# Tabulate the pairs and rank them from most to least important
importance_df_amzn = pd.DataFrame(
    {'Feature': feature_names_amzn, 'Importance': importances_amzn}
).sort_values(by='Importance', ascending=False)

# Horizontal bar chart of the ranked importances
fig_imp, ax_imp = plt.subplots(figsize=(10, 6))
ax_imp.barh(importance_df_amzn['Feature'], importance_df_amzn['Importance'], color='cyan')
ax_imp.set_xlabel('Feature Importance', color='cyan')
ax_imp.set_ylabel('Features', color='cyan')
ax_imp.set_title('Amazon Stock Feature Importance (Random Forest)', color='cyan')
ax_imp.set_facecolor('black')
# Cyan frame on all four sides
for side in ('bottom', 'top', 'left', 'right'):
    ax_imp.spines[side].set_color('cyan')
plt.show()

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Gradient boosting with 100 stages; the fixed seed keeps runs repeatable.
# fit() returns the estimator itself, so construction and training chain.
gb_amzn = GradientBoostingRegressor(n_estimators=100, random_state=42).fit(
    X_train_amzn, y_train_amzn
)

# Score the held-out rows
y_pred_gb_amzn = gb_amzn.predict(X_test_amzn)

# Error metrics on the test set
mse_gb_amzn = mean_squared_error(y_true=y_test_amzn, y_pred=y_pred_gb_amzn)
r2_gb_amzn = r2_score(y_true=y_test_amzn, y_pred=y_pred_gb_amzn)

print("Amazon Gradient Boosting Performance:")
print(f"Mean Squared Error: {mse_gb_amzn}")
print(f"R-squared: {r2_gb_amzn}")

# Gradient Boosting - Amazon: actual vs predicted for the first 250 test samples
fig_gb, ax_gb = plt.subplots(figsize=(10, 6))
ax_gb.plot(y_test_amzn[:250].values, color="cyan", label="Actual Price")
ax_gb.plot(y_pred_gb_amzn[:250], 'm--', label="Predicted Price")
ax_gb.set_title("Amazon Stock Price - Actual vs Predicted (Gradient Boosting)", color="cyan")
ax_gb.set_xlabel("Date", color="cyan")
ax_gb.set_ylabel("Price", color="cyan")
ax_gb.legend(loc="best")
ax_gb.grid(True, linestyle='--', alpha=0.7)
ax_gb.set_facecolor("black")
# Give the plot frame the same cyan accent on all four sides
for side in ("bottom", "top", "left", "right"):
    ax_gb.spines[side].set_color("cyan")
plt.show()

from sklearn.model_selection import GridSearchCV

# Hyper-parameter search space for Gradient Boosting
# (3 values for each of 5 parameters = 243 candidates, each fitted on 5 folds).
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Template estimator for the search. It gets its own name so the already
# fitted baseline `gb_amzn` is not replaced by an unfitted model.
gb_search_base_amzn = GradientBoostingRegressor(random_state=42)

# Exhaustive 5-fold cross-validated search using every available core
grid_search_amzn = GridSearchCV(estimator=gb_search_base_amzn, param_grid=param_grid,
                                cv=5, n_jobs=-1, verbose=2)

# Fit the search on the training data
grid_search_amzn.fit(X_train_amzn, y_train_amzn)

# Report the winning parameter combination
best_params_amzn = grid_search_amzn.best_params_
print("Best parameters for Amazon:", best_params_amzn)

# Evaluate the refitted best estimator on the test set.
# The results are stored under dedicated names: reusing y_pred_amzn /
# mse_amzn / r2_amzn would overwrite the Linear Regression results, which
# later plots still read as the linear-regression predictions.
best_gb_amzn = grid_search_amzn.best_estimator_
y_pred_gb_tuned_amzn = best_gb_amzn.predict(X_test_amzn)
mse_gb_tuned_amzn = mean_squared_error(y_test_amzn, y_pred_gb_tuned_amzn)
r2_gb_tuned_amzn = r2_score(y_test_amzn, y_pred_gb_tuned_amzn)

print("Amazon Gradient Boosting Performance (Tuned):")
print(f"Mean Squared Error: {mse_gb_tuned_amzn}")
print(f"R-squared: {r2_gb_tuned_amzn}")

# Persist the tuned model
import joblib
joblib.dump(best_gb_amzn, 'best_gb_amzn_model.pkl')

#.    import joblib

# Load the saved Amazon Gradient Boosting model
#.    best_gb_amzn_loaded = joblib.load('best_gb_amzn_model.pkl')

# Predict on new data (assuming `new_data_amzn` is available)
# new_data_amzn should be in the same format as your training data (features)
#.    future_predictions_amzn = best_gb_amzn_loaded.predict(new_data_amzn)

# Print the predictions
#.    print(future_predictions_amzn)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Feature scaling: fit the scaler on the training rows only, then apply the
# same transformation to the test rows.
# NOTE(review): the fitted scaler is not persisted anywhere in this script,
# but it is needed to prepare inputs for the saved network -- consider
# saving it alongside the model.
scaler = StandardScaler()
X_train_amzn_scaled = scaler.fit_transform(X_train_amzn)
X_test_amzn_scaled = scaler.transform(X_test_amzn)

# Feed-forward network: two hidden ReLU layers and one linear output unit.
# It is bound to its own name; `model_amzn` keeps referring to the fitted
# LinearRegression estimator, which is pickled further down.
nn_model_amzn = Sequential([
    Dense(64, input_dim=X_train_amzn.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer
])

# Compile the model
nn_model_amzn.compile(optimizer='adam', loss='mean_squared_error')

# Train, holding out 20% of the training rows for validation
history_amzn = nn_model_amzn.fit(X_train_amzn_scaled, y_train_amzn, validation_split=0.2, epochs=50, batch_size=32)

# Predict on the test set (Keras returns an array with one column)
y_pred_nn_amzn = nn_model_amzn.predict(X_test_amzn_scaled)

# Evaluate the performance
mse_amzn_nn = mean_squared_error(y_test_amzn, y_pred_nn_amzn)
r2_amzn_nn = r2_score(y_test_amzn, y_pred_nn_amzn)

print("Amazon Neural Network Performance:")
print(f"Mean Squared Error: {mse_amzn_nn}")
print(f"R-squared: {r2_amzn_nn}")

# Amazon Neural Network Predictions Visualization
# (plots the network's own predictions, y_pred_nn_amzn)
plt.figure(figsize=(10, 6))
plt.plot(y_test_amzn[:250].values, color="cyan", label="Actual Price")
plt.plot(y_pred_nn_amzn[:250], 'm--', label="Predicted Price")
plt.title("Amazon Stock Price - Actual vs Predicted (Neural Network)", color="cyan")
plt.xlabel("Date", color="cyan")
plt.ylabel("Price", color="cyan")
plt.legend(loc="best")
plt.grid(True, linestyle="--", alpha=0.7)
plt.gca().set_facecolor("black")
for side in ("bottom", "top", "left", "right"):
    plt.gca().spines[side].set_color("cyan")
plt.show()

# Save the Neural Network model for Amazon in the native Keras format
nn_model_amzn.save('best_nn_amzn_model_tuned.keras')

# Pickle the scikit-learn models. `model_amzn` is the LinearRegression
# estimator here because the network above no longer reuses that name.
joblib.dump(model_amzn, 'linear_reg_amzn_model.pkl')

joblib.dump(rf_amzn, 'random_forest_amzn_model.pkl')

joblib.dump(best_gb_amzn, 'gradient_boost_amzn_model.pkl')

# Load all Models for Amazon:
from tensorflow.keras.models import load_model

# Read the three joblib artifacts back from disk, in the order
# linear regression, random forest, gradient boosting.
(
    linear_reg_amzn_model,
    random_forest_amzn_model,
    gradient_boost_amzn_model,
) = (
    joblib.load(artifact)
    for artifact in (
        'linear_reg_amzn_model.pkl',
        'random_forest_amzn_model.pkl',
        'gradient_boost_amzn_model.pkl',
    )
)

# The neural network was saved in Keras format, so it uses the Keras loader
best_nn_amzn_model = load_model('best_nn_amzn_model_tuned.keras')

from google.colab import files

# Hand each saved Amazon model file to the Colab download helper
for artifact in (
    'linear_reg_amzn_model.pkl',
    'random_forest_amzn_model.pkl',
    'gradient_boost_amzn_model.pkl',
    'best_nn_amzn_model_tuned.keras',
):
    files.download(artifact)

# Download other models as needed

import matplotlib.pyplot as plt
import numpy as np

# Model-comparison dashboard: a metrics table on top of a 2x2 grid of
# "actual vs predicted" panels, one per model.

# Define colors
cyberpunk_blue = '#00FFFF'
cyberpunk_pink = '#FF1493'  # Pink for Gradient Boosting
cyberpunk_background = '#000D0D'
random_forest_color = '#FF00FF'  # Magenta for Random Forest
lstm_color = '#FFFF00'  # Yellow for the neural network (name kept for compatibility)

# Create subplots: 2 rows, 2 columns
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
# Extra vertical spacing between rows, and a lowered top edge so the title
# and the table fit above the charts
fig.subplots_adjust(hspace=0.6, top=0.70)

# Title for the entire figure
fig.suptitle('Amazon Stock Price Prediction - Model Comparison', fontsize=18, color='white')

# Table with model performance metrics ('--' where no MSE is reported).
# NOTE(review): these figures are hard-coded, not read from the metric
# variables computed in this script -- update them after re-training.
# The network is labelled "Neural Network" rather than "LSTM": the model
# built in this script is a stack of Dense layers, not an LSTM.
table_data = [
    ["Model", "R-squared", "Mean Squared Error"],
    ["Linear Regression", 0.9995, 0.4788],
    ["Random Forest", 0.9965, 3.0216],
    ["Gradient Boosting", 0.9987, 1.1142],
    ["Neural Network", 0.9007, "--"]
]

# Dedicated invisible axes just below the title hold the table
ax_table = fig.add_axes([0.1, 0.78, 0.8, 0.12])
ax_table.axis('off')
table = ax_table.table(cellText=table_data, colWidths=[0.3]*3, loc='center', cellLoc='center')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)

# Set table background to black and text to white
for cell in table.get_celld().values():
    cell.set_edgecolor('white')
    cell.set_text_props(color='white')
    cell.set_facecolor('black')

# One entry per panel: (axes, title, predictions, legend label, line style).
# NOTE(review): y_pred_amzn is expected to hold the Linear Regression
# predictions here -- verify that no later cell has reassigned it.
panels = [
    (axs[0, 0], 'Linear Regression', y_pred_amzn,
     'Predicted Price (LR)', {'color': 'm', 'linestyle': '--'}),
    (axs[0, 1], 'Random Forest', y_pred_rf_amzn,
     'Predicted Price (RF)', {'color': random_forest_color}),
    (axs[1, 0], 'Gradient Boosting', y_pred_gb_amzn,
     'Predicted Price (GB)', {'color': cyberpunk_pink}),
    (axs[1, 1], 'Neural Network', y_pred_nn_amzn,
     'Predicted Price (NN)', {'color': lstm_color}),
]

# Every panel shows the first 250 test samples: actual price in blue and the
# model's prediction on top of it.
actual_window = y_test_amzn[:250]
for ax, panel_title, predictions, predicted_label, line_style in panels:
    predicted_window = predictions[:250]
    ax.plot(np.arange(len(actual_window)), actual_window,
            color=cyberpunk_blue, label='Actual Price')
    ax.plot(np.arange(len(predicted_window)), predicted_window,
            label=predicted_label, alpha=0.7, **line_style)
    ax.set_title(panel_title, fontsize=12, color='white')
    ax.set_xlabel('Date', fontsize=10, color='white')
    ax.set_ylabel('Price', fontsize=10, color='white')
    ax.legend(loc='upper left')
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.set_facecolor(cyberpunk_background)

# Display the final dashboard
plt.show()