!pip install tensorflow

!pip install -q yfinance

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Download S&P 500 index history (Yahoo Finance symbol ^GSPC) for the
# period bounded by start_date and end_date.
ticker = '^GSPC'
start_date, end_date = '2010-01-01', '2023-09-02'
data = yf.download(ticker, start=start_date, end=end_date)

# Notebook-style preview of the first rows of the downloaded frame.
data.head()

# Extract the 'Close' prices as an (n_samples, 1) column, the 2-D layout
# that scikit-learn scalers operate on.
close_data = data['Close'].values.reshape(-1, 1)

# Chronological 80/20 split: the first 80% of the rows are used for
# training and the remainder is held out for testing.
train_size = int(len(close_data) * 0.8)
test_size = len(close_data) - train_size

# Fit the scaler on the training rows only. Fitting it on the full series
# would leak the held-out period's min/max into training (look-ahead bias).
# As a consequence, held-out values may fall outside [0, 1] after scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_data[:train_size])
scaled_data = scaler.transform(close_data)

# Slice the scaled series at the same boundary.
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]

# Turn each scaled series into supervised samples: every sample is the 60
# values preceding a position, and its target is the value at that position.
def _windowed_samples(series, lookback=60):
    """Return (inputs, targets) arrays built from column 0 of *series*.

    inputs[k] holds series[k:k + lookback, 0] and targets[k] holds
    series[k + lookback, 0].
    """
    ends = range(lookback, len(series))
    inputs = np.array([series[end - lookback:end, 0] for end in ends])
    targets = np.array([series[end, 0] for end in ends])
    return inputs, targets


X_train, y_train = _windowed_samples(train_data)
X_test, y_test = _windowed_samples(test_data)

# Add a trailing axis of length 1 -> (samples, timesteps, 1), matching the
# input_shape=(timesteps, 1) given to the first LSTM layer.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Notebook-style display of the resulting shapes.
X_train.shape, X_test.shape

# Assemble the network: two stacked 50-unit LSTM layers (the first returns
# the full sequence so the second can consume it), then a 25-unit dense
# layer and a single-unit output.
# NOTE(review): activation='relu' on LSTM layers is kept as-is; per the Keras
# LSTM docs the cuDNN fast path requires the default tanh - confirm intent.
timesteps = X_train.shape[1]
model = Sequential([
    LSTM(units=50, activation='relu', return_sequences=True, input_shape=(timesteps, 1)),
    LSTM(units=50, activation='relu', return_sequences=False),
    Dense(units=25),
    Dense(units=1),
])

# Mean-squared-error loss optimised with Adam.
model.compile(loss='mean_squared_error', optimizer='adam')

# Print the layer-by-layer architecture summary.
model.summary()

# Fit for 25 epochs in mini-batches of 64; the held-out windows are passed
# as validation data so their loss is reported alongside the training loss.
holdout = (X_test, y_test)
history = model.fit(X_train, y_train, validation_data=holdout, batch_size=64, epochs=25, verbose=1)

# Predict the held-out windows and map the outputs back to price units.
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(np.reshape(predictions, (-1, 1)))

# The first test window covers the first 60 rows of the test split, so the
# predictions line up with rows train_size + 60 onward of the original data.
test_data_range = data.index[train_size + 60:]

# Flatten both columns explicitly: data['Close'] may be a one-column
# DataFrame (2-D values) when yfinance returns MultiIndex columns, and
# np.squeeze would collapse a single prediction to a 0-d scalar. Either
# case makes the dict-based DataFrame constructor fail.
actual_close = np.ravel(data['Close'].values[train_size + 60:])
comparison_df = pd.DataFrame(
    {'Actual': actual_close, 'Predicted': np.ravel(predictions)},
    index=test_data_range,
)
comparison_df.head()

from math import sqrt
from sklearn.metrics import mean_squared_error

# Root-mean-squared error between actual and predicted closes, expressed
# in the same units as the 'Actual' column.
mse = mean_squared_error(comparison_df['Actual'], comparison_df['Predicted'])
rmse = sqrt(mse)
rmse

# Overlay the actual and predicted closes for the held-out period.
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_title('S&P 500 Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price (USD)')
# Each series is labelled with its column name.
for column, line_color in (('Actual', 'blue'), ('Predicted', 'red')):
    ax.plot(comparison_df[column], label=column, color=line_color)
ax.legend(loc='upper left')
plt.show()

from datetime import timedelta

# Recursive multi-step forecast: each prediction is appended to the input
# window and fed back in to predict the following step, so any error in an
# early step propagates into all later ones.
future_days = 120  # Number of business days to predict into the future
future_predictions = []
last_60_days_scaled = scaled_data[-60:]  # Most recent 60 scaled closes

for _ in range(future_days):
    last_60_days_reshaped = np.reshape(last_60_days_scaled, (1, 60, 1))
    # verbose=0 suppresses the progress bar that would otherwise be printed
    # once per forecast step.
    next_day_prediction_scaled = model.predict(last_60_days_reshaped, verbose=0)
    next_day_prediction = scaler.inverse_transform(next_day_prediction_scaled)[0][0]
    future_predictions.append(next_day_prediction)
    # Slide the window: drop the oldest value, append the new prediction.
    last_60_days_scaled = np.append(last_60_days_scaled[1:], next_day_prediction_scaled, axis=0)

# Label the forecasts with consecutive business days (Mon-Fri) after the
# last observed date instead of raw calendar days, so no forecast lands on
# a weekend. Exchange holidays are not excluded.
first_future_day = data.index[-1] + timedelta(days=1)
future_dates = list(pd.bdate_range(start=first_future_day, periods=future_days))
future_predictions_df = pd.DataFrame(future_predictions, columns=['Predicted'], index=future_dates)
future_predictions_df.head()

# Draw the full close-price history and the forecast on one set of axes.
plt.figure(figsize=(16, 8))
axes = plt.gca()
axes.plot(data['Close'], color='blue', label='Historical')
axes.plot(future_predictions_df['Predicted'], color='red', label='Future Predicted')
axes.set(
    title='S&P 500 Future Price Prediction',
    xlabel='Date',
    ylabel='Close Price (USD)',
)
axes.legend(loc='upper left')
plt.show()