!pip install tensorflow !pip install -q yfinance import yfinance as yf import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.preprocessing import MinMaxScaler from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense, LSTM # Fetch historical stock data for S&P 500 from 2010 to 2023 ticker = '^GSPC' start_date = '2010-01-01' end_date = '2023-09-02' data = yf.download(ticker, start=start_date, end=end_date) data.head() # Extract 'Close' prices and convert to numpy array close_data = data['Close'].values.reshape(-1, 1) # Normalize the data using MinMaxScaler scaler = MinMaxScaler(feature_range=(0, 1)) scaled_data = scaler.fit_transform(close_data) # Create training and test datasets train_size = int(len(scaled_data) * 0.8) test_size = len(scaled_data) - train_size train_data, test_data = scaled_data[0:train_size, :], scaled_data[train_size:len(scaled_data), :] # Reshape the data for LSTM input X_train, y_train, X_test, y_test = [], [], [], [] for i in range(60, len(train_data)): X_train.append(train_data[i-60:i, 0]) y_train.append(train_data[i, 0]) for i in range(60, len(test_data)): X_test.append(test_data[i-60:i, 0]) y_test.append(test_data[i, 0]) X_train, y_train = np.array(X_train), np.array(y_train) X_test, y_test = np.array(X_test), np.array(y_test) X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1)) X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1)) X_train.shape, X_test.shape # Build the LSTM model model = Sequential() model.add(LSTM(units=50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1))) model.add(LSTM(units=50, activation='relu', return_sequences=False)) model.add(Dense(units=25)) model.add(Dense(units=1)) # Compile the model model.compile(optimizer='adam', loss='mean_squared_error') # Summary of the model architecture model.summary() # Train the model history = model.fit( X_train, y_train, epochs=25, batch_size=64, 
validation_data=(X_test, y_test), verbose=1 ) # Make predictions using the test data predictions = model.predict(X_test) # Transform predictions back to original scale predictions = scaler.inverse_transform(np.reshape(predictions, (-1, 1))) # Create a DataFrame to store the actual and predicted values test_data_range = data.iloc[train_size + 60:]['Close'].index comparison_df = pd.DataFrame({'Actual': data.iloc[train_size + 60:]['Close'].values, 'Predicted': np.squeeze(predictions)}, index=test_data_range) comparison_df.head() from sklearn.metrics import mean_squared_error from math import sqrt # Calculate RMSE rmse = sqrt(mean_squared_error(comparison_df['Actual'], comparison_df['Predicted'])) rmse # Plot the actual and predicted values plt.figure(figsize=(16, 8)) plt.title('S&P 500 Price Prediction') plt.xlabel('Date') plt.ylabel('Close Price (USD)') plt.plot(comparison_df['Actual'], label='Actual', color='blue') plt.plot(comparison_df['Predicted'], label='Predicted', color='red') plt.legend(loc='upper left') plt.show() from datetime import timedelta # Initialize variables for future prediction future_days = 120 # Number of days to predict into the future future_predictions = [] last_60_days_scaled = scaled_data[-60:] # Most recent 60 days of data # Loop to predict future prices for i in range(future_days): last_60_days_reshaped = np.reshape(last_60_days_scaled, (1, 60, 1)) next_day_prediction_scaled = model.predict(last_60_days_reshaped) next_day_prediction = scaler.inverse_transform(next_day_prediction_scaled)[0][0] future_predictions.append(next_day_prediction) last_60_days_scaled = np.append(last_60_days_scaled[1:], next_day_prediction_scaled, axis=0) # Create a DataFrame to store the future predictions future_dates = [data.index[-1] + timedelta(days=i+1) for i in range(future_days)] future_predictions_df = pd.DataFrame(future_predictions, columns=['Predicted'], index=future_dates) future_predictions_df.head() # Plot the historical and future predicted values 
figure, axes = plt.subplots(figsize=(16, 8))
axes.set_title('S&P 500 Future Price Prediction')
axes.set_xlabel('Date')
axes.set_ylabel('Close Price (USD)')

# Draw the historical closes first, then the forecast, so both share one axis.
curves = (
    (data['Close'], 'Historical', 'blue'),
    (future_predictions_df['Predicted'], 'Future Predicted', 'red'),
)
for curve, curve_label, curve_color in curves:
    axes.plot(curve, label=curve_label, color=curve_color)

axes.legend(loc='upper left')
plt.show()