import yfinance as yf
sp500 = yf.Ticker("^GSPC")
sp500 = sp500.history(period="max")
sp500
Open | High | Low | Close | Volume | Dividends | Stock Splits | |
---|---|---|---|---|---|---|---|
Date | |||||||
1927-12-30 00:00:00-05:00 | 17.660000 | 17.660000 | 17.660000 | 17.660000 | 0 | 0.0 | 0.0 |
1928-01-03 00:00:00-05:00 | 17.760000 | 17.760000 | 17.760000 | 17.760000 | 0 | 0.0 | 0.0 |
1928-01-04 00:00:00-05:00 | 17.719999 | 17.719999 | 17.719999 | 17.719999 | 0 | 0.0 | 0.0 |
1928-01-05 00:00:00-05:00 | 17.549999 | 17.549999 | 17.549999 | 17.549999 | 0 | 0.0 | 0.0 |
1928-01-06 00:00:00-05:00 | 17.660000 | 17.660000 | 17.660000 | 17.660000 | 0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... |
2024-09-09 00:00:00-04:00 | 5442.069824 | 5484.200195 | 5434.490234 | 5471.049805 | 3825940000 | 0.0 | 0.0 |
2024-09-10 00:00:00-04:00 | 5490.509766 | 5497.910156 | 5441.720215 | 5495.520020 | 3848180000 | 0.0 | 0.0 |
2024-09-11 00:00:00-04:00 | 5496.419922 | 5560.410156 | 5406.959961 | 5554.129883 | 3839450000 | 0.0 | 0.0 |
2024-09-12 00:00:00-04:00 | 5557.479980 | 5600.709961 | 5535.500000 | 5595.759766 | 3655070000 | 0.0 | 0.0 |
2024-09-13 00:00:00-04:00 | 5603.339844 | 5636.270020 | 5601.649902 | 5626.020020 | 3500790000 | 0.0 | 0.0 |
24292 rows × 7 columns
sp500.index
DatetimeIndex(['1927-12-30 00:00:00-05:00', '1928-01-03 00:00:00-05:00', '1928-01-04 00:00:00-05:00', '1928-01-05 00:00:00-05:00', '1928-01-06 00:00:00-05:00', '1928-01-09 00:00:00-05:00', '1928-01-10 00:00:00-05:00', '1928-01-11 00:00:00-05:00', '1928-01-12 00:00:00-05:00', '1928-01-13 00:00:00-05:00', ... '2024-08-30 00:00:00-04:00', '2024-09-03 00:00:00-04:00', '2024-09-04 00:00:00-04:00', '2024-09-05 00:00:00-04:00', '2024-09-06 00:00:00-04:00', '2024-09-09 00:00:00-04:00', '2024-09-10 00:00:00-04:00', '2024-09-11 00:00:00-04:00', '2024-09-12 00:00:00-04:00', '2024-09-13 00:00:00-04:00'], dtype='datetime64[ns, America/New_York]', name='Date', length=24292, freq=None)
CLEANING AND VISUALIZING OUR STOCK MARKET DATA:
sp500.plot.line(y="Close", use_index=True)
<Axes: xlabel='Date'>
del sp500 ["Dividends"]
del sp500 ["Stock Splits"]
PREDICTING DIRECTIONALITY
creating tommorow column and shifting close minus 1 day
sp500["Tomorrow"] = sp500["Close"].shift(-1)
sp500
Open | High | Low | Close | Volume | Tomorrow | |
---|---|---|---|---|---|---|
Date | ||||||
1927-12-30 00:00:00-05:00 | 17.660000 | 17.660000 | 17.660000 | 17.660000 | 0 | 17.760000 |
1928-01-03 00:00:00-05:00 | 17.760000 | 17.760000 | 17.760000 | 17.760000 | 0 | 17.719999 |
1928-01-04 00:00:00-05:00 | 17.719999 | 17.719999 | 17.719999 | 17.719999 | 0 | 17.549999 |
1928-01-05 00:00:00-05:00 | 17.549999 | 17.549999 | 17.549999 | 17.549999 | 0 | 17.660000 |
1928-01-06 00:00:00-05:00 | 17.660000 | 17.660000 | 17.660000 | 17.660000 | 0 | 17.500000 |
... | ... | ... | ... | ... | ... | ... |
2024-09-09 00:00:00-04:00 | 5442.069824 | 5484.200195 | 5434.490234 | 5471.049805 | 3825940000 | 5495.520020 |
2024-09-10 00:00:00-04:00 | 5490.509766 | 5497.910156 | 5441.720215 | 5495.520020 | 3848180000 | 5554.129883 |
2024-09-11 00:00:00-04:00 | 5496.419922 | 5560.410156 | 5406.959961 | 5554.129883 | 3839450000 | 5595.759766 |
2024-09-12 00:00:00-04:00 | 5557.479980 | 5600.709961 | 5535.500000 | 5595.759766 | 3655070000 | 5626.020020 |
2024-09-13 00:00:00-04:00 | 5603.339844 | 5636.270020 | 5601.649902 | 5626.020020 | 3500790000 | NaN |
24292 rows × 6 columns
sp500["Target"] = (sp500["Tomorrow"] > sp500["Close"]).astype(int)
sp500
Open | High | Low | Close | Volume | Tomorrow | Target | |
---|---|---|---|---|---|---|---|
Date | |||||||
1927-12-30 00:00:00-05:00 | 17.660000 | 17.660000 | 17.660000 | 17.660000 | 0 | 17.760000 | 1 |
1928-01-03 00:00:00-05:00 | 17.760000 | 17.760000 | 17.760000 | 17.760000 | 0 | 17.719999 | 0 |
1928-01-04 00:00:00-05:00 | 17.719999 | 17.719999 | 17.719999 | 17.719999 | 0 | 17.549999 | 0 |
1928-01-05 00:00:00-05:00 | 17.549999 | 17.549999 | 17.549999 | 17.549999 | 0 | 17.660000 | 1 |
1928-01-06 00:00:00-05:00 | 17.660000 | 17.660000 | 17.660000 | 17.660000 | 0 | 17.500000 | 0 |
... | ... | ... | ... | ... | ... | ... | ... |
2024-09-09 00:00:00-04:00 | 5442.069824 | 5484.200195 | 5434.490234 | 5471.049805 | 3825940000 | 5495.520020 | 1 |
2024-09-10 00:00:00-04:00 | 5490.509766 | 5497.910156 | 5441.720215 | 5495.520020 | 3848180000 | 5554.129883 | 1 |
2024-09-11 00:00:00-04:00 | 5496.419922 | 5560.410156 | 5406.959961 | 5554.129883 | 3839450000 | 5595.759766 | 1 |
2024-09-12 00:00:00-04:00 | 5557.479980 | 5600.709961 | 5535.500000 | 5595.759766 | 3655070000 | 5626.020020 | 1 |
2024-09-13 00:00:00-04:00 | 5603.339844 | 5636.270020 | 5601.649902 | 5626.020020 | 3500790000 | NaN | 0 |
24292 rows × 7 columns
sp500 = sp500.loc["1990-01-01":].copy()
^^working from 1990+
sp500
Open | High | Low | Close | Volume | Tomorrow | Target | |
---|---|---|---|---|---|---|---|
Date | |||||||
1990-01-02 00:00:00-05:00 | 353.399994 | 359.690002 | 351.980011 | 359.690002 | 162070000 | 358.760010 | 0 |
1990-01-03 00:00:00-05:00 | 359.690002 | 360.589996 | 357.890015 | 358.760010 | 192330000 | 355.670013 | 0 |
1990-01-04 00:00:00-05:00 | 358.760010 | 358.760010 | 352.890015 | 355.670013 | 177000000 | 352.200012 | 0 |
1990-01-05 00:00:00-05:00 | 355.670013 | 355.670013 | 351.350006 | 352.200012 | 158530000 | 353.790009 | 1 |
1990-01-08 00:00:00-05:00 | 352.200012 | 354.239990 | 350.540009 | 353.790009 | 140110000 | 349.619995 | 0 |
... | ... | ... | ... | ... | ... | ... | ... |
2024-09-09 00:00:00-04:00 | 5442.069824 | 5484.200195 | 5434.490234 | 5471.049805 | 3825940000 | 5495.520020 | 1 |
2024-09-10 00:00:00-04:00 | 5490.509766 | 5497.910156 | 5441.720215 | 5495.520020 | 3848180000 | 5554.129883 | 1 |
2024-09-11 00:00:00-04:00 | 5496.419922 | 5560.410156 | 5406.959961 | 5554.129883 | 3839450000 | 5595.759766 | 1 |
2024-09-12 00:00:00-04:00 | 5557.479980 | 5600.709961 | 5535.500000 | 5595.759766 | 3655070000 | 5626.020020 | 1 |
2024-09-13 00:00:00-04:00 | 5603.339844 | 5636.270020 | 5601.649902 | 5626.020020 | 3500790000 | NaN | 0 |
8742 rows × 7 columns
TRANING AN INITIAL MACHINE LEARNING MODEL
using random forest to train decision trees (this is resistant to overfitting)
***need to experiment with higher estimator values. min sample split values help avoid overfitting - higher = more accurate but likely to overfit. random state means if we run same model twice, the random numbers generated will be in a predicatable sequence (with setting 1)...
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
# Split the data without shuffling to maintain temporal order
train, test = train_test_split(sp500, test_size=0.1, shuffle=False)
# Define your predictors and train the Random Forest
predictors = ['Close', 'Volume', 'Open', 'High', 'Low']
model = RandomForestClassifier(n_estimators=100, min_samples_split=100, random_state=1)
model.fit(train[predictors], train["Target"])
RandomForestClassifier(min_samples_split=100, random_state=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(min_samples_split=100, random_state=1)
from sklearn.metrics import precision_score
preds = model.predict(test[predictors])
import pandas as pd
preds = pd.Series(preds, index=test.index)
preds
0 | |
---|---|
Date | |
2021-03-24 00:00:00-04:00 | 1 |
2021-03-25 00:00:00-04:00 | 0 |
2021-03-26 00:00:00-04:00 | 0 |
2021-03-29 00:00:00-04:00 | 0 |
2021-03-30 00:00:00-04:00 | 0 |
... | ... |
2024-09-09 00:00:00-04:00 | 0 |
2024-09-10 00:00:00-04:00 | 0 |
2024-09-11 00:00:00-04:00 | 0 |
2024-09-12 00:00:00-04:00 | 0 |
2024-09-13 00:00:00-04:00 | 0 |
875 rows × 1 columns
precision_score(test["Target"], preds)
0.4714285714285714
combined = pd.concat([test["Target"], preds], axis=1)
predictions vs actual
combined.plot()
<Axes: xlabel='Date'>
BUILDING A BACKTESTING SYSTEM
def predict(train, test, predictors, model):
model.fit(train[predictors], train["Target"])
preds = model.predict(test[predictors])
preds = pd.Series(preds, index=test.index, name="Predictions")
combined = pd.concat([test["Target"], preds], axis=1)
return combined
def backtest(data, model, predictors, start=2500, step=250):
all_predictions = []
for i in range(start, data.shape[0], step):
train = data.iloc[0:i].copy()
test = data.iloc[i:(i+step)].copy()
predictions = predict(train, test, predictors, model)
all_predictions.append(predictions)
return pd.concat(all_predictions)
predictions = backtest(sp500, model, predictors)
predictions["Predictions"].value_counts()
count | |
---|---|
Predictions | |
0 | 3646 |
1 | 2596 |
precision_score(predictions["Target"], predictions["Predictions"])
0.5288906009244992
predictions["Target"].value_counts() / predictions.shape[0]
count | |
---|---|
Target | |
1 | 0.535566 |
0 | 0.464434 |
^^ S&P went up ~53% of days - the algorithm performed a litle worse than the natural progression
ADDING ADDITIONAL PREDICTORS TO THE MODEL
horizons = [2,5,60,250,1000]
new_predictors = []
for horizon in horizons:
rolling_averages = sp500.rolling(horizon).mean()
ratio_column = f"Close_Ratio_{horizon}"
sp500[ratio_column] = sp500["Close"] / rolling_averages["Close"]
trend_column = f"Trend_{horizon}"
sp500[trend_column] = sp500.shift(1).rolling(horizon).sum()["Target"]
new_predictors += [ratio_column, trend_column]
^^^ these are the horizons that we want to view rolling means
sp500 = sp500.dropna()
sp500
Open | High | Low | Close | Volume | Tomorrow | Target | Close_Ratio_2 | Trend_2 | Close_Ratio_5 | Trend_5 | Close_Ratio_60 | Trend_60 | Close_Ratio_250 | Trend_250 | Close_Ratio_1000 | Trend_1000 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||
1993-12-14 00:00:00-05:00 | 465.730011 | 466.119995 | 462.459991 | 463.059998 | 275050000 | 461.839996 | 0 | 0.997157 | 1.0 | 0.996617 | 1.0 | 1.000283 | 32.0 | 1.028047 | 127.0 | 1.176082 | 512.0 |
1993-12-15 00:00:00-05:00 | 463.059998 | 463.690002 | 461.839996 | 461.839996 | 331770000 | 463.339996 | 1 | 0.998681 | 0.0 | 0.995899 | 1.0 | 0.997329 | 32.0 | 1.025151 | 126.0 | 1.172676 | 512.0 |
1993-12-16 00:00:00-05:00 | 461.859985 | 463.980011 | 461.859985 | 463.339996 | 284620000 | 466.380005 | 1 | 1.001621 | 1.0 | 0.999495 | 2.0 | 1.000311 | 32.0 | 1.028274 | 127.0 | 1.176163 | 513.0 |
1993-12-17 00:00:00-05:00 | 463.339996 | 466.380005 | 463.339996 | 466.380005 | 363750000 | 465.850006 | 0 | 1.003270 | 2.0 | 1.004991 | 3.0 | 1.006561 | 32.0 | 1.034781 | 128.0 | 1.183537 | 514.0 |
1993-12-20 00:00:00-05:00 | 466.380005 | 466.899994 | 465.529999 | 465.850006 | 255900000 | 465.299988 | 0 | 0.999431 | 1.0 | 1.003784 | 2.0 | 1.005120 | 32.0 | 1.033359 | 128.0 | 1.181856 | 513.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2024-09-06 00:00:00-04:00 | 5507.330078 | 5522.470215 | 5402.620117 | 5408.419922 | 3822800000 | 5471.049805 | 1 | 0.991295 | 0.0 | 0.979459 | 1.0 | 0.983825 | 34.0 | 1.084735 | 143.0 | 1.246199 | 524.0 |
2024-09-09 00:00:00-04:00 | 5442.069824 | 5484.200195 | 5434.490234 | 5471.049805 | 3825940000 | 5495.520020 | 1 | 1.005757 | 1.0 | 0.997207 | 1.0 | 0.995066 | 34.0 | 1.096431 | 143.0 | 1.260025 | 525.0 |
2024-09-10 00:00:00-04:00 | 5490.509766 | 5497.910156 | 5441.720215 | 5495.520020 | 3848180000 | 5554.129883 | 1 | 1.002231 | 2.0 | 1.002888 | 2.0 | 0.999330 | 34.0 | 1.100423 | 144.0 | 1.265038 | 526.0 |
2024-09-11 00:00:00-04:00 | 5496.419922 | 5560.410156 | 5406.959961 | 5554.129883 | 3839450000 | 5595.759766 | 1 | 1.005304 | 2.0 | 1.012325 | 3.0 | 1.009613 | 35.0 | 1.111192 | 144.0 | 1.277872 | 527.0 |
2024-09-12 00:00:00-04:00 | 5557.479980 | 5600.709961 | 5535.500000 | 5595.759766 | 3655070000 | 5626.020020 | 1 | 1.003734 | 2.0 | 1.016491 | 4.0 | 1.016803 | 35.0 | 1.118544 | 144.0 | 1.286765 | 528.0 |
7741 rows × 17 columns
IMPROVING THE MODEL
model = RandomForestClassifier(n_estimators=200, min_samples_split=50, random_state=1)
def predict(train, test, predictors, model):
model.fit(train[predictors], train["Target"])
preds = model.predict_proba(test[predictors])[:,1]
preds[preds >=.6] = 1
preds[preds <.6] = 0
preds = pd.Series(preds, index=test.index, name="Predictions")
combined = pd.concat([test["Target"], preds], axis=1)
return combined
^^^ predict-proba method returns a probability of stock price going up/down. the 60% threshold means the model has to be more confident, which reducses number of trading days but increases chances
predictions = backtest(sp500, model, new_predictors)
predictions["Predictions"].value_counts()
count | |
---|---|
Predictions | |
0.0 | 4395 |
1.0 | 846 |
precision_score(predictions["Target"], predictions["Predictions"])
0.574468085106383
PAPER TRADE:
adding RSI to dataframe
# Define a function to calculate RSI
def calculate_rsi(data, window=14):
delta = data['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=window).mean()
avg_loss = loss.rolling(window=window).mean()
rs = avg_gain / avg_loss
rsi = 100 - (100 / (1 + rs))
return rsi
# Add RSI to your DataFrame using .loc to avoid SettingWithCopyWarning
sp500.loc[:, 'RSI'] = calculate_rsi(sp500)
# Verify that RSI is added correctly
print(sp500[['Close', 'RSI']].head())
Close RSI Date 1993-12-14 00:00:00-05:00 463.059998 NaN 1993-12-15 00:00:00-05:00 461.839996 NaN 1993-12-16 00:00:00-05:00 463.339996 NaN 1993-12-17 00:00:00-05:00 466.380005 NaN 1993-12-20 00:00:00-05:00 465.850006 NaN
<ipython-input-35-174ad213062f>:16: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy sp500.loc[:, 'RSI'] = calculate_rsi(sp500)
nan_rsi_count = sp500['RSI'].isna().sum()
print(nan_rsi_count)
13
sp500['RSI'] = sp500['RSI'].fillna(method='bfill')
nan_rsi_count = sp500['RSI'].isna().sum()
print(nan_rsi_count)
0
<ipython-input-37-bf28431096e5>:1: FutureWarning: Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead. sp500['RSI'] = sp500['RSI'].fillna(method='bfill') <ipython-input-37-bf28431096e5>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy sp500['RSI'] = sp500['RSI'].fillna(method='bfill')
Step 1: Filter buy/sell signals based on RSI
# Drop the 'RSI' column from predictions if it already exists
if 'RSI' in predictions.columns:
predictions = predictions.drop(columns=['RSI'])
# Merge the RSI column into the predictions DataFrame
predictions = predictions.merge(sp500[['RSI']], left_index=True, right_index=True)
# Define the conditions for buy/sell based on model prediction and RSI
buy_condition = (predictions['Predictions'] == 1) & (predictions['RSI'] < 10)
sell_condition = (predictions['Predictions'] == 0) & (predictions['RSI'] > 90)
# Create new columns for filtered buy/sell signals
predictions['Filtered_Signal'] = 0 # Default to 'hold' (no action)
predictions.loc[buy_condition, 'Filtered_Signal'] = 1 # Buy signal
predictions.loc[sell_condition, 'Filtered_Signal'] = -1 # Sell signal
# Display the filtered signals for reference
print(predictions[['Predictions', 'Filtered_Signal', 'RSI']].head())
Predictions Filtered_Signal RSI Date 2003-11-14 00:00:00-05:00 0.0 0 62.762215 2003-11-17 00:00:00-05:00 1.0 0 47.619030 2003-11-18 00:00:00-05:00 1.0 0 40.633446 2003-11-19 00:00:00-05:00 0.0 0 47.244003 2003-11-20 00:00:00-05:00 1.0 0 40.156976
count_buy = (predictions['Filtered_Signal'] == 1).sum()
count_sell = (predictions['Filtered_Signal'] == -1).sum()
print(f"Number of 'Buy' signals: {count_buy}")
print(f"Number of 'Sell' signals: {count_sell}")
Number of 'Buy' signals: 5 Number of 'Sell' signals: 51
MACD CROSSOVER + ONLY SELL OVERBOUGHT RSI SIMULATION (89% ACCURATE):
# Calculate the MACD (Moving Average Convergence Divergence)
short_window = 12 # Short-term EMA (e.g., 12-period EMA)
long_window = 26 # Long-term EMA (e.g., 26-period EMA)
signal_window = 9 # Signal line (e.g., 9-period EMA of MACD line)
# Calculate MACD and Signal Line
sp500['MACD'] = sp500['Close'].ewm(span=short_window, adjust=False).mean() - sp500['Close'].ewm(span=long_window, adjust=False).mean()
sp500['Signal_Line'] = sp500['MACD'].ewm(span=signal_window, adjust=False).mean()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Initialize values
initial_balance = 20000 # Starting balance of $20,000
balance = initial_balance
shares = 0
balance_over_time = []
# Trade log to track each trade
trade_log = pd.DataFrame(columns=['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss'])
# Use sp500 DataFrame for this
sp500 = sp500.copy()
sp500['Buy_Signal'] = (sp500['MACD'] > sp500['Signal_Line']) # MACD Cross up
sp500['Sell_Signal'] = (sp500['MACD'] < sp500['Signal_Line']) & (sp500['RSI'] > 70) # MACD Cross down + RSI > 70
# Initialize columns for tracking
sp500['Portfolio_Value'] = np.nan
sp500['Balance'] = np.nan
sp500['Shares_Held'] = np.nan
# Simulate the paper trading
for i, row in sp500.iterrows():
close_price = row['Close']
trade_date = row.name
amount_to_invest = 0.1 * balance # Recalculate 10% of the balance each time
if row['Buy_Signal'] and shares == 0: # Buy signal - invest 10% of balance
shares_to_buy = amount_to_invest // close_price # Calculate the number of shares to buy
shares_bought = shares_to_buy # Whole shares only
if shares_bought > 0: # Only proceed if we actually bought shares
shares += shares_bought
balance -= shares_bought * close_price # Update balance based on actual share purchase
# Log the trade
new_trade = pd.DataFrame({
'Date': [trade_date],
'Action': ['Buy'],
'Shares': [shares_bought],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance + (shares * close_price)],
'Profit/Loss': [0] # No profit or loss on buy
})
trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
elif row['Sell_Signal'] and shares > 0: # Sell signal - sell all shares if RSI > 70
sell_value = shares * close_price
balance += sell_value
# Calculate the profit/loss
buy_price = trade_log.iloc[-1]['Price'] # Get the last buy price
profit_loss = (close_price - buy_price) * shares
# Log the trade
new_trade = pd.DataFrame({
'Date': [trade_date],
'Action': ['Sell'],
'Shares': [shares],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance],
'Profit/Loss': [profit_loss]
})
trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
shares = 0 # Sell all shares
# Record portfolio value (balance + value of held shares)
portfolio_value = balance + (shares * close_price)
balance_over_time.append(portfolio_value)
# Update the DataFrame with current values
sp500.at[i, 'Portfolio_Value'] = portfolio_value
sp500.at[i, 'Balance'] = balance
sp500.at[i, 'Shares_Held'] = shares
# Final balance after all trades
final_balance = balance + (shares * sp500['Close'].iloc[-1]) # If holding shares, sell at last close price
profit = final_balance - initial_balance
# Print final results
print(f"Final Balance: ${final_balance:.2f}")
print(f"Total Profit: ${profit:.2f}")
# Convert balance over time to pandas series for plotting
balance_over_time = pd.Series(balance_over_time, index=sp500.index[:len(balance_over_time)])
# Plotting Portfolio Value vs S&P 500 Close Price
plt.figure(figsize=(12, 6))
plt.plot(sp500.index, sp500['Portfolio_Value'], label='Portfolio Value', color='cyan')
plt.plot(sp500.index, sp500['Close'], label='S&P 500 Close Price', color='magenta', alpha=0.5)
plt.title('Portfolio Value vs S&P 500 Close Price')
plt.xlabel('Date')
plt.ylabel('Value ($)')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Trade summary
total_trades = len(trade_log[trade_log['Shares'] > 0]) # Only count trades with shares > 0
winning_trades = trade_log[trade_log['Profit/Loss'] > 0]
losing_trades = trade_log[trade_log['Profit/Loss'] < 0]
print(f"Total Trades: {total_trades}")
print(f"Winning Trades: {len(winning_trades)}")
print(f"Losing Trades: {len(losing_trades)}")
# Plotting Profit/Loss of each trade
plt.figure(figsize=(12, 6))
trade_log['Profit/Loss'].plot(kind='bar', color=['green' if x > 0 else 'red' for x in trade_log['Profit/Loss']])
plt.title('Profit/Loss of Each Trade')
plt.ylabel('Profit/Loss ($)')
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Display the trade log with Profit/Loss
print(trade_log[['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss']])
<ipython-input-43-7b09048e26ca>:48: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation. trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
Final Balance: $22555.64 Total Profit: $2555.64
Total Trades: 70 Winning Trades: 31 Losing Trades: 4
Date Action Shares Price Balance \ 0 1993-12-17 00:00:00-05:00 Buy 4.0 466.380005 18134.479980 1 1995-04-11 00:00:00-04:00 Sell 4.0 505.529999 20156.599976 2 1995-04-27 00:00:00-04:00 Buy 3.0 513.549988 18615.950012 3 1995-06-29 00:00:00-04:00 Sell 3.0 543.869995 20247.559998 4 1995-07-06 00:00:00-04:00 Buy 3.0 553.989990 18585.590027 .. ... ... ... ... ... 65 2016-04-27 00:00:00-04:00 Sell 1.0 2095.149902 22388.800171 66 2016-05-25 00:00:00-04:00 Buy 1.0 2090.540039 20298.260132 67 2016-07-29 00:00:00-04:00 Sell 1.0 2173.600098 22471.860229 68 2016-09-22 00:00:00-04:00 Buy 1.0 2177.179932 20294.680298 69 2016-12-22 00:00:00-05:00 Sell 1.0 2260.959961 22555.640259 Portfolio Value Profit/Loss 0 20000.000000 0 1 20156.599976 156.599976 2 20156.599976 0 3 20247.559998 90.960022 4 20247.559998 0 .. ... ... 65 22388.800171 16.609863 66 22388.800171 0 67 22471.860229 83.060059 68 22471.860229 0 69 22555.640259 83.780029 [70 rows x 7 columns]
MACD Histogram + ONLY SELL RSI OVERBOUGHT Simulation/Strategy (88%):
# Calculate MACD Histogram (MACD - Signal Line)
sp500['MACD_Histogram'] = sp500['MACD'] - sp500['Signal_Line']
# Now you can rerun the MACD histogram strategy simulation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Initialize values
initial_balance = 20000 # Starting balance of $20,000
balance = initial_balance
shares = 0
amount_to_invest = 0.1 * balance # Invest 10% of balance on each trade
balance_over_time = []
# Trade log to track each trade
trade_log = pd.DataFrame(columns=['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss'])
# Calculate the MACD (Moving Average Convergence Divergence)
short_window = 12 # Short-term EMA (e.g., 12-period EMA)
long_window = 26 # Long-term EMA (e.g., 26-period EMA)
signal_window = 9 # Signal line (e.g., 9-period EMA of MACD line)
# Calculate MACD and Signal Line
sp500['MACD'] = sp500['Close'].ewm(span=short_window, adjust=False).mean() - sp500['Close'].ewm(span=long_window, adjust=False).mean()
sp500['Signal_Line'] = sp500['MACD'].ewm(span=signal_window, adjust=False).mean()
# Calculate MACD Histogram (MACD - Signal Line)
sp500['MACD_Histogram'] = sp500['MACD'] - sp500['Signal_Line']
# Initialize columns for tracking
sp500['Portfolio_Value'] = np.nan
sp500['Balance'] = np.nan
sp500['Shares_Held'] = np.nan
# Simulate the paper trading based on MACD Histogram signals
for i, row in sp500.iterrows():
close_price = row['Close']
trade_date = row.name
# Calculate amount to invest (10% of the current balance)
amount_to_invest = 0.1 * balance
# Buy when MACD histogram crosses above 0
if row['MACD_Histogram'] > 0 and shares == 0:
shares_to_buy = amount_to_invest // close_price # Buy whole shares only
# Only execute the trade if shares_to_buy is greater than 0
if shares_to_buy > 0:
shares += shares_to_buy
balance -= shares_to_buy * close_price
# Log the trade
new_trade = pd.DataFrame({
'Date': [trade_date],
'Action': ['Buy'],
'Shares': [shares],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance + (shares * close_price)],
'Profit/Loss': [0] # Set profit/loss to 0 for now; we'll calculate it after sell
})
trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
# Sell when MACD histogram crosses below 0 and RSI is greater than 70 (overbought)
elif row['MACD_Histogram'] < 0 and shares > 0 and row['RSI'] > 70:
balance += shares * close_price
# Calculate profit/loss for the trade
buy_price = trade_log.loc[trade_log['Action'] == 'Buy', 'Price'].iloc[-1] if len(trade_log) > 0 else 0
profit_loss = (close_price - buy_price) * shares
# Log the trade
new_trade = pd.DataFrame({
'Date': [trade_date],
'Action': ['Sell'],
'Shares': [shares],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance],
'Profit/Loss': [profit_loss]
})
trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
shares = 0 # Reset shares to 0 after selling
# Record portfolio value (balance + value of held shares)
portfolio_value = balance + (shares * close_price)
balance_over_time.append(portfolio_value)
# Update the DataFrame with current values
sp500.at[i, 'Portfolio_Value'] = portfolio_value
sp500.at[i, 'Balance'] = balance
sp500.at[i, 'Shares_Held'] = shares
# Final balance after all trades
final_balance = balance + (shares * sp500['Close'].iloc[-1]) # If holding shares, sell at last close price
profit = final_balance - initial_balance
# Print final results
print(f"Final Balance: ${final_balance:.2f}")
print(f"Total Profit: ${profit:.2f}")
# Convert balance over time to pandas series for plotting
balance_over_time = pd.Series(balance_over_time, index=sp500.index[:len(balance_over_time)])
# Plotting Portfolio Value and P/L
plt.figure(figsize=(12, 6))
plt.plot(sp500.index, sp500['Portfolio_Value'], label='Portfolio Value', color='cyan')
plt.plot(sp500.index, sp500['Close'], label='S&P 500 Close Price', color='magenta', alpha=0.5)
plt.title('Portfolio Value vs S&P 500 Close Price')
plt.xlabel('Date')
plt.ylabel('Value ($)')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Display the profit/loss per trade
trade_log['Profit/Loss'] = trade_log['Profit/Loss'].fillna(0) # Ensure no NaN values in P/L
trade_log.reset_index(drop=True, inplace=True)
# Plot Profit/Loss for each trade
plt.figure(figsize=(12, 6))
trade_log['Profit/Loss'].plot(kind='bar', color=['green' if x > 0 else 'red' for x in trade_log['Profit/Loss']])
plt.title('Profit/Loss of Each Trade')
plt.ylabel('Profit/Loss ($)')
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Print trade summary
total_trades = len(trade_log) // 2 # Buy/Sell count as one trade
winning_trades = len(trade_log[trade_log['Profit/Loss'] > 0]) // 2
losing_trades = len(trade_log[trade_log['Profit/Loss'] < 0]) // 2
print(f"Total Trades: {total_trades}")
print(f"Winning Trades: {winning_trades}")
print(f"Losing Trades: {losing_trades}")
# Display trade log
print(trade_log[['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss']])
<ipython-input-45-a169aeb4ae19>:59: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation. trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
Final Balance: $22555.64 Total Profit: $2555.64
Total Trades: 35 Winning Trades: 15 Losing Trades: 2 Date Action Shares Price Balance \ 0 1993-12-17 00:00:00-05:00 Buy 4.0 466.380005 18134.479980 1 1995-04-11 00:00:00-04:00 Sell 4.0 505.529999 20156.599976 2 1995-04-27 00:00:00-04:00 Buy 3.0 513.549988 18615.950012 3 1995-06-29 00:00:00-04:00 Sell 3.0 543.869995 20247.559998 4 1995-07-06 00:00:00-04:00 Buy 3.0 553.989990 18585.590027 .. ... ... ... ... ... 65 2016-04-27 00:00:00-04:00 Sell 1.0 2095.149902 22388.800171 66 2016-05-25 00:00:00-04:00 Buy 1.0 2090.540039 20298.260132 67 2016-07-29 00:00:00-04:00 Sell 1.0 2173.600098 22471.860229 68 2016-09-22 00:00:00-04:00 Buy 1.0 2177.179932 20294.680298 69 2016-12-22 00:00:00-05:00 Sell 1.0 2260.959961 22555.640259 Portfolio Value Profit/Loss 0 20000.000000 0.000000 1 20156.599976 156.599976 2 20156.599976 0.000000 3 20247.559998 90.960022 4 20247.559998 0.000000 .. ... ... 65 22388.800171 16.609863 66 22388.800171 0.000000 67 22471.860229 83.060059 68 22471.860229 0.000000 69 22555.640259 83.780029 [70 rows x 7 columns]
BOLLINGER BANDS (89% ACCURATE):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Bollinger Bands Parameters
window = 20 # Typical period for Bollinger Bands
std_dev = 2 # Standard deviation multiplier
# Calculate Bollinger Bands
sp500['Rolling_Mean'] = sp500['Close'].rolling(window).mean()
sp500['Rolling_Std'] = sp500['Close'].rolling(window).std()
sp500['Bollinger_Upper'] = sp500['Rolling_Mean'] + (std_dev * sp500['Rolling_Std'])
sp500['Bollinger_Lower'] = sp500['Rolling_Mean'] - (std_dev * sp500['Rolling_Std'])
# Initialize variables for simulation
initial_balance = 20000
balance = initial_balance
shares = 0
amount_to_invest = 0.1 * balance # 10% of current balance
balance_over_time = []
# Trade log to track each trade
trade_log = pd.DataFrame(columns=['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value'])
# Simulate the Bollinger Bands Strategy
for i, row in sp500.iterrows():
close_price = row['Close']
trade_date = row.name
# Buy signal - price crosses below the lower band and we are not holding shares
if close_price < row['Bollinger_Lower'] and shares == 0:
shares_to_buy = (amount_to_invest // close_price) * close_price
shares += shares_to_buy // close_price
balance -= shares_to_buy
# Log the trade
new_trade = pd.DataFrame({
'Date': [trade_date],
'Action': ['Buy'],
'Shares': [shares],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance + (shares * close_price)]
})
trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
# Sell signal - price crosses above the upper band and we are holding shares
elif close_price > row['Bollinger_Upper'] and shares > 0:
balance += shares * close_price
# Log the trade
new_trade = pd.DataFrame({
'Date': [trade_date],
'Action': ['Sell'],
'Shares': [shares],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance]
})
trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
shares = 0 # Reset shares after selling
# Record portfolio value (balance + value of held shares)
portfolio_value = balance + (shares * close_price)
balance_over_time.append(portfolio_value)
# Update the DataFrame with current values
sp500.at[i, 'Portfolio_Value'] = portfolio_value
sp500.at[i, 'Balance'] = balance
sp500.at[i, 'Shares_Held'] = shares
# Final balance after all trades
final_balance = balance + (shares * sp500['Close'].iloc[-1]) # If holding shares, sell at last close price
profit = final_balance - initial_balance
# Print final results
print(f"Final Balance: ${final_balance:.2f}")
print(f"Total Profit: ${profit:.2f}")
# Convert balance over time to pandas series for plotting
balance_over_time = pd.Series(balance_over_time, index=sp500.index[:len(balance_over_time)])
# Plotting Portfolio Value vs S&P 500 Close Price
plt.figure(figsize=(12, 6))
plt.plot(sp500.index, sp500['Portfolio_Value'], label='Portfolio Value', color='cyan')
plt.plot(sp500.index, sp500['Close'], label='S&P 500 Close Price', color='magenta', alpha=0.5)
plt.plot(sp500.index, sp500['Bollinger_Upper'], label='Upper Bollinger Band', color='green', linestyle='--', alpha=0.6)
plt.plot(sp500.index, sp500['Bollinger_Lower'], label='Lower Bollinger Band', color='red', linestyle='--', alpha=0.6)
plt.title('Portfolio Value vs S&P 500 Close Price with Bollinger Bands')
plt.xlabel('Date')
plt.ylabel('Value ($)')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Display Profit/Loss of each trade
trade_log['Profit/Loss'] = 0
for i in range(1, len(trade_log), 2):
buy_price = trade_log.loc[i-1, 'Price']
sell_price = trade_log.loc[i, 'Price']
shares = trade_log.loc[i-1, 'Shares']
profit_loss = (sell_price - buy_price) * shares
trade_log.at[i, 'Profit/Loss'] = profit_loss
# Plotting Profit/Loss for each trade
plt.figure(figsize=(12, 6))
trade_log['Profit/Loss'].plot(kind='bar', color=['green' if x > 0 else 'red' for x in trade_log['Profit/Loss']])
plt.title('Profit/Loss of Each Trade')
plt.ylabel('Profit/Loss ($)')
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Summary of trades
total_trades = len(trade_log) // 2 # Each trade consists of a Buy and Sell
winning_trades = len(trade_log[trade_log['Profit/Loss'] > 0]) // 2
losing_trades = len(trade_log[trade_log['Profit/Loss'] < 0]) // 2
print(f"Total Trades: {total_trades}")
print(f"Winning Trades: {winning_trades}")
print(f"Losing Trades: {losing_trades}")
# Display trade log
print(trade_log[['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss']])
<ipython-input-46-4da79694ac3f>:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation. trade_log = pd.concat([trade_log, new_trade], ignore_index=True)
Final Balance: $22193.58 Total Profit: $2193.58
<ipython-input-46-4da79694ac3f>:104: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '1.7200927734375' has dtype incompatible with int64, please explicitly cast to a compatible dtype first. trade_log.at[i, 'Profit/Loss'] = profit_loss
Total Trades: 100 Winning Trades: 24 Losing Trades: 3 Date Action Shares Price Balance \ 0 1994-03-25 00:00:00-05:00 Buy 4.0 460.579987 18157.680054 1 1994-08-01 00:00:00-04:00 Sell 4.0 461.010010 20001.720093 2 1994-09-20 00:00:00-04:00 Buy 4.0 463.359985 18148.280151 3 1995-01-13 00:00:00-05:00 Sell 4.0 465.970001 20012.160156 4 1995-10-10 00:00:00-04:00 Buy 3.0 577.520020 18279.600098 .. ... ... ... ... ... 196 2024-04-18 00:00:00-04:00 Buy 0.0 5011.120117 22193.580505 197 2024-04-19 00:00:00-04:00 Buy 0.0 4967.229980 22193.580505 198 2024-07-25 00:00:00-04:00 Buy 0.0 5399.220215 22193.580505 199 2024-08-02 00:00:00-04:00 Buy 0.0 5346.560059 22193.580505 200 2024-08-05 00:00:00-04:00 Buy 0.0 5186.330078 22193.580505 Portfolio Value Profit/Loss 0 20000.000000 0.000000 1 20001.720093 1.720093 2 20001.720093 0.000000 3 20012.160156 10.440063 4 20012.160156 0.000000 .. ... ... 196 22193.580505 0.000000 197 22193.580505 -0.000000 198 22193.580505 0.000000 199 22193.580505 -0.000000 200 22193.580505 0.000000 [201 rows x 7 columns]
COMBINING THE FOUR MODELS:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Initialize values
initial_balance = 500 # Starting balance of $20,000
balance = initial_balance
balance_over_time = []
# Track open trades for each strategy
open_trades = {'MACD Crossover': False, 'MACD Histogram': False, 'Bollinger Bands': False, 'Golden Cross/Death Cross': False}
shares_held = {'MACD Crossover': 0, 'MACD Histogram': 0, 'Bollinger Bands': 0, 'Golden Cross/Death Cross': 0}
# Trade log to track each trade
trade_log = pd.DataFrame(columns=['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss', 'Strategy'])
# Simulate the paper trading for each strategy (example: MACD Crossover, Bollinger Bands, etc.)
for i, row in sp500.iterrows():
close_price = row['Close']
trade_date = row.name
amount_to_invest = 1 * balance # Recalculate 10% of the balance each time
### MACD Crossover Strategy ###
if row['MACD'] > row['Signal_Line'] and not open_trades['MACD Crossover']: # Buy signal
shares_to_buy = amount_to_invest // close_price
if shares_to_buy > 0:
open_trades['MACD Crossover'] = True
shares_held['MACD Crossover'] = shares_to_buy
balance -= shares_to_buy * close_price
# Log the buy trade
trade_log = pd.concat([trade_log, pd.DataFrame({
'Date': [trade_date],
'Action': ['Buy'],
'Shares': [shares_to_buy],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance + (shares_to_buy * close_price)],
'Profit/Loss': [0], # No profit/loss for a buy
'Strategy': ['MACD Crossover']
})], ignore_index=True)
elif row['MACD'] < row['Signal_Line'] and row['RSI'] > 70 and open_trades['MACD Crossover']: # Sell signal
sell_value = shares_held['MACD Crossover'] * close_price
balance += sell_value
# Calculate the profit/loss
buy_price = trade_log.loc[trade_log['Strategy'] == 'MACD Crossover', 'Price'].iloc[-1]
profit_loss = (close_price - buy_price) * shares_held['MACD Crossover']
# Log the sell trade
trade_log = pd.concat([trade_log, pd.DataFrame({
'Date': [trade_date],
'Action': ['Sell'],
'Shares': [shares_held['MACD Crossover']],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance],
'Profit/Loss': [profit_loss],
'Strategy': ['MACD Crossover']
})], ignore_index=True)
open_trades['MACD Crossover'] = False # Reset to allow future trades
shares_held['MACD Crossover'] = 0 # Reset shares after selling
### Bollinger Bands Strategy ###
if close_price < row['Bollinger_Lower'] and not open_trades['Bollinger Bands']: # Buy signal
shares_to_buy = amount_to_invest // close_price
if shares_to_buy > 0:
open_trades['Bollinger Bands'] = True
shares_held['Bollinger Bands'] = shares_to_buy
balance -= shares_to_buy * close_price
# Log the buy trade
trade_log = pd.concat([trade_log, pd.DataFrame({
'Date': [trade_date],
'Action': ['Buy'],
'Shares': [shares_to_buy],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance + (shares_to_buy * close_price)],
'Profit/Loss': [0], # No profit/loss for a buy
'Strategy': ['Bollinger Bands']
})], ignore_index=True)
elif close_price > row['Bollinger_Upper'] and open_trades['Bollinger Bands']: # Sell signal
sell_value = shares_held['Bollinger Bands'] * close_price
balance += sell_value
# Calculate the profit/loss
buy_price = trade_log.loc[trade_log['Strategy'] == 'Bollinger Bands', 'Price'].iloc[-1]
profit_loss = (close_price - buy_price) * shares_held['Bollinger Bands']
# Log the sell trade
trade_log = pd.concat([trade_log, pd.DataFrame({
'Date': [trade_date],
'Action': ['Sell'],
'Shares': [shares_held['Bollinger Bands']],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance],
'Profit/Loss': [profit_loss],
'Strategy': ['Bollinger Bands']
})], ignore_index=True)
open_trades['Bollinger Bands'] = False # Reset to allow future trades
shares_held['Bollinger Bands'] = 0 # Reset shares after selling
# Record portfolio value (balance + value of held shares)
portfolio_value = balance + (shares_held['MACD Crossover'] * close_price) + (shares_held['Bollinger Bands'] * close_price)
balance_over_time.append(portfolio_value)
# Final balance after all trades
final_balance = balance + (shares_held['MACD Crossover'] * sp500['Close'].iloc[-1]) + (shares_held['Bollinger Bands'] * sp500['Close'].iloc[-1])
profit = final_balance - initial_balance
# Print final results
print(f"Final Balance: ${final_balance:.2f}")
print(f"Total Profit: ${profit:.2f}")
# Convert balance over time to pandas series for plotting
balance_over_time = pd.Series(balance_over_time, index=sp500.index[:len(balance_over_time)])
# Plotting Portfolio Value vs S&P 500 Close Price
plt.figure(figsize=(12, 6))
plt.plot(sp500.index, balance_over_time, label='Portfolio Value', color='cyan')
plt.plot(sp500.index, sp500['Close'], label='S&P 500 Close Price', color='magenta', alpha=0.5)
plt.title('Portfolio Value vs S&P 500 Close Price')
plt.xlabel('Date')
plt.ylabel('Value ($)')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Trade summary
total_trades = len(trade_log[trade_log['Shares'] > 0]) # Only count trades with shares > 0
winning_trades = trade_log[trade_log['Profit/Loss'] > 0]
losing_trades = trade_log[trade_log['Profit/Loss'] < 0]
print(f"Total Trades: {total_trades}")
print(f"Winning Trades: {len(winning_trades)}")
print(f"Losing Trades: {len(losing_trades)}")
# Plotting Profit/Loss of each trade
plt.figure(figsize=(12, 6))
trade_log['Profit/Loss'].plot(kind='bar', color=['green' if x > 0 else 'red' for x in trade_log['Profit/Loss']])
plt.title('Profit/Loss of Each Trade')
plt.ylabel('Profit/Loss ($)')
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Display the trade log with Profit/Loss
print(trade_log[['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss', 'Strategy']])
<ipython-input-47-24db981f89a6>:32: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation. trade_log = pd.concat([trade_log, pd.DataFrame({
Final Balance: $5813.37 Total Profit: $5313.37
Total Trades: 129 Winning Trades: 57 Losing Trades: 7
Date Action Shares Price Balance \ 0 1993-12-17 00:00:00-05:00 Buy 1.0 466.380005 33.619995 1 1995-04-11 00:00:00-04:00 Sell 1.0 505.529999 539.149994 2 1995-04-27 00:00:00-04:00 Buy 1.0 513.549988 25.600006 3 1995-06-29 00:00:00-04:00 Sell 1.0 543.869995 569.470001 4 1995-07-06 00:00:00-04:00 Buy 1.0 553.989990 15.480011 .. ... ... ... ... ... 124 2024-01-22 00:00:00-05:00 Buy 1.0 4850.430176 390.580170 125 2024-03-12 00:00:00-04:00 Sell 1.0 5175.270020 5565.850189 126 2024-03-21 00:00:00-04:00 Buy 1.0 5241.529785 324.320404 127 2024-06-28 00:00:00-04:00 Sell 1.0 5460.479980 5784.800385 128 2024-07-05 00:00:00-04:00 Buy 1.0 5567.189941 217.610443 Portfolio Value Profit/Loss Strategy 0 500.000000 0 MACD Crossover 1 539.149994 39.149994 MACD Crossover 2 539.149994 0 MACD Crossover 3 569.470001 30.320007 MACD Crossover 4 569.470001 0 MACD Crossover .. ... ... ... 124 5241.010345 0 MACD Crossover 125 5565.850189 324.839844 MACD Crossover 126 5565.850189 0 MACD Crossover 127 5784.800385 218.950195 MACD Crossover 128 5784.800385 0 MACD Crossover [129 rows x 8 columns]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
# Initialize values
initial_balance = 500 # Starting balance
balance = initial_balance
balance_over_time = []
holding_periods = [] # To track when we were holding stock
# Track open trades for each strategy
open_trades = {'MACD Crossover': False, 'MACD Histogram': False, 'Bollinger Bands': False, 'Golden Cross/Death Cross': False}
shares_held = {'MACD Crossover': 0, 'MACD Histogram': 0, 'Bollinger Bands': 0, 'Golden Cross/Death Cross': 0}
# Trade log to track each trade
trade_log = pd.DataFrame(columns=['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss', 'Strategy'])
# Simulate the paper trading for each strategy
for i, row in sp500.iterrows():
close_price = row['Close']
trade_date = row.name
amount_to_invest = 1 * balance # Recalculate 100% of the balance each time
### MACD Crossover Strategy ###
if row['MACD'] > row['Signal_Line'] and not open_trades['MACD Crossover']: # Buy signal
shares_to_buy = amount_to_invest // close_price
if shares_to_buy > 0:
open_trades['MACD Crossover'] = True
shares_held['MACD Crossover'] = shares_to_buy
balance -= shares_to_buy * close_price
holding_periods.append([trade_date, None]) # Start of holding period
# Log the buy trade
trade_log = pd.concat([trade_log, pd.DataFrame({
'Date': [trade_date],
'Action': ['Buy'],
'Shares': [shares_to_buy],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance + (shares_to_buy * close_price)],
'Profit/Loss': [0], # No profit/loss for a buy
'Strategy': ['MACD Crossover']
})], ignore_index=True)
elif row['MACD'] < row['Signal_Line'] and row['RSI'] > 70 and open_trades['MACD Crossover']: # Sell signal
sell_value = shares_held['MACD Crossover'] * close_price
balance += sell_value
# Calculate the profit/loss
buy_price = trade_log.loc[trade_log['Strategy'] == 'MACD Crossover', 'Price'].iloc[-1]
profit_loss = (close_price - buy_price) * shares_held['MACD Crossover']
# Log the sell trade
trade_log = pd.concat([trade_log, pd.DataFrame({
'Date': [trade_date],
'Action': ['Sell'],
'Shares': [shares_held['MACD Crossover']],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance],
'Profit/Loss': [profit_loss],
'Strategy': ['MACD Crossover']
})], ignore_index=True)
open_trades['MACD Crossover'] = False # Reset to allow future trades
shares_held['MACD Crossover'] = 0 # Reset shares after selling
holding_periods[-1][1] = trade_date # End of holding period
### Bollinger Bands Strategy ###
if close_price < row['Bollinger_Lower'] and not open_trades['Bollinger Bands']: # Buy signal
shares_to_buy = amount_to_invest // close_price
if shares_to_buy > 0:
open_trades['Bollinger Bands'] = True
shares_held['Bollinger Bands'] = shares_to_buy
balance -= shares_to_buy * close_price
holding_periods.append([trade_date, None]) # Start of holding period
# Log the buy trade
trade_log = pd.concat([trade_log, pd.DataFrame({
'Date': [trade_date],
'Action': ['Buy'],
'Shares': [shares_to_buy],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance + (shares_to_buy * close_price)],
'Profit/Loss': [0], # No profit/loss for a buy
'Strategy': ['Bollinger Bands']
})], ignore_index=True)
elif close_price > row['Bollinger_Upper'] and open_trades['Bollinger Bands']: # Sell signal
sell_value = shares_held['Bollinger Bands'] * close_price
balance += sell_value
# Calculate the profit/loss
buy_price = trade_log.loc[trade_log['Strategy'] == 'Bollinger Bands', 'Price'].iloc[-1]
profit_loss = (close_price - buy_price) * shares_held['Bollinger Bands']
# Log the sell trade
trade_log = pd.concat([trade_log, pd.DataFrame({
'Date': [trade_date],
'Action': ['Sell'],
'Shares': [shares_held['Bollinger Bands']],
'Price': [close_price],
'Balance': [balance],
'Portfolio Value': [balance],
'Profit/Loss': [profit_loss],
'Strategy': ['Bollinger Bands']
})], ignore_index=True)
open_trades['Bollinger Bands'] = False # Reset to allow future trades
shares_held['Bollinger Bands'] = 0 # Reset shares after selling
holding_periods[-1][1] = trade_date # End of holding period
# Record portfolio value (balance + value of held shares)
portfolio_value = balance + (shares_held['MACD Crossover'] * close_price) + (shares_held['Bollinger Bands'] * close_price)
balance_over_time.append(portfolio_value)
# Final balance after all trades
final_balance = balance + (shares_held['MACD Crossover'] * sp500['Close'].iloc[-1]) + (shares_held['Bollinger Bands'] * sp500['Close'].iloc[-1])
profit = final_balance - initial_balance
# Convert balance over time to pandas series for plotting
balance_over_time = pd.Series(balance_over_time, index=sp500.index[:len(balance_over_time)])
# Set up the dashboard layout using GridSpec
fig = plt.figure(figsize=(10, 7))
fig.patch.set_facecolor('black') # Set figure background to black
gs = GridSpec(3, 3, figure=fig)
# Plot 1: Portfolio Value vs S&P 500 Close Price with shading for holding periods
ax1 = fig.add_subplot(gs[0, :])
ax1.plot(sp500.index, balance_over_time, label='Portfolio Value', color='cyan')
ax1.plot(sp500.index, sp500['Close'], label='S&P 500 Close Price', color='magenta', alpha=0.5)
ax1.set_title('Portfolio Value vs S&P 500 Close Price', color='white')
ax1.set_xlabel('Date', color='white')
ax1.set_ylabel('Value ($)', color='white')
ax1.legend()
ax1.grid(True, linestyle='--', alpha=0.3)
ax1.set_facecolor('black') # Set axes background to black
ax1.tick_params(colors='white') # Set tick colors to white
# Add shading for holding periods
for period in holding_periods:
if period[1]: # Ensure the holding period has an end date
ax1.axvspan(period[0], period[1], color='orange', alpha=0.2)
# Plot 2: Profit/Loss of each trade without x-axis labels
ax2 = fig.add_subplot(gs[1, :2])
trade_log['Profit/Loss'].plot(kind='bar', ax=ax2, color=['green' if x > 0 else 'red' for x in trade_log['Profit/Loss']])
ax2.set_title('Profit/Loss of Each Trade', color='white')
ax2.set_ylabel('Profit/Loss ($)', color='white')
ax2.grid(True, linestyle='--', alpha=0.3)
ax2.set_facecolor('black') # Set axes background to black
ax2.set_xticks([])
ax2.tick_params(colors='white') # Set tick colors to white
# Plot 3: Trade summary table with directionality percentage (Moved table to a higher position)
ax3 = fig.add_subplot(gs[1:, 2])
ax3.axis('off')
# Create a trade summary table (Rounded Win/Loss Ratio as percentage, moved table higher)
win_trades = len(trade_log[trade_log['Profit/Loss'] > 0])
loss_trades = len(trade_log[trade_log['Profit/Loss'] < 0])
summary_data = pd.DataFrame({
'Winning Trades': [win_trades],
'Losing Trades': [loss_trades],
})
# Increase column width to avoid text truncation
col_widths = [0.35, 0.35]
# Display the summary table
table = ax3.table(cellText=summary_data.values, colLabels=summary_data.columns, cellLoc='center', loc='upper center', colWidths=col_widths)
# Set table background color to black and text to white for visibility
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.5, 1.5)
# Set the background and font color for the table cells
for key, cell in table.get_celld().items():
cell.set_edgecolor('white') # White borders
cell.set_text_props(color='white') # White text
cell.set_facecolor('black') # Black background for table cells
# Add space at the top and move everything down
plt.tight_layout()
# Adjust the top to move everything down and add space at the top
plt.subplots_adjust(top=0.85, bottom=0.1, hspace=0.5)
# Show the dashboard
plt.show()
# Display the trade log with Profit/Loss
print(trade_log[['Date', 'Action', 'Shares', 'Price', 'Balance', 'Portfolio Value', 'Profit/Loss', 'Strategy']])
<ipython-input-50-aefd7ecb95c8>:35: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation. trade_log = pd.concat([trade_log, pd.DataFrame({
Date Action Shares Price Balance \ 0 1993-12-17 00:00:00-05:00 Buy 1.0 466.380005 33.619995 1 1995-04-11 00:00:00-04:00 Sell 1.0 505.529999 539.149994 2 1995-04-27 00:00:00-04:00 Buy 1.0 513.549988 25.600006 3 1995-06-29 00:00:00-04:00 Sell 1.0 543.869995 569.470001 4 1995-07-06 00:00:00-04:00 Buy 1.0 553.989990 15.480011 .. ... ... ... ... ... 124 2024-01-22 00:00:00-05:00 Buy 1.0 4850.430176 390.580170 125 2024-03-12 00:00:00-04:00 Sell 1.0 5175.270020 5565.850189 126 2024-03-21 00:00:00-04:00 Buy 1.0 5241.529785 324.320404 127 2024-06-28 00:00:00-04:00 Sell 1.0 5460.479980 5784.800385 128 2024-07-05 00:00:00-04:00 Buy 1.0 5567.189941 217.610443 Portfolio Value Profit/Loss Strategy 0 500.000000 0 MACD Crossover 1 539.149994 39.149994 MACD Crossover 2 539.149994 0 MACD Crossover 3 569.470001 30.320007 MACD Crossover 4 569.470001 0 MACD Crossover .. ... ... ... 124 5241.010345 0 MACD Crossover 125 5565.850189 324.839844 MACD Crossover 126 5565.850189 0 MACD Crossover 127 5784.800385 218.950195 MACD Crossover 128 5784.800385 0 MACD Crossover [129 rows x 8 columns]