import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import statsmodels.formula.api as smf
from yahoo_fin import stock_info as si
def get_daily_prices(ticker, start_date, end_date):
df = si.get_data(ticker,interval='1d',start_date=start_date, end_date=end_date,index_as_date=False)
df['price']=df['adjclose']
df=df[['date','price','ticker']]
return df
def get_prices(tickerlist,start, end):
df=pd.DataFrame(columns=['date','price','ticker'])
for ticker in tickerlist:
df=pd.concat([df,get_daily_prices(ticker, start, end)])
return df
def run_diff_in_diff(df, event_date, treated):
start_date = event_date - datetime.timedelta(days=180)
df = df[(df['date'] >= start_date) & (df['date'] <= event_date) & (df['returns'].isnull()==0)].copy()
df['treated'] = np.where(df['ticker']==treated, 1, 0)
df['post'] = np.where(df['date']>=event_date, 1, 0)
df['treated_post'] = df['treated'] * df['post']
formula = "returns ~ treated + post + treated_post"
model = smf.ols(formula=formula, data=df).fit()
return model
def difference_in_trends(df, event_date, treated):
start_date = event_date - datetime.timedelta(days=180)
df = df[(df['date'] >= start_date) & (df['date'] <= event_date)].copy()
df['treated'] = np.where(df['ticker']==treated, 1, 0)
df['trend'] = (event_date - df['date']).dt.days
df['treated_trend'] = df['treated']*df['trend']
formula = "returns ~ treated + trend + treated_trend"
model = smf.ols(formula=formula, data=df).fit()
return model
def plot_histogram(df, model, param, mytitle):
quantile_2_5 = df['betas'].quantile(0.025)
quantile_97_5 = df['betas'].quantile(0.975)
model_est = model.params[param]
plt.hist(df['betas'], bins=10, alpha=0.7)
plt.axvline(quantile_2_5, color='black', linestyle='--', linewidth=1, label='2.5th pctl')
plt.axvline(quantile_97_5, color='black', linestyle='--', linewidth=1, label='97.5th pctl')
plt.axvline(model_est, color='red', linestyle='--', linewidth=1, label='Event Impact')
plt.legend(loc='upper left')
plt.title(mytitle)
plt.show()
def get_event_study_placebos(df_wide, pre_event_dates, cutoff_days, formula):
pvalues = list()
betas = list()
for placebo_date in pre_event_dates:
cutoff = pd.to_datetime(placebo_date) - datetime.timedelta(days=cutoff_days)
placebo_df = df_wide[(df_wide['date'] >= cutoff) & (df_wide['date'] <= placebo_date)].copy()
placebo_df['event'] = np.where(placebo_df['date']==placebo_date,1,0)
placebo_model=smf.ols(formula=formula, data=placebo_df).fit()
pvalues.append(placebo_model.pvalues['event'])
betas.append(placebo_model.params['event'])
placebo_estimates = pd.DataFrame({'betas':betas, 'pvalues':pvalues})
rmse = np.sqrt((placebo_estimates['betas']**2).mean())
return(placebo_estimates,rmse)
# start_date = '01-01-2024'
# end_date = '08-15-2024'
# df=pd.DataFrame()
# for ticker in list(['SBUX','VOO','QSR','YUM','MCD']):
# df=pd.concat([df,si.get_data(ticker,interval='1d',start_date=start_date, end_date=end_date,index_as_date=False)])
# print(df)
# df['returns'] = np.where(df['ticker']==df['ticker'].shift(),
# (df['close']/df['close'].shift())-1, np.nan)
# df.to_csv('sbux_ceo_prices.csv', index=False)
event_date = '2024-08-13'
prices = pd.read_csv('sbux_ceo_prices.csv')
prices = prices[prices['returns'].isnull()==0].copy()
prices.head()
date | ticker | close | open | high | low | adjclose | volume | returns | |
---|---|---|---|---|---|---|---|---|---|
1 | 2024-01-03 | SBUX | 93.230003 | 93.959999 | 94.239998 | 93.059998 | 91.422096 | 7161700.0 | -0.004697 |
2 | 2024-01-04 | SBUX | 93.550003 | 93.099998 | 94.580002 | 93.000000 | 91.735893 | 7118600.0 | 0.003432 |
3 | 2024-01-05 | SBUX | 92.989998 | 93.330002 | 93.580002 | 92.480003 | 91.186745 | 7189900.0 | -0.005986 |
4 | 2024-01-08 | SBUX | 94.190002 | 93.019997 | 94.570000 | 92.949997 | 92.363472 | 7536900.0 | 0.012905 |
5 | 2024-01-09 | SBUX | 93.089996 | 93.610001 | 93.730003 | 92.860001 | 91.284798 | 6348500.0 | -0.011679 |
total_shares = 1132800000
pre = prices[prices['date']<event_date].copy()
lastprice = pre.loc[(pre['date']==max(pre['date'])) & (pre['ticker']=='SBUX'),'close'].values[0]
lastmarketvalue = lastprice * total_shares
print(f"Last Market Value: $ {round(lastmarketvalue)}")
prices[prices['date']<event_date].groupby(['ticker'])['returns'].agg(['count','mean','std'])
Last Market Value: $ 87259582617
count | mean | std | |
---|---|---|---|
ticker | |||
MCD | 153 | -0.000568 | 0.011725 |
QSR | 153 | -0.000522 | 0.014667 |
SBUX | 153 | -0.001079 | 0.019548 |
VOO | 153 | 0.000826 | 0.007927 |
YUM | 153 | 0.000379 | 0.010292 |
df_wide = prices.pivot(index='date', columns='ticker', values='returns')
df_wide.reset_index(inplace=True)
df_wide.columns.name = None
df_wide['date'] = pd.to_datetime(df_wide['date'])
df_wide.head()
date | MCD | QSR | SBUX | VOO | YUM | |
---|---|---|---|---|---|---|
0 | 2024-01-03 | -0.008921 | -0.005985 | -0.004697 | -0.007419 | 0.000543 |
1 | 2024-01-04 | -0.009002 | -0.000916 | 0.003432 | -0.003157 | -0.003176 |
2 | 2024-01-05 | -0.009426 | 0.001310 | -0.005986 | 0.001281 | -0.002720 |
3 | 2024-01-08 | 0.010070 | 0.022635 | 0.012905 | 0.014303 | 0.002338 |
4 | 2024-01-09 | -0.003529 | -0.003199 | -0.011679 | -0.002430 | -0.003265 |
cutoff_days = 120
event_date = pd.to_datetime(event_date)
cutoff = event_date - datetime.timedelta(days=cutoff_days)
df = df_wide[(df_wide['date'] >= cutoff) & (df_wide['date'] <= event_date)].copy()
df['event'] = np.where(df['date']==event_date,1,0)
formula = 'SBUX~MCD+QSR+VOO+YUM+event'
model = smf.ols(formula=formula, data=df).fit()
print(model.summary())
OLS Regression Results ============================================================================== Dep. Variable: SBUX R-squared: 0.670 Model: OLS Adj. R-squared: 0.649 Method: Least Squares F-statistic: 31.66 Date: Fri, 30 Aug 2024 Prob (F-statistic): 1.81e-17 Time: 19:34:03 Log-Likelihood: 206.12 No. Observations: 84 AIC: -400.2 Df Residuals: 78 BIC: -385.7 Df Model: 5 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept -0.0006 0.002 -0.242 0.809 -0.005 0.004 MCD -0.2654 0.233 -1.138 0.259 -0.730 0.199 QSR 0.3962 0.190 2.088 0.040 0.019 0.774 VOO -0.0934 0.297 -0.314 0.754 -0.685 0.498 YUM 0.9978 0.265 3.761 0.000 0.470 1.526 event 0.2287 0.022 10.259 0.000 0.184 0.273 ============================================================================== Omnibus: 31.439 Durbin-Watson: 2.296 Prob(Omnibus): 0.000 Jarque-Bera (JB): 187.864 Skew: -0.854 Prob(JB): 1.61e-41 Kurtosis: 10.124 Cond. No. 138. ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print("Estimated Gain: ${:,}".format(round(lastmarketvalue*model.params['event'])))
Estimated Gain: $19,955,241,349
placebo_start = event_date - datetime.timedelta(days=120)
pre_event_dates = df_wide[(df_wide['date'] >= placebo_start) & (df_wide['date'] < event_date)]['date'].unique()
mytitle = "Event Study Placebo Distribution"
placebo_estimates, rmse = get_event_study_placebos(df_wide, pre_event_dates, cutoff_days, formula)
print(f"RMSE: {rmse:.3f}")
plot_histogram(placebo_estimates, model,"event", mytitle)
RMSE: 0.025
event_date = pd.to_datetime(event_date)
treated = 'SBUX'
prices['date'] = pd.to_datetime(prices['date'])
model = run_diff_in_diff(prices, event_date, treated)
print(model.summary())
OLS Regression Results ============================================================================== Dep. Variable: returns R-squared: 0.339 Model: OLS Adj. R-squared: 0.336 Method: Least Squares F-statistic: 105.2 Date: Fri, 30 Aug 2024 Prob (F-statistic): 5.40e-55 Time: 19:34:04 Log-Likelihood: 1774.3 No. Observations: 620 AIC: -3541. Df Residuals: 616 BIC: -3523. Df Model: 3 Covariance Type: nonrobust ================================================================================ coef std err t P>|t| [0.025 0.975] -------------------------------------------------------------------------------- Intercept -9.701e-05 0.001 -0.155 0.877 -0.001 0.001 treated -0.0013 0.001 -0.925 0.355 -0.004 0.001 post 0.0117 0.007 1.679 0.094 -0.002 0.025 treated_post 0.2347 0.016 15.063 0.000 0.204 0.265 ============================================================================== Omnibus: 393.962 Durbin-Watson: 2.064 Prob(Omnibus): 0.000 Jarque-Bera (JB): 19739.502 Skew: -2.129 Prob(JB): 0.00 Kurtosis: 30.313 Cond. No. 29.3 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
placebo_start = event_date - datetime.timedelta(days=120)
prices['date'] = pd.to_datetime(prices['date'])
pre_event_dates = prices[(prices['date'] >= placebo_start) & (prices['date'] < event_date)]['date'].unique()
pvalues = list()
betas = list()
for placebo_date in pre_event_dates:
placebo_model=run_diff_in_diff(prices, pd.to_datetime(placebo_date), treated)
pvalues.append(placebo_model.pvalues['treated_post'])
betas.append(placebo_model.params['treated_post'])
df = pd.DataFrame({'betas':betas, 'pvalues':pvalues})
rmse = np.sqrt((df['betas']**2).mean())
print(f"RMSE: {rmse:.3f}")
mytitle = "Diff-in-Diff Placebo Estimates"
plot_histogram(df, model, "treated_post", mytitle)
RMSE: 0.022
print("Estimated Gain: ${:,}".format(round(lastmarketvalue*model.params['treated_post'])))
Estimated Gain: $20,476,849,918