Please run these two cells before running the rest of the Notebook!
As these plotting settings are standard throughout the book, we do not show them in the book every time we plot something.
%matplotlib inline
%config InlineBackend.figure_format = "retina"
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
# feel free to modify, for example, change the context to "notebook"
sns.set_theme(context="talk", style="whitegrid",
palette="colorblind", color_codes=True,
rc={"figure.figsize": [12, 8]})
import pandas as pd
import yfinance as yf
import statsmodels.api as sm
# ticker of the stock under study and of the index used as the market proxy
RISKY_ASSET = "AMZN"
MARKET_BENCHMARK = "^GSPC"
# sample period passed to the downloader
START_DATE = "2016-01-01"
END_DATE = "2020-12-31"

# `yf.download` has no `adjusted` argument: depending on the yfinance release
# the unknown keyword is either silently ignored or rejected. The cells below
# select the "Adj Close" column, which is only present when automatic price
# adjustment is switched off, so request that explicitly.
df = yf.download(
    [RISKY_ASSET, MARKET_BENCHMARK],
    start=START_DATE,
    end=END_DATE,
    auto_adjust=False,
    progress=False,
)
print(f'Downloaded {df.shape[0]} rows of data.')
Downloaded 1259 rows of data.
# last adjusted close of every month, with the two tickers relabelled to
# the short names used by the regressions below
monthly_prices = (
    df["Adj Close"]
    .rename(columns={RISKY_ASSET: "asset", MARKET_BENCHMARK: "market"})
    .resample("M")
    .last()
)
# month-over-month simple returns; rows containing NaN are discarded
X = monthly_prices.pct_change().dropna()
X.head()
asset | market | |
---|---|---|
Date | ||
2016-01-31 | -0.131515 | -0.050735 |
2016-02-29 | -0.058739 | -0.004128 |
2016-03-31 | 0.074423 | 0.065991 |
2016-04-30 | 0.111094 | 0.002699 |
2016-05-31 | 0.095817 | 0.015325 |
# beta = Cov(asset, market) / Var(market)
cov_matrix = X.cov()
# off-diagonal entry of the 2x2 matrix (row 0, column 1)
covariance = cov_matrix.iloc[0, 1]
benchmark_variance = X["market"].var()
beta = covariance / benchmark_variance
beta
1.2034611811489746
# separate target
# (select the column instead of `pop`-ing it: `X` keeps both columns, so this
# cell gives the same result when it is executed more than once)
y = X["asset"]
# add constant to the single explanatory variable
X_const = sm.add_constant(X[["market"]])
# define and fit the regression model
capm_model = sm.OLS(y, X_const).fit()
# print results
print(capm_model.summary())
OLS Regression Results ============================================================================== Dep. Variable: asset R-squared: 0.408 Model: OLS Adj. R-squared: 0.398 Method: Least Squares F-statistic: 40.05 Date: Fri, 22 Jul 2022 Prob (F-statistic): 3.89e-08 Time: 00:19:10 Log-Likelihood: 80.639 No. Observations: 60 AIC: -157.3 Df Residuals: 58 BIC: -153.1 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ const 0.0167 0.009 1.953 0.056 -0.000 0.034 market 1.2035 0.190 6.329 0.000 0.823 1.584 ============================================================================== Omnibus: 2.202 Durbin-Watson: 1.783 Prob(Omnibus): 0.333 Jarque-Bera (JB): 1.814 Skew: 0.426 Prob(JB): 0.404 Kurtosis: 2.989 Cond. No. 23.0 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Or, using the formula notation:
import statsmodels.formula.api as smf
# rebuild the returns frame so that it holds both the `asset` and the
# `market` column again
X = (
    df["Adj Close"]
    .rename(columns={RISKY_ASSET: "asset", MARKET_BENCHMARK: "market"})
    .resample("M")
    .last()
    .pct_change()
    .dropna()
)
# describe the regression with a formula string, then fit it
capm_spec = smf.ols(formula="asset ~ market", data=X)
capm_model = capm_spec.fit()
# print results
print(capm_model.summary())
OLS Regression Results ============================================================================== Dep. Variable: asset R-squared: 0.408 Model: OLS Adj. R-squared: 0.398 Method: Least Squares F-statistic: 40.05 Date: Wed, 02 Mar 2022 Prob (F-statistic): 3.89e-08 Time: 23:28:12 Log-Likelihood: 80.639 No. Observations: 60 AIC: -157.3 Df Residuals: 58 BIC: -153.1 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.0167 0.009 1.953 0.056 -0.000 0.034 market 1.2035 0.190 6.329 0.000 0.823 1.584 ============================================================================== Omnibus: 2.202 Durbin-Watson: 1.783 Prob(Omnibus): 0.333 Jarque-Bera (JB): 1.814 Skew: 0.426 Prob(JB): 0.404 Kurtosis: 2.989 Cond. No. 23.0 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# period length in days
N_DAYS = 90

# download data from Yahoo finance
df_rf = yf.download("^IRX", start=START_DATE, end=END_DATE, progress=False)

# keep the last quote of each month and divide it by 100
month_end_quotes = df_rf.resample("M").last()
rf = month_end_quotes["Close"] / 100

# calculate the corresponding daily risk-free return
daily_gross = (1 / (1 - rf * N_DAYS / 360)) ** (1 / N_DAYS)

# compound over 30 days to get a monthly figure and subtract 1
rf = (daily_gross ** 30) - 1

# plot the risk-free rate
rf.plot(title="Risk-free rate (13 Week Treasury Bill)")

sns.despine()
plt.tight_layout()
# plt.savefig("images/figure_9_2", dpi=200)
import pandas_datareader.data as web
# fetch the "TB3MS" series from FRED
tb3ms = web.DataReader("TB3MS", "fred", start=START_DATE, end=END_DATE)

# divide by 100, then convert to monthly by taking the 12th root of the
# gross rate
rate_fraction = tb3ms / 100
rf = (1 + rate_fraction) ** (1 / 12) - 1

# plot the risk-free rate
rf.plot(title="Risk-free rate (3-Month Treasury Bill)")

sns.despine()
plt.tight_layout()
# plt.savefig("images/figure_9_3", dpi=200)
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web
# stock under study and the sample period
RISKY_ASSET = "AAPL"
START_DATE = "2016-01-01"
END_DATE = "2020-12-31"

# request the three-factor dataset through the "famafrench" reader
ff_dict = web.DataReader(
    "F-F_Research_Data_Factors", "famafrench", start=START_DATE, end=END_DATE
)
# list what the reader returned
ff_dict.keys()
dict_keys([0, 1, 'DESCR'])
# the "DESCR" entry is a text description of the tables stored under the other keys
print(ff_dict["DESCR"])
F-F Research Data Factors ------------------------- This file was created by CMPT_ME_BEME_RETS using the 202201 CRSP database. The 1-month TBill return is from Ibbotson and Associates, Inc. Copyright 2022 Kenneth R. French 0 : (60 rows x 4 cols) 1 : Annual Factors: January-December (5 rows x 4 cols)
# table 0 of the download (60 rows x 4 cols according to DESCR)
first_table = ff_dict[0]
# rename "Mkt-RF" to "MKT" and divide every value by 100
factor_3_df = first_table.rename(columns={"Mkt-RF": "MKT"}).div(100)
factor_3_df.head()
MKT | SMB | HML | RF | |
---|---|---|---|---|
Date | ||||
2016-01 | -0.0577 | -0.0339 | 0.0207 | 0.0001 |
2016-02 | -0.0008 | 0.0081 | -0.0057 | 0.0002 |
2016-03 | 0.0696 | 0.0075 | 0.0110 | 0.0002 |
2016-04 | 0.0092 | 0.0067 | 0.0321 | 0.0001 |
2016-05 | 0.0178 | -0.0019 | -0.0165 | 0.0001 |
# `yf.download` has no `adjusted` argument: depending on the yfinance release
# the unknown keyword is either silently ignored or rejected. The next cell
# reads the "Adj Close" column, which is only present when automatic price
# adjustment is switched off, so request that explicitly.
asset_df = yf.download(
    RISKY_ASSET,
    start=START_DATE,
    end=END_DATE,
    auto_adjust=False,
    progress=False,
)
print(f"Downloaded {asset_df.shape[0]} rows of data.")
Downloaded 1259 rows of data.
# month-end adjusted closes -> simple monthly returns (NaN rows discarded)
y = (
    asset_df["Adj Close"]
    .resample("M")
    .last()
    .pct_change()
    .dropna()
)
# switch from month-end timestamps to monthly periods; "M" is the documented
# monthly period alias (the lowercase spelling is not a documented alias)
y.index = y.index.to_period("M")
y.name = "rtn"
y.head()
Date 2016-01 -0.075242 2016-02 -0.001288 2016-03 0.127211 2016-04 -0.139921 2016-05 0.071773 Freq: M, Name: rtn, dtype: float64
# add the asset return column, then derive the return in excess of RF
factor_3_df = factor_3_df.join(y).assign(
    excess_rtn=lambda frame: frame["rtn"] - frame["RF"]
)
factor_3_df.head()
MKT | SMB | HML | RF | rtn | excess_rtn | |
---|---|---|---|---|---|---|
Date | ||||||
2016-01 | -0.0577 | -0.0339 | 0.0207 | 0.0001 | -0.075242 | -0.075342 |
2016-02 | -0.0008 | 0.0081 | -0.0057 | 0.0002 | -0.001288 | -0.001488 |
2016-03 | 0.0696 | 0.0075 | 0.0110 | 0.0002 | 0.127211 | 0.127011 |
2016-04 | 0.0092 | 0.0067 | 0.0321 | 0.0001 | -0.139921 | -0.140021 |
2016-05 | 0.0178 | -0.0019 | -0.0165 | 0.0001 | 0.071773 | 0.071673 |
# regress the excess return on the three factors
ff_spec = smf.ols(formula="excess_rtn ~ MKT + SMB + HML", data=factor_3_df)
ff_model = ff_spec.fit()
# print results
print(ff_model.summary())
OLS Regression Results ============================================================================== Dep. Variable: excess_rtn R-squared: 0.504 Model: OLS Adj. R-squared: 0.477 Method: Least Squares F-statistic: 18.94 Date: Wed, 02 Mar 2022 Prob (F-statistic): 1.32e-08 Time: 23:48:12 Log-Likelihood: 82.679 No. Observations: 60 AIC: -157.4 Df Residuals: 56 BIC: -149.0 Df Model: 3 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.0084 0.009 0.954 0.344 -0.009 0.026 MKT 1.4264 0.198 7.213 0.000 1.030 1.823 SMB -0.4590 0.359 -1.280 0.206 -1.177 0.259 HML -0.7186 0.260 -2.759 0.008 -1.240 -0.197 ============================================================================== Omnibus: 8.642 Durbin-Watson: 2.458 Prob(Omnibus): 0.013 Jarque-Bera (JB): 8.952 Skew: -0.652 Prob(JB): 0.0114 Kurtosis: 4.371 Cond. No. 45.8 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Print the available datasets (here, only the first 5):
from pandas_datareader.famafrench import get_available_datasets

# fetch the list of dataset names and display only its first five entries
available_datasets = get_available_datasets()
available_datasets[:5]
['F-F_Research_Data_Factors', 'F-F_Research_Data_Factors_weekly', 'F-F_Research_Data_Factors_daily', 'F-F_Research_Data_5_Factors_2x3', 'F-F_Research_Data_5_Factors_2x3_daily']
To do so, we used the fact that we can execute bash commands in Jupyter Notebooks by preceding them with `!`. First, we downloaded the file using `wget` and then unzipped it using `unzip`. There are also ways to do this in Python only, but this seemed like a good place to introduce the possibility of mixing bash commands into Notebooks. The link to the monthly data is always the same, and the file is updated every month.
# download the zip file from Prof. French's website
!wget http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip
# unpack the zip file
!unzip -a F-F_Research_Data_Factors_CSV.zip
# remove the zip file
!rm F-F_Research_Data_Factors_CSV.zip
zsh:1: command not found: wget unzip: cannot find or open F-F_Research_Data_Factors_CSV.zip, F-F_Research_Data_Factors_CSV.zip.zip or F-F_Research_Data_Factors_CSV.zip.ZIP. rm: F-F_Research_Data_Factors_CSV.zip: No such file or directory
# load data from CSV
factor_3_df = pd.read_csv("F-F_Research_Data_Factors.csv", skiprows=3)

# identify where the annual data starts
STR_TO_MATCH = " Annual Factors: January-December "
indices = factor_3_df.iloc[:, 0] == STR_TO_MATCH
start_of_annual = factor_3_df[indices].index[0]

# keep only monthly data; `.copy()` makes the result an independent frame so
# that the column assignment below does not write into a slice of the raw data
factor_3_df = factor_3_df[factor_3_df.index < start_of_annual].copy()

# rename columns
factor_3_df.columns = ["date", "mkt", "smb", "hml", "rf"]

# convert the YYYYMM strings to "YYYY-MM" labels
factor_3_df["date"] = (
    pd.to_datetime(factor_3_df["date"], format="%Y%m")
    .dt.strftime("%Y-%m")
)

# set index
factor_3_df = factor_3_df.set_index("date")

# filter only required dates
# The index holds "YYYY-MM" strings, which are compared character by
# character. A bound such as "2016-01-01" sorts after "2016-01" and would
# silently drop the first month, so reduce both bounds to the same
# "YYYY-MM" form before slicing.
first_month = str(pd.Period(START_DATE, freq="M"))
last_month = str(pd.Period(END_DATE, freq="M"))
factor_3_df = factor_3_df.loc[first_month:last_month]

# turn every column into numbers (unparseable cells become NaN) and divide by 100
factor_3_df = factor_3_df.apply(pd.to_numeric, errors="coerce").div(100)
factor_3_df.head()
mkt | smb | hml | rf | |
---|---|---|---|---|
date | ||||
2016-02 | -0.0007 | 0.0079 | -0.0050 | 0.0002 |
2016-03 | 0.0696 | 0.0087 | 0.0116 | 0.0002 |
2016-04 | 0.0092 | 0.0069 | 0.0326 | 0.0001 |
2016-05 | 0.0178 | -0.0027 | -0.0181 | 0.0001 |
2016-06 | -0.0005 | 0.0065 | -0.0147 | 0.0002 |
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web
# portfolio constituents and their weights, in matching order
ASSETS = ["AMZN", "GOOG", "AAPL", "MSFT"]
WEIGHTS = [0.25, 0.25, 0.25, 0.25]
# sample period
START_DATE = "2010-01-01"
END_DATE = "2020-12-31"

# download the three-factor dataset and keep table 0, with values divided by 100
ff_tables = web.DataReader(
    "F-F_Research_Data_Factors", "famafrench", start=START_DATE, end=END_DATE
)
factor_3_df = ff_tables[0].div(100)
# `yf.download` has no `adjusted` argument: depending on the yfinance release
# the unknown keyword is either silently ignored or rejected. The next cell
# reads the "Adj Close" columns, which are only present when automatic price
# adjustment is switched off, so request that explicitly.
asset_df = yf.download(
    ASSETS,
    start=START_DATE,
    end=END_DATE,
    auto_adjust=False,
    progress=False,
)
print(f"Downloaded {asset_df.shape[0]} rows of data.")
Downloaded 2769 rows of data.
# month-end adjusted closes -> simple monthly returns (NaN rows discarded)
asset_df = (
    asset_df["Adj Close"]
    .resample("M")
    .last()
    .pct_change()
    .dropna()
)
# reformat index for joining; "M" is the documented monthly period alias
# (the lowercase spelling is not a documented alias)
asset_df.index = asset_df.index.to_period("M")

# portfolio return = returns matrix (columns ordered as ASSETS) times WEIGHTS
asset_df["portfolio_returns"] = np.matmul(
    asset_df[ASSETS].values,
    WEIGHTS
)
asset_df.head()
AAPL | AMZN | GOOG | MSFT | portfolio_returns | |
---|---|---|---|---|---|
Date | |||||
2010-01 | -0.088597 | -0.067722 | -0.145231 | -0.075459 | -0.094252 |
2010-02 | 0.065396 | -0.055897 | -0.005925 | 0.022146 | 0.006430 |
2010-03 | 0.148470 | 0.146706 | 0.076538 | 0.021626 | 0.098335 |
2010-04 | 0.111022 | 0.009796 | -0.073036 | 0.042677 | 0.022615 |
2010-05 | -0.016125 | -0.084902 | -0.076222 | -0.151395 | -0.082161 |
# quick look at the individual and portfolio return series
asset_df.plot();

# combine returns with the factors and discard the per-asset columns
factor_3_df = asset_df.join(factor_3_df).drop(columns=ASSETS)
# relabel the remaining columns by position
factor_3_df.columns = ["portf_rtn", "mkt", "smb", "hml", "rf"]
# portfolio return in excess of `rf`
factor_3_df = factor_3_df.assign(
    portf_ex_rtn=lambda frame: frame["portf_rtn"] - frame["rf"]
)
def rolling_factor_model(input_data, formula, window_size):
    """
    Function for estimating a factor model (for example the Fama-French
    n-factor model) using a rolling window of fixed size.

    Parameters
    ------------
    input_data : pd.DataFrame
        A DataFrame containing the factors and asset/portfolio returns
    formula : str
        `statsmodels` compatible formula representing the OLS regression
    window_size : int
        Rolling window length, i.e. the number of consecutive rows used
        for each regression. Must be at least 1.

    Returns
    -----------
    coeffs_df : pd.DataFrame
        DataFrame containing the fitted parameters (one column per term of
        the formula) for each iteration. Each row is labelled with the index
        value of the last observation of its window. The frame is empty when
        `input_data` has fewer than `window_size` rows.

    Raises
    -----------
    ValueError
        If `window_size` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(
            f"window_size must be a positive integer, got {window_size!r}"
        )

    coeffs = []

    # a non-positive count yields an empty range, hence an empty result
    n_windows = len(input_data) - window_size + 1
    for start_ind in range(n_windows):
        end_ind = start_ind + window_size

        # define and fit the regression model on this window; `.iloc` makes
        # the row selection positional whatever the index type is
        ff_model = smf.ols(
            formula=formula,
            data=input_data.iloc[start_ind:end_ind]
        ).fit()

        # store coefficients
        coeffs.append(ff_model.params)

    coeffs_df = pd.DataFrame(
        coeffs,
        index=input_data.index[window_size - 1:]
    )

    return coeffs_df
# regression specification shared by every window
MODEL_FORMULA = "portf_ex_rtn ~ mkt + smb + hml"
# one regression per 60-row window
results_df = rolling_factor_model(factor_3_df, MODEL_FORMULA, window_size=60)

# draw each coefficient with its own line style and place the legend
# to the right of the axes
ax = results_df.plot(
    title = "Rolling Fama-French Three-Factor model",
    style=["-", "--", "-.", ":"],
)
ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))

sns.despine()
plt.tight_layout()
# plt.savefig("images/figure_9_6", dpi=200)
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web
# stock under study and the sample period
RISKY_ASSET = "AMZN"
START_DATE = "2016-01-01"
END_DATE = "2020-12-31"

# the same date range is sent with each of the three requests
ff_period = {"start": START_DATE, "end": END_DATE}

# three factors
factor_3_df = web.DataReader(
    "F-F_Research_Data_Factors", "famafrench", **ff_period
)[0]

# momentum factor
momentum_df = web.DataReader(
    "F-F_Momentum_Factor", "famafrench", **ff_period
)[0]

# five factors
factor_5_df = web.DataReader(
    "F-F_Research_Data_5_Factors_2x3", "famafrench", **ff_period
)[0]
# `yf.download` has no `adjusted` argument: depending on the yfinance release
# the unknown keyword is either silently ignored or rejected. The next cell
# reads the "Adj Close" column, which is only present when automatic price
# adjustment is switched off, so request that explicitly.
asset_df = yf.download(
    RISKY_ASSET,
    start=START_DATE,
    end=END_DATE,
    auto_adjust=False,
    progress=False,
)
print(f"Downloaded {asset_df.shape[0]} rows of data.")
Downloaded 1259 rows of data.
# month-end adjusted closes -> simple monthly returns (NaN rows discarded)
y = (
    asset_df["Adj Close"]
    .resample("M")
    .last()
    .pct_change()
    .dropna()
)
# switch from month-end timestamps to monthly periods; "M" is the documented
# monthly period alias (the lowercase spelling is not a documented alias)
y.index = y.index.to_period("M")
y.name = "rtn"
# three factors + momentum + asset returns, aligned on the index
factor_4_df = factor_3_df.join(momentum_df).join(y)

# relabel the columns by position
factor_4_df.columns = ["mkt", "smb", "hml", "rf", "mom", "rtn"]

# divide everything (except returns) by 100
not_returns = factor_4_df.columns != "rtn"
factor_4_df.loc[:, not_returns] = factor_4_df.loc[:, not_returns] / 100

# calculate excess returns
factor_4_df["excess_rtn"] = factor_4_df["rtn"] - factor_4_df["rf"]

factor_4_df.head()
mkt | smb | hml | rf | mom | rtn | excess_rtn | |
---|---|---|---|---|---|---|---|
Date | |||||||
2016-01 | -0.0577 | -0.0339 | 0.0207 | 0.0001 | 0.0139 | -0.131516 | -0.131616 |
2016-02 | -0.0008 | 0.0081 | -0.0057 | 0.0002 | -0.0426 | -0.058739 | -0.058939 |
2016-03 | 0.0696 | 0.0075 | 0.0110 | 0.0002 | -0.0504 | 0.074423 | 0.074223 |
2016-04 | 0.0092 | 0.0067 | 0.0321 | 0.0001 | -0.0607 | 0.111094 | 0.110994 |
2016-05 | 0.0178 | -0.0019 | -0.0165 | 0.0001 | 0.0137 | 0.095817 | 0.095717 |
# five factors + asset returns, aligned on the index
factor_5_df = factor_5_df.join(y)

# relabel the columns by position
factor_5_df.columns = ["mkt", "smb", "hml", "rmw", "cma", "rf", "rtn"]

# divide everything (except returns) by 100
not_returns = factor_5_df.columns != "rtn"
factor_5_df.loc[:, not_returns] = factor_5_df.loc[:, not_returns] / 100

# calculate excess returns
factor_5_df["excess_rtn"] = factor_5_df["rtn"] - factor_5_df["rf"]

factor_5_df.head()
mkt | smb | hml | rmw | cma | rf | rtn | excess_rtn | |
---|---|---|---|---|---|---|---|---|
Date | ||||||||
2016-01 | -0.0577 | -0.0342 | 0.0207 | 0.0281 | 0.0309 | 0.0001 | -0.131516 | -0.131616 |
2016-02 | -0.0008 | 0.0093 | -0.0057 | 0.0332 | 0.0196 | 0.0002 | -0.058739 | -0.058939 |
2016-03 | 0.0696 | 0.0101 | 0.0110 | 0.0073 | -0.0002 | 0.0002 | 0.074423 | 0.074223 |
2016-04 | 0.0092 | 0.0115 | 0.0321 | -0.0292 | 0.0189 | 0.0001 | 0.111094 | 0.110994 |
2016-05 | 0.0178 | -0.0064 | -0.0165 | -0.0109 | -0.0249 | 0.0001 | 0.095817 | 0.095717 |
# excess return regressed on the three factors plus momentum
four_factor_spec = smf.ols(
    formula="excess_rtn ~ mkt + smb + hml + mom", data=factor_4_df
)
four_factor_model = four_factor_spec.fit()
print(four_factor_model.summary())
OLS Regression Results ============================================================================== Dep. Variable: excess_rtn R-squared: 0.563 Model: OLS Adj. R-squared: 0.532 Method: Least Squares F-statistic: 17.74 Date: Thu, 03 Mar 2022 Prob (F-statistic): 2.10e-09 Time: 00:07:34 Log-Likelihood: 89.673 No. Observations: 60 AIC: -169.3 Df Residuals: 55 BIC: -158.9 Df Model: 4 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.0054 0.008 0.676 0.502 -0.011 0.021 mkt 1.4461 0.188 7.709 0.000 1.070 1.822 smb -0.4336 0.340 -1.276 0.207 -1.115 0.247 hml -0.7914 0.274 -2.888 0.006 -1.341 -0.242 mom 0.2220 0.269 0.826 0.412 -0.316 0.760 ============================================================================== Omnibus: 0.390 Durbin-Watson: 2.032 Prob(Omnibus): 0.823 Jarque-Bera (JB): 0.276 Skew: 0.163 Prob(JB): 0.871 Kurtosis: 2.933 Cond. No. 50.8 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# excess return regressed on the five factors
five_factor_spec = smf.ols(
    formula="excess_rtn ~ mkt + smb + hml + rmw + cma", data=factor_5_df
)
five_factor_model = five_factor_spec.fit()
print(five_factor_model.summary())
OLS Regression Results ============================================================================== Dep. Variable: excess_rtn R-squared: 0.612 Model: OLS Adj. R-squared: 0.576 Method: Least Squares F-statistic: 17.01 Date: Thu, 03 Mar 2022 Prob (F-statistic): 4.54e-10 Time: 00:07:35 Log-Likelihood: 93.191 No. Observations: 60 AIC: -174.4 Df Residuals: 54 BIC: -161.8 Df Model: 5 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.0059 0.008 0.772 0.444 -0.009 0.021 mkt 1.5117 0.193 7.851 0.000 1.126 1.898 smb -0.9411 0.335 -2.807 0.007 -1.613 -0.269 hml -0.5433 0.281 -1.936 0.058 -1.106 0.019 rmw -1.1628 0.513 -2.266 0.027 -2.191 -0.134 cma -0.5153 0.509 -1.012 0.316 -1.536 0.505 ============================================================================== Omnibus: 0.073 Durbin-Watson: 2.074 Prob(Omnibus): 0.964 Jarque-Bera (JB): 0.181 Skew: -0.077 Prob(JB): 0.913 Kurtosis: 2.779 Cond. No. 79.4 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
import pandas as pd
import pandas_datareader.data as web
from linearmodels.asset_pricing import LinearFactorModel
# sample period
START_DATE = "2010"
END_DATE = "2020-12"

# download the five-factor dataset and keep table 0, with values divided by 100
ff5_tables = web.DataReader(
    "F-F_Research_Data_5_Factors_2x3",
    "famafrench",
    start=START_DATE,
    end=END_DATE,
)
factor_5_df = ff5_tables[0].div(100)
factor_5_df.head()
Mkt-RF | SMB | HML | RMW | CMA | RF | |
---|---|---|---|---|---|---|
Date | ||||||
2010-01 | -0.0336 | 0.0035 | 0.0043 | -0.0123 | 0.0044 | 0.0000 |
2010-02 | 0.0340 | 0.0151 | 0.0322 | -0.0028 | 0.0140 | 0.0000 |
2010-03 | 0.0631 | 0.0185 | 0.0221 | -0.0063 | 0.0167 | 0.0001 |
2010-04 | 0.0200 | 0.0498 | 0.0289 | 0.0070 | 0.0174 | 0.0001 |
2010-05 | -0.0789 | 0.0004 | -0.0244 | 0.0127 | -0.0023 | 0.0001 |
# download the 12 industry portfolios and keep table 0
industry_tables = web.DataReader(
    "12_Industry_Portfolios",
    "famafrench",
    start=START_DATE,
    end=END_DATE,
)
# divide by 100, then subtract the RF column row by row from every portfolio
portfolio_df = industry_tables[0].div(100).sub(factor_5_df["RF"], axis=0)
portfolio_df.head()
NoDur | Durbl | Manuf | Enrgy | Chems | BusEq | Telcm | Utils | Shops | Hlth | Money | Other | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||||
2010-01 | -0.0233 | -0.0094 | -0.0436 | -0.0489 | -0.0109 | -0.0793 | -0.0670 | -0.0449 | -0.0185 | -0.0001 | -0.0107 | -0.0256 |
2010-02 | 0.0272 | 0.0738 | 0.0583 | 0.0256 | 0.0413 | 0.0481 | 0.0285 | -0.0041 | 0.0429 | 0.0038 | 0.0270 | 0.0465 |
2010-03 | 0.0597 | 0.0948 | 0.0779 | 0.0323 | 0.0410 | 0.0666 | 0.0759 | 0.0312 | 0.0623 | 0.0360 | 0.0816 | 0.0886 |
2010-04 | -0.0094 | 0.0748 | 0.0422 | 0.0404 | 0.0127 | 0.0220 | 0.0358 | 0.0284 | 0.0255 | -0.0223 | 0.0092 | 0.0430 |
2010-05 | -0.0569 | -0.0900 | -0.0920 | -0.1023 | -0.0677 | -0.0769 | -0.0582 | -0.0630 | -0.0535 | -0.0802 | -0.0922 | -0.0822 |
# RF has already been subtracted from the portfolios; keep only the factor columns
factor_5_df = factor_5_df.drop(columns="RF")
factor_5_df.head()
Mkt-RF | SMB | HML | RMW | CMA | |
---|---|---|---|---|---|
Date | |||||
2010-01 | -0.0336 | 0.0035 | 0.0043 | -0.0123 | 0.0044 |
2010-02 | 0.0340 | 0.0151 | 0.0322 | -0.0028 | 0.0140 |
2010-03 | 0.0631 | 0.0185 | 0.0221 | -0.0063 | 0.0167 |
2010-04 | 0.0200 | 0.0498 | 0.0289 | 0.0070 | 0.0174 |
2010-05 | -0.0789 | 0.0004 | -0.0244 | 0.0127 | -0.0023 |
# set up the linear factor model with the portfolios and the five factors, then fit it
five_factor_model = LinearFactorModel(portfolios=portfolio_df, factors=factor_5_df)
result = five_factor_model.fit()
print(result)
LinearFactorModel Estimation Summary ================================================================================ No. Test Portfolios: 12 R-squared: 0.7906 No. Factors: 5 J-statistic: 9.9132 No. Observations: 132 P-value 0.1935 Date: Thu, Mar 03 2022 Distribution: chi2(7) Time: 00:14:16 Cov. Estimator: robust Risk Premia Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Mkt-RF 0.0123 0.0038 3.2629 0.0011 0.0049 0.0198 SMB -0.0063 0.0052 -1.2085 0.2269 -0.0165 0.0039 HML -0.0089 0.0032 -2.7764 0.0055 -0.0152 -0.0026 RMW -0.0009 0.0046 -0.1953 0.8451 -0.0099 0.0081 CMA -0.0025 0.0039 -0.6467 0.5178 -0.0100 0.0051 ============================================================================== Covariance estimator: HeteroskedasticCovariance See full_summary for complete results
We can also print the full summary (one aggregate table and 12 individual ones, one for each portfolio).
# `full_summary` adds one coefficient table per test portfolio to the risk premia table
print(result.full_summary)
LinearFactorModel Estimation Summary ================================================================================ No. Test Portfolios: 12 R-squared: 0.7906 No. Factors: 5 J-statistic: 9.9132 No. Observations: 132 P-value 0.1935 Date: Thu, Mar 03 2022 Distribution: chi2(7) Time: 00:14:16 Cov. Estimator: robust Risk Premia Estimates ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ Mkt-RF 0.0123 0.0038 3.2629 0.0011 0.0049 0.0198 SMB -0.0063 0.0052 -1.2085 0.2269 -0.0165 0.0039 HML -0.0089 0.0032 -2.7764 0.0055 -0.0152 -0.0026 RMW -0.0009 0.0046 -0.1953 0.8451 -0.0099 0.0081 CMA -0.0025 0.0039 -0.6467 0.5178 -0.0100 0.0051 NoDur Coefficients ============================================================================== Parameter Std. Err. T-stat P-value Lower CI Upper CI ------------------------------------------------------------------------------ alpha -0.0008 0.0015 -0.5523 1.4192 -0.0038 0.0021 Mkt-RF 0.7861 0.0455 17.271 0.0000 0.6969 0.8753 SMB -0.2158 0.0946 -2.2820 1.9775 -0.4012 -0.0305 HML -0.0838 0.1020 -0.8217 1.5888 -0.2838 0.1161 RMW 0.4681 0.1211 3.8663 0.0001 0.2308 0.7054 CMA 0.3388 0.1498 2.2619 0.0237 0.0452 0.6324 Durbl Coefficients ============================================================================== alpha 0.0020 0.0021 0.9666 0.3337 -0.0021 0.0062 Mkt-RF 1.5488 0.1479 10.474 0.0000 1.2590 1.8386 SMB 0.5808 0.1796 3.2342 0.0012 0.2288 0.9329 HML -0.1684 0.1708 -0.9860 1.6759 -0.5030 0.1663 RMW 0.2942 0.4764 0.6176 0.5369 -0.6395 1.2279 CMA 0.2994 0.2703 1.1078 0.2679 -0.2303 0.8291 Manuf Coefficients ============================================================================== alpha 0.0014 0.0016 0.8404 0.4007 -0.0018 0.0045 Mkt-RF 1.0950 0.0481 22.749 0.0000 1.0006 1.1893 SMB 0.2910 0.0625 4.6554 0.0000 0.1685 0.4135 HML 0.1468 0.0596 2.4640 0.0137 0.0300 0.2636 RMW 0.0867 
0.1180 0.7346 0.4626 -0.1446 0.3180 CMA -0.0110 0.1186 -0.0926 1.0738 -0.2435 0.2215 Enrgy Coefficients ============================================================================== alpha -0.0037 0.0021 -1.7798 1.9249 -0.0078 0.0004 Mkt-RF 1.2480 0.1279 9.7577 0.0000 0.9973 1.4987 SMB 0.4873 0.1796 2.7135 0.0067 0.1353 0.8393 HML 0.6308 0.1674 3.7677 0.0002 0.3027 0.9589 RMW 0.2439 0.3043 0.8012 0.4230 -0.3526 0.8404 CMA 0.4045 0.2683 1.5074 0.1317 -0.1214 0.9305 Chems Coefficients ============================================================================== alpha -0.0012 0.0015 -0.8116 1.5830 -0.0040 0.0017 Mkt-RF 0.8852 0.0465 19.020 0.0000 0.7940 0.9764 SMB -0.0893 0.0856 -1.0438 1.7034 -0.2570 0.0784 HML -0.0185 0.0561 -0.3298 1.2585 -0.1284 0.0914 RMW 0.1720 0.0910 1.8909 0.0586 -0.0063 0.3503 CMA 0.2107 0.1048 2.0117 0.0443 0.0054 0.4160 BusEq Coefficients ============================================================================== alpha -0.0022 0.0015 -1.4658 1.8573 -0.0051 0.0007 Mkt-RF 1.1169 0.0336 33.235 0.0000 1.0511 1.1828 SMB -0.1798 0.0639 -2.8132 1.9951 -0.3050 -0.0545 HML -0.1429 0.0718 -1.9907 1.9535 -0.2836 -0.0022 RMW -0.0223 0.0970 -0.2299 1.1818 -0.2123 0.1677 CMA -0.5616 0.1099 -5.1080 2.0000 -0.7771 -0.3461 Telcm Coefficients ============================================================================== alpha 0.0010 0.0020 0.4845 0.6280 -0.0029 0.0049 Mkt-RF 0.8962 0.0484 18.517 0.0000 0.8014 0.9911 SMB -0.0954 0.0906 -1.0526 1.7075 -0.2730 0.0822 HML -0.0862 0.1253 -0.6878 1.5084 -0.3319 0.1595 RMW 0.2448 0.1192 2.0532 0.0401 0.0111 0.4784 CMA 0.6837 0.2044 3.3455 0.0008 0.2832 1.0843 Utils Coefficients ============================================================================== alpha 0.0029 0.0025 1.1826 0.2370 -0.0019 0.0077 Mkt-RF 0.4577 0.0770 5.9437 0.0000 0.3067 0.6086 SMB -0.1272 0.1411 -0.9011 1.6325 -0.4037 0.1494 HML 0.0769 0.1841 0.4179 0.6760 -0.2839 0.4378 RMW 0.3192 0.2109 1.5137 0.1301 -0.0941 0.7325 CMA 0.1941 0.2739 
0.7088 0.4785 -0.3427 0.7309 Shops Coefficients ============================================================================== alpha 0.0007 0.0019 0.4031 0.6869 -0.0029 0.0044 Mkt-RF 0.9482 0.0458 20.685 0.0000 0.8583 1.0380 SMB 0.0568 0.0775 0.7336 0.4632 -0.0950 0.2086 HML -0.3332 0.0618 -5.3949 2.0000 -0.4543 -0.2122 RMW 0.4477 0.1086 4.1221 0.0000 0.2348 0.6606 CMA 0.2888 0.1083 2.6673 0.0076 0.0766 0.5010 Hlth Coefficients ============================================================================== alpha -0.0016 0.0021 -0.7540 1.5492 -0.0056 0.0025 Mkt-RF 0.8193 0.0504 16.268 0.0000 0.7205 0.9180 SMB 0.0169 0.0949 0.1783 0.8585 -0.1690 0.2028 HML -0.4532 0.0933 -4.8574 2.0000 -0.6361 -0.2703 RMW -0.3409 0.1389 -2.4541 1.9859 -0.6131 -0.0686 CMA 0.2833 0.1606 1.7635 0.0778 -0.0316 0.5981 Money Coefficients ============================================================================== alpha 0.0024 0.0017 1.4241 0.1544 -0.0009 0.0056 Mkt-RF 1.0385 0.0335 30.967 0.0000 0.9728 1.1043 SMB -0.0043 0.0703 -0.0611 1.0487 -0.1422 0.1336 HML 0.6226 0.0665 9.3611 0.0000 0.4922 0.7530 RMW -0.5510 0.0815 -6.7618 2.0000 -0.7107 -0.3913 CMA -0.3396 0.1091 -3.1131 1.9981 -0.5534 -0.1258 Other Coefficients ============================================================================== alpha 3.703e-05 0.0014 0.0271 0.9784 -0.0026 0.0027 Mkt-RF 1.0369 0.0409 25.359 0.0000 0.9567 1.1170 SMB 0.0824 0.0548 1.5024 0.1330 -0.0251 0.1898 HML 0.0969 0.0592 1.6361 0.1018 -0.0192 0.2130 RMW 0.1131 0.1051 1.0766 0.2817 -0.0928 0.3191 CMA 0.2255 0.0947 2.3800 0.0173 0.0398 0.4112 ============================================================================== Covariance estimator: HeteroskedasticCovariance See full_summary for complete results
from statsmodels.api import OLS, add_constant
# First stage: one time-series regression per portfolio on the factors.
# The regressors (constant + factors) are the same for every portfolio,
# so they are built once.
factors_with_const = add_constant(factor_5_df)

factor_loadings = []
for portfolio in portfolio_df:
    reg_1 = OLS(endog=portfolio_df[portfolio], exog=factors_with_const).fit()
    # keep the factor slopes only; the intercept is not a loading
    factor_loadings.append(reg_1.params.drop("const"))

# one row per portfolio, one column per factor
factor_load_df = pd.DataFrame(
    factor_loadings,
    columns=factor_5_df.columns,
    index=portfolio_df.columns,
)
factor_load_df.head()
Mkt-RF | SMB | HML | RMW | CMA | |
---|---|---|---|---|---|
NoDur | 0.786087 | -0.215818 | -0.083847 | 0.468129 | 0.338823 |
Durbl | 1.548809 | 0.580849 | -0.168357 | 0.294196 | 0.299400 |
Manuf | 1.094951 | 0.291003 | 0.146831 | 0.086695 | -0.010987 |
Enrgy | 1.248025 | 0.487285 | 0.630805 | 0.243854 | 0.404512 |
Chems | 0.885184 | -0.089296 | -0.018501 | 0.171997 | 0.210732 |
# Second stage: for every period, regress that period's portfolio returns
# on the estimated loadings (no constant is added).
loading_order = factor_load_df.index

risk_premia = []
for period in portfolio_df.index:
    reg_2 = OLS(
        endog=portfolio_df.loc[period, loading_order],
        exog=factor_load_df,
    ).fit()
    risk_premia.append(reg_2.params)

# one row per period, one column per factor
risk_premia_df = pd.DataFrame(
    risk_premia,
    index=portfolio_df.index,
    columns=factor_load_df.columns.tolist(),
)
risk_premia_df.head()
Mkt-RF | SMB | HML | RMW | CMA | |
---|---|---|---|---|---|
Date | |||||
2010-01 | -0.032631 | 0.051998 | -0.023749 | -0.039525 | 0.015071 |
2010-02 | 0.036662 | 0.020982 | -0.014351 | 0.027181 | -0.029331 |
2010-03 | 0.065954 | -0.031731 | -0.003074 | -0.001531 | -0.001160 |
2010-04 | 0.019455 | 0.048860 | 0.009688 | 0.040766 | -0.014576 |
2010-05 | -0.076882 | 0.024591 | -0.021421 | 0.021403 | -0.014296 |
# average each factor's per-period estimates over all periods
risk_premia_df.mean(axis=0)
Mkt-RF 0.012339 SMB -0.006277 HML -0.008939 RMW -0.000895 CMA -0.002490 dtype: float64