#!/usr/bin/env python
# coding: utf-8

# Open In Colab
#
# # Running the First Regression in Python
# Suppose this is your first time writing code, and you want to run a simple
# regression on two series of asset prices to find the equity beta.
# Let's use a step-by-step approach to complete the task.
#
# Step 1: Download two assets' prices from the web
# Step 2: Put them into matrix form
# Step 3: Run the OLS
# Step 4: Plot the data

# ### Step 1: Download data
# We will use the yfinance package (https://pypi.org/project/yfinance/) to
# download Yahoo Finance data from the web. We need to (1) install and
# (2) import this package.

# In[ ]:

import importlib
import importlib.util
import subprocess
import sys

# Install yfinance only when it is missing. Going through the running
# interpreter's own pip works both inside a notebook and as a plain script,
# whereas get_ipython() exists only under IPython.
if importlib.util.find_spec("yfinance") is None:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "yfinance"])
    importlib.invalidate_caches()  # let the import system see the new package

import yfinance as yf  # to import


# In[3]:

def _download_adj_close(ticker, start="2011-01-01", end="2022-05-31"):
    """Return the monthly adjusted-close prices of *ticker* as a named Series.

    Args:
        ticker: Yahoo Finance symbol, e.g. "TSLA".
        start: First date of the download window (YYYY-MM-DD).
        end: Last date of the download window (YYYY-MM-DD).

    Returns:
        The 'Adj Close' column of the download, renamed to *ticker*.
    """
    # auto_adjust=False is passed explicitly so the 'Adj Close' column is
    # requested regardless of the library default.
    # NOTE(review): newer yfinance releases reportedly default to
    # auto_adjust=True and drop 'Adj Close' -- confirm for your version.
    prices = yf.download(
        ticker,
        start=start,
        end=end,
        interval="1mo",
        auto_adjust=False,
    )["Adj Close"]
    # NOTE(review): some yfinance versions appear to return multi-level
    # columns even for one ticker, making this a one-column DataFrame;
    # squeeze it to a Series so .rename(ticker) sets the series name.
    if prices.ndim == 2:
        prices = prices.squeeze("columns")
    return prices.rename(ticker)


# download
mystock = _download_adj_close("TSLA")
index = _download_adj_close("SPY")


# ### Step 2: Put two time series into a matrix
# We need the pandas module, so let's install and import it.
# https://pandas.pydata.org/

# In[4]:

# !pip install pandas  # Actually, you have this already if you installed Anaconda.
import pandas as pd


# In[5]:

# combine the two asset prices into one matrix called a pandas DataFrame
data = pd.concat([mystock, index], axis=1)

# drop missing observations
data2 = data.dropna()

# compute monthly returns and drop the first observation
# (pct_change leaves the first row as NaN)
data3 = data2.pct_change().dropna()
data3  # displayed when run as a notebook cell; a no-op in a plain script


# ### Step 3: Run OLS
# We need to install and import the statsmodels module.
# https://www.statsmodels.org/stable/index.html

# In[6]:

# !pip install statsmodels
import statsmodels.formula.api as smf
import statsmodels.api as sm


# In[7]:

# run OLS
formula = 'TSLA ~ SPY'  # set dep var and indep var
results = smf.ols(formula, data3).fit()  # run OLS
print(results.summary())  # print


# ### beta of TSLA = 1.7553

# ### Step 4: Plot the result
# We need to install and import the matplotlib module. https://matplotlib.org/

# In[8]:

# !pip install matplotlib  # again, if you installed Anaconda, you have this already.
import matplotlib.pyplot as plt

# Start from an empty figure: plot_partregress_grid adds its own subplots,
# so a pre-made axes from plt.subplots() would only sit unused underneath.
fig = plt.figure(figsize=(10, 6))
fig = sm.graphics.plot_partregress_grid(results, fig=fig)
plt.show()  # needed for the figure to appear when run as a script


# ### Extra 1: using the scipy module, we can get the same beta!

# In[9]:

# !pip install scipy
from scipy import stats

# linregress(x, y): the slope of TSLA returns on SPY returns is the beta
beta, alpha, r_value, p_value, std_err = stats.linregress(
    data3['SPY'], data3["TSLA"]
)
print(beta.round(4))
print(alpha.round(4))
print(r_value.round(2))
print(p_value.round(4))


# ### Extra 2: using a beta formula, we can get the same beta.
#
# $$
# \beta_{tsla} = \frac{\sigma_{tsla,spy}}{\sigma_{spy}^2}
# $$

# In[38]:

# find the covariance matrix; the factor 12 (presumably annualising the
# monthly figures) cancels in the ratio below, so beta is unaffected
cov = data3.cov() * 12
print(cov)
print('\n')  # to give a space
# beta = cov(TSLA, SPY) / var(SPY)
print(round(cov.loc['TSLA', 'SPY'] / cov.loc['SPY', 'SPY'], 4))


# ### Extra 3: using linear algebra, we can get the same beta.
# Need to install numpy and import it. You probably have this already,
# so skip the installation and just import it. https://numpy.org/

# In[32]:

# warnings are annoying, so I include the lines below to suppress them.
# You do not need to do this.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


# In[34]:

import numpy as np

X = data3['SPY']
y = data3['TSLA']
X_ols = sm.add_constant(X)  # add a constant vector
# print(X_ols)

# compute beta from the normal equations (X'X) b = X'y; solving the linear
# system avoids forming the explicit inverse of X'X
X_mat = X_ols.to_numpy()
beta = np.linalg.solve(X_mat.T @ X_mat, X_mat.T @ y.to_numpy())
print(round(beta[1], 4))  # beta[0] is the intercept, beta[1] the SPY slope


# In[ ]: