#!/usr/bin/env python
# coding: utf-8

# Open In Colab

# # Running the First Regression in Python
# Suppose this is your first time writing code. Perhaps you want to run a
# simple regression using two series of asset prices to find the equity beta.
# Let's use a step-by-step approach to complete the task.
#
# Step 1: Download two assets' prices from the web
# Step 2: Put them into matrix form
# Step 3: Run the OLS
# Step 4: Plot data

# ### Step 1: Download data
# We will use the yfinance package (https://pypi.org/project/yfinance/) to
# download Yahoo Finance data from the web. We need to (1) install and
# (2) import this package.

# In[1]:

# `get_ipython` only exists inside IPython/Jupyter. Look it up defensively so
# that running this file with plain `python` skips the install step instead of
# failing with NameError (install manually with `pip install yfinance` then).
try:
    _shell = get_ipython()
except NameError:
    _shell = None
if _shell is not None:
    _shell.system('pip install yfinance')

import yfinance as yf  # to import

# In[2]:

# Sample period shared by both downloads.
START_DATE = "2011-01-01"
END_DATE = "2022-05-31"


def _download_adj_close(ticker):
    """Return the monthly 'Adj Close' prices of *ticker* as a Series named *ticker*."""
    prices = yf.download(
        ticker,
        start=START_DATE,
        end=END_DATE,
        interval='1mo',
        # Request unadjusted OHLC plus a separate 'Adj Close' column explicitly:
        # newer yfinance releases default to auto_adjust=True, in which case
        # the 'Adj Close' column is not returned at all.
        auto_adjust=False,
    )['Adj Close']
    # Some yfinance releases return (field, ticker) column levels even for a
    # single ticker, so the selection above can be a one-column DataFrame.
    # Reduce it to a Series so that rename() sets the series name.
    if prices.ndim == 2:
        prices = prices.iloc[:, 0]
    return prices.rename(ticker)


# download
mystock = _download_adj_close("TSLA")
index = _download_adj_close("SPY")

# ### Step 2: Put two time series onto a matrix
# We need the pandas module, so let's install and import it.
# https://pandas.pydata.org/

# In[3]:

#!pip install pandas  # Actually, you already have this if you installed Anaconda.
import pandas as pd

# In[4]:

# combine two asset prices into one matrix, called a pandas DataFrame
data = pd.concat([mystock, index], axis=1)
# drop missing observations
data2 = data.dropna()
# compute monthly returns and drop the first observation
# (pct_change leaves the first row as NaN)
data3 = data2.pct_change().dropna()
# A bare `data3` only displays inside a notebook; print so scripts show it too.
print(data3)

# ### Let's plot data.

# In[5]:

# need to import matplotlib. You already have this in your Jupyter environment,
# so no need to install.
import matplotlib.pyplot as plt

# plot returns to see volatility levels
data3.plot(subplots=False, figsize=(10, 6))
# plot the wealth change of a $100 investment over time
(data2 / data2.iloc[0] * 100).plot(figsize=(10, 6), subplots=False)

# In[6]:

data3.plot.hist(bins=50, alpha=0.7, edgecolor='black', subplots=False, figsize=(10, 6))
data3.plot.scatter(x='SPY', y='TSLA', c='blue', figsize=(10, 6))

# ### Step 3: Run OLS
# We need to install and import the statsmodels module.
# https://www.statsmodels.org/stable/index.html

# In[7]:

#!pip install statsmodels
import statsmodels.formula.api as smf
import statsmodels.api as sm

# In[8]:

# run OLS
formula = 'TSLA ~ SPY'  # set dep var and indep var
results = smf.ols(formula, data3).fit()  # run OLS
print(results.summary())  # print

# ### beta of TSLA
# The slope on SPY is TSLA's beta. Read it from the fitted model instead of
# hard-coding a number, which goes stale whenever the downloaded data changes
# (the original notebook run reported 1.7553).
print(f"beta of TSLA = {results.params['SPY']:.4f}")

# ### Step 4: Plot the result
# We need to install and import the matplotlib module. https://matplotlib.org/

# In[9]:

#!pip install matplotlib  # again, if you installed Anaconda, you have this already.
# matplotlib.pyplot is already imported as `plt` at the top of this section.
# Start from an empty figure: plot_partregress_grid adds its own subplots, so a
# pre-made axes (as from plt.subplots) would be left behind the grid unused.
fig = plt.figure(figsize=(10, 6))
fig = sm.graphics.plot_partregress_grid(results, fig=fig)

# Needed for the figures to appear when this file is run as a plain script.
plt.show()

# ### Extra 1: using the scipy module, we can get the same beta!

# In[10]:

#!pip install scipy  # Again, you probably have this installed in your Jupyter environment already
from scipy import stats

# linregress(x, y): the slope of TSLA returns on SPY returns is the beta.
beta, alpha, r_value, p_value, std_err = stats.linregress(data3['SPY'], data3["TSLA"])
# Built-in round() matches the rounding used in the other "Extra" sections.
print(round(beta, 4))
print(round(alpha, 4))
print(round(r_value, 2))
print(round(p_value, 4))

# ### Extra 2: using a beta formula, we can get the same beta.
#
# $$
# \beta_{tsla} = \frac{\sigma_{tsla,spy}}{\sigma_{spy}^2}
# $$
#

# In[11]:

# find covariance matrix (x12 rescales monthly figures; the factor cancels in
# the ratio below, so beta is unaffected)
cov = data3.cov() * 12
print(cov)
print('\n')  # to give a space
# Select by label rather than position so the ratio stays cov(TSLA, SPY) /
# var(SPY) regardless of the column order of data3.
print(round(cov.loc['TSLA', 'SPY'] / cov.loc['SPY', 'SPY'], 4))

# ### Extra 3: using linear algebra, we can get the same beta.
# Need to install numpy and import it. You probably have this already.
# So skip installation. Just import it.
# https://numpy.org/
#
# $$
# b=\begin{bmatrix} b_0 \\ b_1 \\ \vdots \\ b_{k} \end{bmatrix}= (X^{'}X)^{-1}X^{'}Y
# $$
#
#
# So, the beta estimate from OLS equals the inverse of (X transpose times X),
# times X transpose, times the Y vector.
#

# In[12]:

# warnings are annoying, so I include the lines below to suppress them.
# You do not need to do this.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# In[56]:

import numpy as np

X = data3['SPY']
y = data3['TSLA']
X_ols = sm.add_constant(X)  # add a constant vector
#print(X_ols)

# compute beta using matrix operations: b = (X'X)^{-1} X'y
# Convert the pandas products to plain NumPy arrays first, so that `beta` is an
# ndarray and `beta[1]` below is unambiguously positional (integer keys on a
# label-indexed pandas object are deprecated positional access).
xtx = X_ols.T.dot(X_ols).to_numpy()
xty = X_ols.T.dot(y).to_numpy()
beta = np.linalg.inv(xtx) @ xty
# beta[0] multiplies the constant column added first above; beta[1] is the
# slope on SPY.
print(round(beta[1], 4))

# In[39]:

# In[ ]:

# #sklearn

# In[57]:

# Bare expressions only display inside a notebook; print so scripts show them.
print(type(y))

# In[52]:

# one column of SPY returns, one row per observation
x = np.array(X).reshape((-1, 1))
print(x.shape)

# In[53]:

# NOTE: rebinds `y` from the pandas Series to a column-vector ndarray
y = np.array(y).reshape((-1, 1))
print(y.shape)

# In[54]:

from sklearn.linear_model import LinearRegression

model = LinearRegression().fit(x, y)

# In[55]:

r_sq = model.score(x, y)
print(f"coefficient of determination: {r_sq}")
print(f"intercept: {model.intercept_}")
print(f"slope: {model.coef_}")

# In[ ]:

# In[ ]: