#!/usr/bin/env python
# coding: utf-8

# Open In Colab

# # CAPM - Capital Asset Pricing Model

# In[1]:

# The notebook cell was the bare line `pip install yfinance`, which only works
# through IPython automagic and is a SyntaxError in a plain .py file. Run it as
# an explicit magic when an IPython kernel is present; otherwise yfinance must
# already be installed in the environment.
try:
    _ipython = get_ipython()
except NameError:
    # Plain `python script.py` run: no IPython shell available.
    _ipython = None

if _ipython is not None:
    _ipython.run_line_magic('pip', 'install yfinance')


# In[2]:

import datetime as dt
import os
import urllib.request  # handle online files
import zipfile

import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from pylab import mpl, plt

# The bundled seaborn style was renamed to "seaborn-v0_8" in newer matplotlib
# releases and the old name was later dropped; use whichever one is installed.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
mpl.rcParams['font.family'] = 'DejaVu Sans'
mpl.rcParams["savefig.dpi"] = 500
np.set_printoptions(precision=5, suppress=True,
                    formatter={"float": lambda x: f"{x:6.3f}"})
if _ipython is not None:
    # Inline plotting only makes sense inside a notebook kernel.
    _ipython.run_line_magic('matplotlib', 'inline')


# In[ ]:

#help(stats.linregress)


# In[3]:

def get_prices(tickers, freq_p, st_day, end_day):
    """Download adjusted close prices for several tickers into one DataFrame.

    Args:
        tickers: Iterable of ticker symbols; each one becomes a column.
        freq_p: Sampling interval, forwarded to ``yf.download`` as ``interval``.
        st_day: Start date, forwarded to ``yf.download`` as ``start``.
        end_day: End date, forwarded to ``yf.download`` as ``end``.

    Returns:
        A DataFrame with one 'Adj Close' column per ticker, named after the
        ticker, in the order the tickers were given.
    """
    mystock = pd.DataFrame()
    for t in tickers:
        # Request unadjusted OHLC explicitly so the 'Adj Close' column exists;
        # newer yfinance releases auto-adjust by default and omit it.
        data = yf.download(t, start=st_day, end=end_day, interval=freq_p,
                           auto_adjust=False)
        adj_close = data['Adj Close']
        # Newer yfinance may return MultiIndex columns even for one ticker, in
        # which case the selection above is a one-column frame, not a Series.
        if isinstance(adj_close, pd.DataFrame):
            adj_close = adj_close.iloc[:, 0]
        mystock[t] = adj_close
    return mystock


# In[4]:

tic = ['TSLA', 'SPY']
prices = get_prices(tic, freq_p='1wk', st_day="2011-01-01", end_day="2022-05-31")  # 1d, 1wk, 1mo
print(prices)
prices.rename(columns={"TSLA": "Your_Stock", "SPY": "INDEX"}, inplace=True)
prices.info()
prices.tail(20)


# In[5]:

# Report missing values, drop every row that has any, then confirm none remain.
print(prices.isnull().sum())
prices.dropna(how='any', inplace=True)
print(prices.isnull().sum())
prices.info()
print(prices)


# In[6]:

prices.plot(figsize=(10, 6))
plt.legend()


# In[7]:

# Each series divided by its first observation: growth of one unit invested.
(prices / prices.iloc[0] * 1).plot(figsize=(10, 6), subplots=False)  # cumulative returns
plt.title('Cumulative Return')
plt.tight_layout()


# In[8]:

# Start a fresh figure so that, when run as a script, the scatter does not draw
# on top of the cumulative-return axes (a notebook cell starts with none open).
# NOTE(review): this plots price levels; a discarded pct_change() call used to
# precede it, so a scatter of returns may have been intended -- confirm.
plt.figure()
plt.scatter(prices['Your_Stock'], prices['INDEX'], color='red', alpha=0.2)


# In[9]:

# Simple one-period percentage returns; the first row is NaN and is dropped.
returns = prices.pct_change(1).dropna()
plt.figure()  # separate figure for the histograms, for the same reason as above
plt.hist(returns['INDEX'], label='INDEX', bins=100)
plt.hist(returns['Your_Stock'], label='Stock', bins=100)
plt.legend(loc='upper right')
plt.title('Overlapping')
plt.show()


# In[10]:

returns


# # Using scipy
# In[11]:

# Regress the stock's returns (y) on the index returns (x); the slope is the
# CAPM beta and the intercept is alpha.
beta, alpha, r_value, p_value, std_err = stats.linregress(
    x=returns['INDEX'],
    y=returns["Your_Stock"],
)


# In[12]:

print(
    beta.round(4),
    alpha.round(4),
    r_value.round(2),
    p_value.round(4),
    sep="\n",
)


# # Using statsmodels

# In[13]:

formula = 'Your_Stock ~ INDEX'
results = smf.ols(formula=formula, data=returns).fit()
print(results.summary())


# # Using scikit-learn
# Check the shape of the data passed to .fit(): scikit-learn expects 2-D
# arrays. Selecting with a *list* of column labels keeps the second dimension,
# so X and y come out as (n_samples, 1) rather than (n_samples,).

# In[14]:

X = returns[['INDEX']].values


# In[15]:

y = returns[['Your_Stock']].values


# In[16]:

X.shape


# In[17]:

type(X)


# In[18]:

X.dtype


# In[19]:

y.shape


# In[20]:

reg = LinearRegression(fit_intercept=True)
reg.fit(X, y)


# In[21]:

# Prediction for the first observation only, kept 2-D with a single row.
reg.predict(X[:1])


# In[22]:

reg.predict(X[:10])


# In[23]:

score = reg.score(X, y)
print(score)


# In[24]:

reg.coef_


# In[25]:

reg.intercept_


# In[26]:

# y is 2-D, so coef_ is a 2-D array and intercept_ a 1-D array; pull out the
# scalar slope and intercept.
m = reg.coef_[0][0]
b = reg.intercept_[0]


# In[27]:

m


# In[28]:

b


# In[29]:

# Slope-intercept form of the fitted line.
print("formula: y = {:.4f}X + {:.4f}".format(m, b))


# In[30]:

from numpy.ma.core import flatten_structured_array

# Scatter of the observations with the fitted regression line on top.
fig, ax = plt.subplots(figsize=(10, 7))
ax.scatter(X, y, s=10, color='blue')
ax.plot(X, reg.predict(X), linewidth=2, color='red')

ax.grid(True, axis='both', zorder=0, linestyle=':', color='y')
ax.tick_params(labelsize=20)
ax.set_xlabel('INDEX', fontsize=24)
ax.set_ylabel('Your_Stock', fontsize=24)
ax.set_title(
    "Linear Regression Line with Intercept y = {:.4f} + {:.4f}x (R2 = {:.2f})".format(b, m, score),
    fontsize=16,
)
fig.tight_layout()
#fig.savefig('images/linearregression', dpi = 300)