pip install yfinance
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Collecting yfinance Downloading yfinance-0.2.12-py2.py3-none-any.whl (59 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 59.2/59.2 KB 837.0 kB/s eta 0:00:00 Requirement already satisfied: lxml>=4.9.1 in /usr/local/lib/python3.9/dist-packages (from yfinance) (4.9.2) Collecting beautifulsoup4>=4.11.1 Downloading beautifulsoup4-4.11.2-py3-none-any.whl (129 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.4/129.4 KB 4.1 MB/s eta 0:00:00 Requirement already satisfied: numpy>=1.16.5 in /usr/local/lib/python3.9/dist-packages (from yfinance) (1.22.4) Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.9/dist-packages (from yfinance) (0.0.11) Collecting requests>=2.26 Downloading requests-2.28.2-py3-none-any.whl (62 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.8/62.8 KB 1.8 MB/s eta 0:00:00 Collecting frozendict>=2.3.4 Downloading frozendict-2.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (112 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 112.8/112.8 KB 2.3 MB/s eta 0:00:00 Collecting cryptography>=3.3.2 Downloading cryptography-39.0.2-cp36-abi3-manylinux_2_28_x86_64.whl (4.2 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.2/4.2 MB 18.2 MB/s eta 0:00:00 Collecting html5lib>=1.1 Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 112.2/112.2 KB 2.8 MB/s eta 0:00:00 Requirement already satisfied: pytz>=2022.5 in /usr/local/lib/python3.9/dist-packages (from yfinance) (2022.7.1) Collecting appdirs>=1.4.4 Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB) Requirement already satisfied: pandas>=1.3.0 in /usr/local/lib/python3.9/dist-packages (from yfinance) (1.4.4) Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.9/dist-packages (from beautifulsoup4>=4.11.1->yfinance) (2.4) Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.9/dist-packages (from 
cryptography>=3.3.2->yfinance) (1.15.1) Requirement already satisfied: webencodings in /usr/local/lib/python3.9/dist-packages (from html5lib>=1.1->yfinance) (0.5.1) Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.9/dist-packages (from html5lib>=1.1->yfinance) (1.15.0) Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas>=1.3.0->yfinance) (2.8.2) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests>=2.26->yfinance) (1.26.15) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests>=2.26->yfinance) (2.10) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests>=2.26->yfinance) (2022.12.7) Collecting charset-normalizer<4,>=2 Downloading charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (199 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.2/199.2 KB 7.3 MB/s eta 0:00:00 Requirement already satisfied: pycparser in /usr/local/lib/python3.9/dist-packages (from cffi>=1.12->cryptography>=3.3.2->yfinance) (2.21) Installing collected packages: appdirs, html5lib, frozendict, charset-normalizer, beautifulsoup4, requests, cryptography, yfinance Attempting uninstall: html5lib Found existing installation: html5lib 1.0.1 Uninstalling html5lib-1.0.1: Successfully uninstalled html5lib-1.0.1 Attempting uninstall: beautifulsoup4 Found existing installation: beautifulsoup4 4.9.3 Uninstalling beautifulsoup4-4.9.3: Successfully uninstalled beautifulsoup4-4.9.3 Attempting uninstall: requests Found existing installation: requests 2.25.1 Uninstalling requests-2.25.1: Successfully uninstalled requests-2.25.1 Successfully installed appdirs-1.4.4 beautifulsoup4-4.11.2 charset-normalizer-3.1.0 cryptography-39.0.2 frozendict-2.3.5 html5lib-1.1 requests-2.28.2 yfinance-0.2.12
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import datetime as dt
import urllib.request # handle online files
import zipfile
import os
import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from pylab import mpl, plt
plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'DejaVu Sans'
mpl.rcParams["savefig.dpi"] = 500
np.set_printoptions(precision=5, suppress=True, formatter={"float": lambda x: f"{x:6.3f}"})
%matplotlib inline
<ipython-input-2-4039a680b4e1>:20: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn')
#help(stats.linregress)
def get_prices(tickers, freq_p, st_day, end_day):
    """Download the 'Adj Close' series of one or more tickers.

    Parameters
    ----------
    tickers : str or iterable of str
        Ticker symbols to download. A single string is treated as one
        ticker rather than being iterated character by character.
    freq_p : str
        Passed to ``yf.download`` as ``interval``.
    st_day, end_day
        Passed to ``yf.download`` as ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        One column per ticker, named after the ticker. Rows are the union
        of the dates of all downloaded series, so a date that is missing
        for one ticker shows up as NaN instead of being dropped. An empty
        DataFrame is returned when ``tickers`` is empty.
    """
    if isinstance(tickers, str):
        tickers = [tickers]

    series_by_ticker = {}
    for t in tickers:
        # auto_adjust=False asks yfinance to keep a separate 'Adj Close'
        # column; newer releases adjust prices by default and then omit it.
        adj_close = yf.download(
            t, start=st_day, end=end_day, interval=freq_p, auto_adjust=False
        )['Adj Close']
        # Some yfinance versions return a one-column DataFrame here
        # (ticker-level column index); reduce it to a plain Series.
        if isinstance(adj_close, pd.DataFrame):
            adj_close = adj_close.iloc[:, 0]
        series_by_ticker[t] = adj_close

    if not series_by_ticker:
        # pd.concat raises on an empty input; keep the "empty frame" result.
        return pd.DataFrame()

    # Outer-join on the index so no ticker's dates are silently discarded,
    # which column-by-column assignment to an empty frame would do.
    return pd.concat(series_by_ticker, axis=1).sort_index()
# Download weekly adjusted closes for the stock and its benchmark.
# Other intervals to try for freq_p: '1d', '1wk', '1mo'.
tic = ['TSLA', 'SPY']
prices = get_prices(tic, freq_p='1wk', st_day="2011-01-01", end_day="2022-05-31")
print(prices)

# Switch to generic column names so later cells do not mention the tickers.
prices = prices.rename(columns={"TSLA": "Your_Stock", "SPY": "INDEX"})
prices.info()
prices.tail(20)
[*********************100%***********************] 1 of 1 completed [*********************100%***********************] 1 of 1 completed TSLA SPY Date 2011-01-01 1.882667 101.180954 2011-01-08 1.716667 102.899910 2011-01-15 1.536000 102.159821 2011-01-22 1.600667 101.642517 2011-01-29 1.564000 104.372192 ... ... ... 2022-04-30 288.549988 406.030701 2022-05-07 256.529999 396.534882 2022-05-14 221.300003 384.600952 2022-05-21 253.210007 409.900116 2022-05-28 252.753326 407.600189 [596 rows x 2 columns] <class 'pandas.core.frame.DataFrame'> DatetimeIndex: 596 entries, 2011-01-01 to 2022-05-28 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Your_Stock 596 non-null float64 1 INDEX 596 non-null float64 dtypes: float64(2) memory usage: 14.0 KB
Your_Stock | INDEX | |
---|---|---|
Date | ||
2022-01-15 | 314.633331 | 430.987976 |
2022-01-22 | 282.116669 | 434.894592 |
2022-01-29 | 307.773346 | 441.536835 |
2022-02-05 | 286.666656 | 433.428345 |
2022-02-12 | 285.660004 | 427.297791 |
2022-02-19 | 269.956665 | 430.761627 |
2022-02-26 | 279.429993 | 425.270691 |
2022-03-05 | 265.116669 | 413.363861 |
2022-03-12 | 301.796661 | 437.423523 |
2022-03-19 | 336.880005 | 446.846985 |
2022-03-26 | 361.529999 | 447.074036 |
2022-04-02 | 341.829987 | 441.793091 |
2022-04-09 | 328.333344 | 432.139313 |
2022-04-16 | 335.016663 | 420.540985 |
2022-04-23 | 290.253326 | 406.682190 |
2022-04-30 | 288.549988 | 406.030701 |
2022-05-07 | 256.529999 | 396.534882 |
2022-05-14 | 221.300003 | 384.600952 |
2022-05-21 | 253.210007 | 409.900116 |
2022-05-28 | 252.753326 | 407.600189 |
# Count missing values per column, discard every row that has any,
# then count again to confirm nothing is left.
print(prices.isna().sum())
prices = prices.dropna(how='any')
print(prices.isna().sum())
prices.info()
print(prices)
Your_Stock 0 INDEX 0 dtype: int64 Your_Stock 0 INDEX 0 dtype: int64 <class 'pandas.core.frame.DataFrame'> DatetimeIndex: 596 entries, 2011-01-01 to 2022-05-28 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Your_Stock 596 non-null float64 1 INDEX 596 non-null float64 dtypes: float64(2) memory usage: 14.0 KB Your_Stock INDEX Date 2011-01-01 1.882667 101.180954 2011-01-08 1.716667 102.899910 2011-01-15 1.536000 102.159821 2011-01-22 1.600667 101.642517 2011-01-29 1.564000 104.372192 ... ... ... 2022-04-30 288.549988 406.030701 2022-05-07 256.529999 396.534882 2022-05-14 221.300003 384.600952 2022-05-21 253.210007 409.900116 2022-05-28 252.753326 407.600189 [596 rows x 2 columns]
# Plot both raw price series on one set of axes.
prices.plot(figsize=(10,6))
plt.legend()
<matplotlib.legend.Legend at 0x7f6d43cc6100>
# Rebase each series to its first observation so both start at 1
# (the trailing "* 1" has no effect on the values).
(prices / prices.iloc[0] * 1).plot(figsize=(10, 6), subplots=False) #cumulative returns
plt.title('Cumulative Return')
plt.tight_layout()
# NOTE(review): the result of this expression is not assigned here; the
# same computation is stored in `returns` further down.
prices.pct_change(1).dropna()
# Scatter of price levels (not returns): stock on the x-axis, index on the y-axis.
plt.scatter(prices['Your_Stock'], prices['INDEX'], color = 'red', alpha=0.2)
<matplotlib.collections.PathCollection at 0x7f6d0c53ff40>
# Simple period-over-period returns; rows containing NaN (the first row,
# which has no predecessor) are dropped.
returns = prices.pct_change(periods=1).dropna()

# Draw both return distributions on the same axes: index first, then the stock.
for column, legend_label in (('INDEX', 'INDEX'), ('Your_Stock', 'Stock')):
    plt.hist(returns[column], label=legend_label, bins=100)
plt.legend(loc='upper right')
plt.title('Overlapping')
plt.show()
returns
Your_Stock | INDEX | |
---|---|---|
Date | ||
2011-01-08 | -0.088173 | 0.016989 |
2011-01-15 | -0.105243 | -0.007192 |
2011-01-22 | 0.042101 | -0.005064 |
2011-01-29 | -0.022907 | 0.026856 |
2011-02-05 | -0.008951 | 0.014945 |
... | ... | ... |
2022-04-30 | -0.005868 | -0.001602 |
2022-05-07 | -0.110969 | -0.023387 |
2022-05-14 | -0.137333 | -0.030096 |
2022-05-21 | 0.144193 | 0.065780 |
2022-05-28 | -0.001804 | -0.005611 |
595 rows × 2 columns
# Regress stock returns (y) on index returns (x). The result unpacks as
# slope, intercept, correlation coefficient, p-value and slope standard error.
beta, alpha, r_value, p_value, std_err = stats.linregress(
    x=returns['INDEX'], y=returns["Your_Stock"]
)
for statistic, decimals in ((beta, 4), (alpha, 4), (r_value, 2), (p_value, 4)):
    print(statistic.round(decimals))
1.5397 0.0071 0.45 0.0
# Same regression through the statsmodels formula interface, which also
# produces a full summary table.
formula = 'Your_Stock ~ INDEX'
model = smf.ols(formula=formula, data=returns)
results = model.fit()
print(results.summary())
OLS Regression Results ============================================================================== Dep. Variable: Your_Stock R-squared: 0.205 Model: OLS Adj. R-squared: 0.204 Method: Least Squares F-statistic: 152.8 Date: Thu, 16 Mar 2023 Prob (F-statistic): 2.18e-31 Time: 23:16:11 Log-Likelihood: 756.72 No. Observations: 595 AIC: -1509. Df Residuals: 593 BIC: -1501. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.0071 0.003 2.527 0.012 0.002 0.013 INDEX 1.5397 0.125 12.360 0.000 1.295 1.784 ============================================================================== Omnibus: 110.210 Durbin-Watson: 1.918 Prob(Omnibus): 0.000 Jarque-Bera (JB): 341.677 Skew: 0.874 Prob(JB): 6.40e-75 Kurtosis: 6.275 Cond. No. 44.7 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
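Pay attention to the shape of the data you feed into `.fit()`.
A single column selected as a Series has shape (n,) — e.g. x.shape = (10,) — but scikit-learn expects a 2-D feature array of shape (n, 1), e.g. (10, 1); see the scikit-learn documentation. We give y the same 2-D shape. So we select each column with a list of labels, which returns 2-D arrays: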
# Selecting with a list of labels keeps two dimensions, so each array
# has one row per observation and a single column.
X = returns[['INDEX']].to_numpy()
y = returns[['Your_Stock']].to_numpy()
X.shape
(595, 1)
# Check the container type of the feature array.
type(X)
numpy.ndarray
# Check the element type of the feature array.
X.dtype
dtype('float64')
# Check that the target array has the same 2-D layout as X.
y.shape
(595, 1)
# Fit a linear model with an intercept, then predict for the first
# observation only (kept 2-D by slicing instead of indexing).
reg = LinearRegression(fit_intercept=True)
reg.fit(X, y)
reg.predict(X[:1])
array([[ 0.033]])
# Fitted values for the first ten observations.
reg.predict(X[0:10])
array([[ 0.033], [-0.004], [-0.001], [ 0.048], [ 0.030], [ 0.024], [-0.018], [ 0.009], [-0.012], [-0.029]])
# Score of the fitted model on the same data it was trained on
# (the printed value matches the R-squared in the statsmodels summary above).
score = reg.score(X, y)
print(score)
0.2048562366630433
# Fitted slope, still wrapped in a 2-D array.
reg.coef_
array([[ 1.540]])
# Fitted intercept, still wrapped in a 1-D array.
reg.intercept_
array([ 0.007])
# Pull the slope and intercept out of their arrays as scalars.
m = reg.coef_[0,0]
b = reg.intercept_[0]
m
1.5397127262862014
# Display the scalar intercept.
b
0.007084853239936928
# Report the fitted line in slope-intercept form, rounded to four decimals.
print(f"formula: y = {m:.4f}X + {b:.4f}")
formula: y = 1.5397X + 0.0071
from numpy.ma.core import flatten_structured_array
# Scatter the observations and overlay the fitted regression line.
fig, ax = plt.subplots(figsize=(10, 7))
ax.scatter(X, y, color='blue', s=10)
ax.plot(X, reg.predict(X), color='red', linewidth=2)

# Dotted yellow grid on both axes.
ax.grid(True, axis='both', zorder=0, linestyle=':', color='y')

# Large tick and axis labels; the title carries the fitted equation and score.
ax.tick_params(labelsize=20)
ax.set_xlabel('INDEX', fontsize=24)
ax.set_ylabel('Your_Stock', fontsize=24)
ax.set_title(
    f"Linear Regression Line with Intercept y = {b:.4f} + {m:.4f}x (R2 = {score:.2f})",
    fontsize=16,
)
fig.tight_layout()
#fig.savefig('images/linearregression', dpi = 300)