# Disable the warnings
import warnings
# NOTE: the 'ignore' action with no further arguments applies to every warning
# category, so deprecation notices from the libraries used below are hidden too.
warnings.filterwarnings('ignore')
pip install yfinance
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Requirement already satisfied: yfinance in /usr/local/lib/python3.7/dist-packages (0.1.72) Requirement already satisfied: requests>=2.26 in /usr/local/lib/python3.7/dist-packages (from yfinance) (2.28.1) Requirement already satisfied: lxml>=4.5.1 in /usr/local/lib/python3.7/dist-packages (from yfinance) (4.9.1) Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from yfinance) (1.21.6) Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from yfinance) (0.0.10) Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/dist-packages (from yfinance) (1.3.5) Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->yfinance) (2022.1) Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->yfinance) (2.8.2) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24.0->yfinance) (1.15.0) Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26->yfinance) (2.0.12) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26->yfinance) (2.10) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26->yfinance) (2022.6.15) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26->yfinance) (1.24.3)
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import yfinance as yf
import scipy as sp
from scipy.stats import norm
from pylab import plt, mpl
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import scatter_matrix
from statsmodels.graphics.tsaplots import plot_acf
# Global plot styling. Alternative style kept for reference:
#plt.style.use('seaborn')
plt.style.use('fivethirtyeight')
mpl.rcParams['font.family'] = 'DejaVu Sans'
# IPython magic (notebook only, not valid in a plain .py script): render figures inline.
%matplotlib inline
# Pandas display options.
# Use the fully qualified option name: set_option treats its key as a pattern
# matched against all registered options, and the bare 'precision' pattern is
# ambiguous (OptionError) on pandas versions that also register
# 'styler.format.precision'.
pd.set_option('display.precision', 3)
pd.set_option('display.max_colwidth', 100)
def get_prices(tickers, freq_p, st_day, end_day):
    """Download adjusted close prices for several tickers into one DataFrame.

    Each ticker is fetched with ``yf.download`` and its 'Adj Close' series
    becomes one column, named after the ticker, in the order given.

    Parameters
    ----------
    tickers : iterable of str, or a single str
        Symbols to download. A bare string is treated as one symbol rather
        than being iterated character by character.
    freq_p : str
        Passed to ``yf.download`` as ``interval`` (e.g. '1d', '1wk', '1mo').
    st_day, end_day : str
        Passed to ``yf.download`` as ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        One column per ticker. Rows are the union of all dates returned for
        any ticker; a ticker with no data on a given date holds NaN there.
        An empty DataFrame is returned when ``tickers`` is empty.
    """
    if isinstance(tickers, str):
        tickers = [tickers]

    columns = {}
    for t in tickers:
        adj_close = yf.download(t, start=st_day, end=end_day, interval=freq_p)['Adj Close']
        # NOTE(review): some yfinance releases appear to return a one-column
        # DataFrame here instead of a Series - confirm against the installed
        # version. Squeezing keeps a single flat column per ticker either way.
        if isinstance(adj_close, pd.DataFrame):
            adj_close = adj_close.squeeze(axis=1)
        columns[t] = adj_close

    if not columns:
        return pd.DataFrame()

    # Concatenating side by side keeps every date seen for any ticker.
    # Assigning columns one at a time into an empty frame would instead align
    # each later ticker to the first ticker's index and silently drop the
    # dates that the first ticker lacks.
    return pd.concat(columns, axis=1)
# Tickers to download; get_prices stores each one as a column of `prices`.
tic=['SPY', 'IEF', 'TLT','TSLA', 'AAPL', 'NVDA', 'BAC']
prices= get_prices(tic, freq_p='1d', st_day="2011-01-01", end_day="2022-05-31") # interval options: 1d, 1wk, 1mo
# Quick sanity checks: the frame itself, dtypes/non-null counts, and the last 20 rows.
print(prices)
prices.info()
prices.tail(20)
[*********************100%***********************] 1 of 1 completed [*********************100%***********************] 1 of 1 completed [*********************100%***********************] 1 of 1 completed [*********************100%***********************] 1 of 1 completed [*********************100%***********************] 1 of 1 completed [*********************100%***********************] 1 of 1 completed [*********************100%***********************] 1 of 1 completed SPY IEF TLT TSLA AAPL NVDA BAC Date 2011-01-03 101.991 76.004 70.107 5.324 10.064 3.632 12.091 2011-01-04 101.935 76.183 70.189 5.334 10.116 3.620 12.134 2011-01-05 102.465 75.362 68.643 5.366 10.199 3.898 12.355 2011-01-06 102.264 75.752 68.943 5.576 10.191 4.437 12.304 2011-01-07 102.063 76.256 69.311 5.648 10.264 4.561 12.142 ... ... ... ... ... ... ... ... 2022-05-23 395.213 102.925 116.123 674.900 143.110 168.944 35.665 2022-05-24 392.196 103.812 118.414 628.160 140.360 161.506 35.446 2022-05-25 395.661 104.022 118.883 658.800 140.520 169.714 35.635 2022-05-26 403.567 103.972 118.345 707.730 143.780 178.472 36.460 2022-05-27 413.474 104.122 118.634 759.630 149.640 188.070 36.808 [2871 rows x 7 columns] <class 'pandas.core.frame.DataFrame'> DatetimeIndex: 2871 entries, 2011-01-03 to 2022-05-27 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 SPY 2871 non-null float64 1 IEF 2871 non-null float64 2 TLT 2871 non-null float64 3 TSLA 2871 non-null float64 4 AAPL 2871 non-null float64 5 NVDA 2871 non-null float64 6 BAC 2871 non-null float64 dtypes: float64(7) memory usage: 179.4 KB
SPY | IEF | TLT | TSLA | AAPL | NVDA | BAC | |
---|---|---|---|---|---|---|---|
Date | |||||||
2022-05-02 | 412.697 | 101.639 | 116.741 | 902.94 | 157.728 | 195.289 | 35.933 |
2022-05-03 | 414.589 | 101.758 | 117.528 | 909.25 | 159.246 | 195.979 | 36.917 |
2022-05-04 | 427.215 | 102.416 | 118.175 | 952.62 | 165.776 | 203.297 | 38.399 |
2022-05-05 | 412.030 | 101.389 | 114.938 | 873.28 | 156.540 | 188.400 | 37.325 |
2022-05-06 | 409.571 | 100.811 | 113.244 | 865.65 | 157.280 | 186.711 | 37.236 |
2022-05-09 | 396.457 | 101.559 | 114.240 | 787.11 | 152.060 | 169.464 | 36.162 |
2022-05-10 | 397.373 | 101.898 | 115.276 | 800.04 | 154.510 | 175.913 | 35.555 |
2022-05-11 | 391.061 | 102.526 | 117.498 | 734.00 | 146.500 | 166.265 | 35.366 |
2022-05-12 | 390.652 | 102.865 | 117.279 | 728.00 | 142.560 | 161.716 | 34.869 |
2022-05-13 | 399.992 | 102.327 | 115.545 | 769.59 | 147.110 | 177.023 | 34.969 |
2022-05-16 | 398.369 | 102.755 | 115.426 | 724.37 | 145.540 | 172.604 | 34.611 |
2022-05-17 | 406.564 | 101.978 | 114.031 | 761.61 | 149.240 | 181.732 | 35.784 |
2022-05-18 | 390.175 | 102.706 | 116.462 | 709.81 | 140.820 | 169.344 | 34.680 |
2022-05-19 | 387.785 | 103.075 | 116.741 | 709.42 | 137.350 | 171.204 | 34.253 |
2022-05-20 | 387.954 | 103.553 | 118.066 | 663.90 | 137.590 | 166.905 | 33.666 |
2022-05-23 | 395.213 | 102.925 | 116.123 | 674.90 | 143.110 | 168.944 | 35.665 |
2022-05-24 | 392.196 | 103.812 | 118.414 | 628.16 | 140.360 | 161.506 | 35.446 |
2022-05-25 | 395.661 | 104.022 | 118.883 | 658.80 | 140.520 | 169.714 | 35.635 |
2022-05-26 | 403.567 | 103.972 | 118.345 | 707.73 | 143.780 | 178.472 | 36.460 |
2022-05-27 | 413.474 | 104.122 | 118.634 | 759.63 | 149.640 | 188.070 | 36.808 |
# Column dtypes and non-null counts (same call as the one made right after the download).
prices.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 2871 entries, 2011-01-03 to 2022-05-27 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 SPY 2871 non-null float64 1 IEF 2871 non-null float64 2 TLT 2871 non-null float64 3 TSLA 2871 non-null float64 4 AAPL 2871 non-null float64 5 NVDA 2871 non-null float64 6 BAC 2871 non-null float64 dtypes: float64(7) memory usage: 179.4 KB
# Price history with one subplot per ticker column (subplots=True).
prices.plot(figsize=(10, 18), subplots=True)
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f9281c651d0>, <matplotlib.axes._subplots.AxesSubplot object at 0x7f9281b98950>, <matplotlib.axes._subplots.AxesSubplot object at 0x7f9284d94f10>, <matplotlib.axes._subplots.AxesSubplot object at 0x7f9281b06f90>, <matplotlib.axes._subplots.AxesSubplot object at 0x7f9281ac9310>, <matplotlib.axes._subplots.AxesSubplot object at 0x7f9281a84490>, <matplotlib.axes._subplots.AxesSubplot object at 0x7f9281a44b10>], dtype=object)
# Mean and standard deviation of the simple period-over-period returns, per ticker.
prices.pct_change().mean().plot(kind='bar', figsize=(10, 6));
prices.pct_change().std().plot(kind='bar', figsize=(10, 6));
# Log returns; the first row is NaN because shift(1) has no previous price for it.
rets = np.log(prices / prices.shift(1))
# exp of the cumulative log return, i.e. each price path relative to its first observation.
rets.cumsum().apply(np.exp).plot(figsize=(12, 13))
<matplotlib.axes._subplots.AxesSubplot at 0x7f9280219ad0>
# Scatter plots of log returns: SPY on the x axis against three other tickers.
rets.plot.scatter("SPY", "TSLA", alpha=0.9);
rets.plot.scatter("SPY", "AAPL", alpha=0.6);
rets.plot.scatter("SPY", "IEF", alpha=0.8);
# One histogram per ticker; axes are not shared (sharex/sharey=False), so scales differ per panel.
rets.hist(bins=50, sharex=False, sharey=False, xlabelsize=1, ylabelsize=1, figsize=(12,12))
plt.show()
The histograms above show the distribution of each series individually. Next, let's look at the density of each series on a shared x-axis scale.
# Density plot per ticker with a shared x axis (sharex=True) so spreads are comparable.
# The 4x4 layout has more cells than there are columns in `rets`.
rets.plot(kind='density', subplots=True, layout=(4,4), sharex=True, legend=True, fontsize=1, figsize=(15,15))
plt.show()
We can see that the VIX has a much larger variance than the other distributions.
To get a sense of the interdependence of the data, we look at the scatter plot and the correlation matrix.
# Pairwise correlation of the log-return columns.
correlation = rets.corr()
plt.figure(figsize=(15,15))
plt.title('Correlation Matrix')
# annot=True writes each coefficient into its cell; vmax=1 pins the top of the colour scale.
sns.heatmap(correlation, vmax=1, square=True,annot=True,cmap='cubehelix')
<matplotlib.axes._subplots.AxesSubplot at 0x7f9282b73810>
Looking at the correlation plot above, we see some correlation of the predicted variable with the lagged 5-day, 15-day, 30-day, and 60-day returns of MSFT.
# Pairwise scatter plots of the log returns.
# scatter_matrix builds its own figure from `figsize`; calling plt.figure()
# beforehand would only leave an extra, empty figure behind.
scatter_matrix(rets, figsize=(12,12))
plt.show()
<Figure size 1080x1080 with 0 Axes>
!pip install mplcyberpunk
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Requirement already satisfied: mplcyberpunk in /usr/local/lib/python3.7/dist-packages (0.5.1) Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mplcyberpunk) (3.2.2) Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplcyberpunk) (1.4.3) Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplcyberpunk) (2.8.2) Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplcyberpunk) (1.21.6) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplcyberpunk) (0.11.0) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mplcyberpunk) (3.0.9) Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->mplcyberpunk) (4.1.1) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mplcyberpunk) (1.15.0)
import mplcyberpunk
plt.style.use("cyberpunk")
# Keep the axes returned by the plot call so the glow can be applied per subplot.
density_axes = rets.plot(kind='density', subplots=True, layout=(4,4), sharex=True, legend=True, fontsize=1, figsize=(15,15))
# Apply the effects BEFORE plt.show(): once the figure has been shown, a bare
# add_glow_effects() call no longer decorates the rendered density subplots.
for ax in density_axes.ravel():
    # The 4x4 grid has more cells than series; skip axes that hold no lines.
    if ax.get_lines():
        mplcyberpunk.add_glow_effects(ax)
plt.show()
# Same log-return and cumulative-return computation as earlier in the notebook,
# presumably repeated to redraw the chart with the style set above - confirm intent.
rets = np.log(prices / prices.shift(1))
rets.cumsum().apply(np.exp).plot(figsize=(12, 13))
<matplotlib.axes._subplots.AxesSubplot at 0x7f927c39d450>