import pandas as pd
import plotly as pl
import altair as al
al.data_transformers.disable_max_rows() # remove Altair's 5k rows limit on chart data
al.themes.enable('dark') # set the dark theme for the charts rendered below
ThemeRegistry.enable('dark')
import yfinance as yf
def get_ticker_history(ticker, print_info = False):
    '''
    Retrieve ticker history from Yahoo Finance.

    Args:
        ticker: A single ticker symbol, or a list/tuple of ticker symbols.
        print_info: When true, print the ``shortName`` entry of each
            ticker's info record before downloading its history.

    Returns:
        For a single symbol, the DataFrame returned by
        ``yf.Ticker(ticker).history(period="max")``.
        For a list/tuple, the per-symbol histories with their index reset
        to a regular column and a ``ticker`` column added, concatenated
        row-wise. An empty list/tuple yields an empty DataFrame.
    '''
    # Sequence input: download every ticker and stack the results.
    if isinstance(ticker, (list, tuple)):
        frames = []
        for symbol in ticker:
            # Forward print_info so the flag also works for sequence input.
            frame = get_ticker_history(symbol, print_info).reset_index()
            frame['ticker'] = symbol
            frames.append(frame)
        if not frames:
            # pd.concat raises on an empty list; return an empty frame instead.
            return pd.DataFrame()
        return pd.concat(frames, axis=0)

    handle = yf.Ticker(ticker)
    # get stock info
    if print_info:
        print(handle.info['shortName'])
    # get historical market data
    return handle.history(period="max")
# Download the full SPY history, keep a CSV copy on disk, and preview two rows.
df = get_ticker_history(ticker=['SPY'])
df.to_csv(path_or_buf='data.csv')
df.head(n=2)
Date | Open | High | Low | Close | Volume | Dividends | Stock Splits | ticker | |
---|---|---|---|---|---|---|---|---|---|
0 | 1993-01-29 00:00:00-05:00 | 25.352047 | 25.352047 | 25.225917 | 25.334028 | 1003200 | 0.0 | 0 | SPY |
1 | 1993-02-01 00:00:00-05:00 | 25.352041 | 25.514208 | 25.352041 | 25.514208 | 480500 | 0.0 | 0 | SPY |
## Line Chart data
df['date'] = df.Date
# month_day encodes the calendar day as the integer MMDD (e.g. Jan 29 -> 129).
df['month_day'] = [d.month*100 + d.day for d in df.date]
df['year'] = [str(d.year) for d in df.date]
df['year2'] = df.year
# calculate expanding pct change by year: each close relative to the first
# close of the same year. transform() keeps the result aligned with df's own
# index, so the column assignment does not depend on how groupby.apply
# indexes its output.
df['ytd_pct_chng'] = df.groupby('year')['Close'].transform(lambda x: x.div(x.iloc[0]).subtract(1))
print(f'Original df {df.shape}:\n',df.head(2),'\n')

## Correlation Chart data
# One column of closes per year, one row per calendar day; keyword arguments
# are used because DataFrame.pivot no longer accepts them positionally.
closes_by_year = df.pivot(index='month_day', columns='year', values='Close')
# Name the two axes differently before stacking so the stacked index levels
# come out as ("year", "year2") directly.
year_corr = closes_by_year.corr().rename_axis(index='year', columns='year2')
corrMat = year_corr.stack()
corrMat = corrMat.reset_index().rename(columns={0:'corr'})
corrMat = corrMat[~corrMat[['year','year2']].apply(frozenset, axis=1).duplicated()] # remove duplicate pairs
print(f'Correlation df {corrMat.shape}:\n',corrMat.head(),'\n')

## Horizontal Bar data
# calculate percent change
# NOTE(review): this sums the daily percent changes within each year, which is
# not the compounded first-to-last change - confirm this is the intended metric.
annual = df.groupby('year')['Close'].apply(lambda x: x.pct_change().sum()).reset_index()
print(f'Annual Change df {annual.shape}:\n',annual.head(3))
Original df (7484, 14): Date Open High Low Close \ 0 1993-01-29 00:00:00-05:00 25.352047 25.352047 25.225917 25.334028 1 1993-02-01 00:00:00-05:00 25.352041 25.514208 25.352041 25.514208 Volume Dividends Stock Splits ticker date \ 0 1003200 0.0 0 SPY 1993-01-29 00:00:00-05:00 1 480500 0.0 0 SPY 1993-02-01 00:00:00-05:00 month_day year year2 ytd_pct_chng 0 129 1993 1993 0.000000 1 201 1993 1993 0.007112 Correlation df (465, 3): year year2 corr 0 1993 1993 1.000000 1 1993 1994 0.497354 2 1993 1995 0.878284 3 1993 1996 0.826792 4 1993 1997 0.794767 Annual Change df (30, 2): year Close 0 1993 0.087365 1 1994 0.012181 2 1995 0.321471
## Line Chart
# Encodings shared by every layer: calendar day on x, year-to-date change on y,
# one line per year, with 2022 drawn in red and every other year in grey.
x_axis = al.X('monthdate(date):O', title='', axis=al.Axis(labelAngle=-45))
y_axis = al.Y('ytd_pct_chng:Q', title='Percent Change YTD', axis=al.Axis(format='%'))
year_color = al.condition("datum.year == '2022'", al.value('red'), al.value('grey'))
hover_tooltip = ['date', al.Tooltip('ytd_pct_chng', format=".0%")]

# define base chart
base = (
    al.Chart(df, title="Every Year of SPY, 1993 - 2022")
    .mark_line(interpolate='basis')
    .encode(
        x=x_axis,
        y=y_axis,
        detail='year',
        color=year_color,
        tooltip=hover_tooltip,
    )
    .properties(width=1100, height=500)
)

# add highlight on hover selector (picks the year of the nearest point)
highlight = al.selection(
    type='single',
    on='mouseover',
    fields=['year'],
    nearest=True,
)

# fully transparent points that carry the hover selection
points = base.mark_circle().encode(opacity=al.value(0)).add_selection(highlight)

# add hover highlight: size 3 for the selected year, size 1 otherwise
hover_size = al.condition(~highlight, al.value(1), al.value(3))
lines = base.mark_line().encode(size=hover_size)

# emphasise 2022: size 2 for that year, size 1 for all others
size_2022 = al.condition(al.expr.datum['year'] == '2022', al.value(2), al.value(1))
line22 = base.mark_line().encode(size=size_2022)

# layer the three charts, in this order
spy_line = points + lines + line22
spy_line