%load_ext autoreload
%autoreload 2
%load_ext watermark
from pathlib import Path
import time
from pprint import pprint, pformat
import pandas as pd
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm
import numba as nb
from arch.bootstrap import IIDBootstrap, MovingBlockBootstrap, CircularBlockBootstrap
# import visual tools
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline
import seaborn as sns
# import util libs
from tqdm import tqdm, tqdm_notebook
import warnings
warnings.filterwarnings("ignore")
#import sys
#sys.path.append("..")
#from src.tools.pystore_tools import *
#from src.tools.utils import *
%watermark -v -m -g
print()
%watermark --iversions
# Global plotting configuration for the notebook.
# Context overrides handed to seaborn via sns.set_context() below.
sns_params = {
'font.size':9.5,
'font.weight':'medium',
'figure.figsize':(10,7),
}
# Figure resolution: 300 dpi for rendering, 100 dpi as the default when saving.
mpl.rcParams["figure.dpi"] = 300
mpl.rcParams["savefig.dpi"] = 100
# The style sheet is applied after the rcParams assignments above.
# NOTE(review): the 'seaborn-talk' style name, plt.register_cmap and the
# savefig 'frameon' keyword are deprecated or removed in recent matplotlib
# releases — confirm against the pinned matplotlib version.
plt.style.use('seaborn-talk')
#plt.style.use('bmh')
#plt.style.use('dark_background')
sns.set_context(sns_params)
# Keyword arguments collected for saving figures.
# NOTE(review): not used in the visible part of the file, and dpi=300 here
# differs from the savefig.dpi rcParam set above — confirm which is intended.
savefig_kwds=dict(dpi=300, bbox_inches='tight', frameon=True, format='png')
# Ten-colour palette (red through blue), registered as the named colormap
# 'nanex_cmap' so it can be requested by name (see the plot_prices call).
nanex_colors = ("#f92b20", "#fe701b", "#facd1f", "#d6fd1c", "#65fe1b",
"#1bfe42", "#1cfdb4", "#1fb9fa", "#1e71fb", "#261cfd")
nanex_cmap = mpl.colors.ListedColormap(nanex_colors,name='nanex_cmap')
plt.register_cmap('nanex_cmap', cmap=nanex_cmap)
def plot_autocorr(s, lags=50, figsize=(10,7), title=None):
    """
    Draw a 2x2 grid of autocorrelation diagnostics for a return series.

    Panels: ACF of `s` (top-left), ACF of `s.abs()` (top-right),
    PACF of `s` (bottom-left) and ACF of `s**2` (bottom-right).
    Lag 0 is omitted from every panel (zero=False).

    :param s: return series; must support `.abs()` and `**`
    :param lags: number of lags passed to the statsmodels plot calls
    :param figsize: figure size in inches
    :param title: optional figure-level title
    :return: None
    """
    fig = plt.figure(figsize=figsize)
    grid_shape = (2, 2)
    ax_raw = plt.subplot2grid(grid_shape, (0, 0))
    ax_abs = plt.subplot2grid(grid_shape, (0, 1))
    ax_partial = plt.subplot2grid(grid_shape, (1, 0))
    ax_squared = plt.subplot2grid(grid_shape, (1, 1))

    tsa_plots = sm.graphics.tsa
    # options shared by the three ACF panels
    acf_options = dict(fft=True, zero=False, lags=lags)

    tsa_plots.plot_acf(s, ax=ax_raw,
                       title='Autocorrelation of Returns', **acf_options)
    tsa_plots.plot_acf(s.abs(), ax=ax_abs,
                       title='Autocorrelation of Absolute Returns',
                       **acf_options)
    tsa_plots.plot_pacf(s, zero=False, lags=lags, ax=ax_partial,
                        title='Partial Autocorrelation of Returns')
    tsa_plots.plot_acf(s**2, ax=ax_squared,
                       title='Autocorrelation of Squared Returns',
                       **acf_options)

    if title:
        fig.suptitle(title, fontweight='demi', fontsize=16)
    fig.tight_layout()
    # leave headroom for the suptitle
    fig.subplots_adjust(top=0.88)
def plot_mean_dist(returns, sim_returns, ax):
    """
    Histogram of the simulated means with the observed mean marked in red.

    :param returns: observed return series; its `.name` is used in the title
    :param sim_returns: simulated returns; `.mean()` is taken per column
    :param ax: matplotlib axes to draw on
    :return: None
    """
    observed = returns.mean()
    simulated = sim_returns.mean().squeeze()
    ax.set_title(f'{returns.name} mean return: {observed:.4f}')
    hist_ax = sns.distplot(simulated, kde=False, ax=ax)
    # vertical line marks the statistic of the real series
    hist_ax.axvline(observed, color='r')
def plot_std_dist(returns, sim_returns, ax):
    """
    Histogram of the simulated standard deviations with the observed one in red.

    :param returns: observed return series; its `.name` is used in the title
    :param sim_returns: simulated returns; `.std()` is taken per column
    :param ax: matplotlib axes to draw on
    :return: None
    """
    observed = returns.std()
    simulated = sim_returns.std().squeeze()
    ax.set_title(f'{returns.name} return std: {observed:.4f}')
    hist_ax = sns.distplot(simulated, kde=False, ax=ax)
    # vertical line marks the statistic of the real series
    hist_ax.axvline(observed, color='r')
def plot_min_dist(returns, sim_returns, ax):
    """
    Histogram of the simulated minimum returns with the observed minimum in red.

    :param returns: observed return series; its `.name` is used in the title
    :param sim_returns: simulated returns; `.min()` is taken per column
    :param ax: matplotlib axes to draw on
    :return: None
    """
    observed = returns.min()
    simulated = sim_returns.min().squeeze()
    ax.set_title(f'{returns.name} return min: {observed:.4f}')
    hist_ax = sns.distplot(simulated, kde=False, ax=ax)
    # vertical line marks the statistic of the real series
    hist_ax.axvline(observed, color='r')
def plot_max_dist(returns, sim_returns, ax):
    """
    Histogram of the simulated maximum returns with the observed maximum in red.

    :param returns: observed return series; its `.name` is used in the title
    :param sim_returns: simulated returns; `.max()` is taken per column
    :param ax: matplotlib axes to draw on
    :return: None
    """
    observed = returns.max()
    simulated = sim_returns.max().squeeze()
    ax.set_title(f'{returns.name} return max: {observed:.4f}')
    hist_ax = sns.distplot(simulated, kde=False, ax=ax)
    # vertical line marks the statistic of the real series
    hist_ax.axvline(observed, color='r')
def plot_autocorr_dist(returns, sim_returns, ax):
    """
    Histogram of the simulated autocorrelations with the observed one in red.

    Autocorrelation is computed with `Series.autocorr()` at its default lag.

    :param returns: observed return series; its `.name` is used in the title
    :param sim_returns: simulated returns; autocorr is applied per column
    :param ax: matplotlib axes to draw on
    :return: None
    """
    observed = returns.autocorr()
    simulated = sim_returns.apply(pd.Series.autocorr).squeeze()
    ax.set_title(f'{returns.name} return autocorr: {observed:.4f}')
    hist_ax = sns.distplot(simulated, kde=False, ax=ax)
    # vertical line marks the statistic of the real series
    hist_ax.axvline(observed, color='r')
def plot_stat_dist(returns, sim_returns, figsize=(10,7)):
    """
    Plot the simulated distributions of five summary statistics on one figure.

    Layout is a 3x2 grid: mean and std on the first row, min and max on the
    second, and the autocorrelation panel spanning the whole third row. Each
    panel marks the statistic of the real series.

    :param returns: observed return series; its `.name` is used in titles
    :param sim_returns: simulated returns, one path per column
    :param figsize: figure size in inches
    :return: None
    """
    # Spacing is handled by tight_layout()/subplots_adjust() below, so the
    # figure is created without constrained_layout: the two mechanisms are
    # incompatible and matplotlib warns when they are combined.
    fig = plt.figure(figsize=figsize)
    gs = fig.add_gridspec(3, 2)
    plot_mean_dist(returns, sim_returns, fig.add_subplot(gs[0, 0]))
    plot_std_dist(returns, sim_returns, fig.add_subplot(gs[0, 1]))
    plot_min_dist(returns, sim_returns, fig.add_subplot(gs[1, 0]))
    plot_max_dist(returns, sim_returns, fig.add_subplot(gs[1, 1]))
    # autocorrelation panel spans both columns of the last row
    plot_autocorr_dist(returns, sim_returns, fig.add_subplot(gs[2, :]))
    fig.suptitle(f'{returns.name} simulated stat distributions',
                 fontweight='demi', fontsize=16)
    fig.tight_layout()
    # leave headroom for the suptitle
    fig.subplots_adjust(top=0.88)
def to_returns(s):
    """
    Convert a price series to log returns.

    Each value is log(s[t] / s[t-1]); missing values (including the first
    observation, which has no predecessor) are dropped.

    :param s: pandas object holding prices
    :return: log returns with missing values removed
    """
    price_ratio = s / s.shift(1)
    log_returns = np.log(price_ratio)
    return log_returns.dropna()
def to_price_index(df, start=100):
    """
    Turn log returns into a price index.

    The index is start * exp(cumulative sum of the returns), so the first
    value is start * exp(first return).

    :param df: pandas object of log returns
    :param start: scaling level applied to the compounded returns
    :return: price index with the same shape and labels as `df`
    """
    cumulative_log_return = df.cumsum()
    growth_factor = np.exp(cumulative_log_return)
    return start * growth_factor
def cdescribe(x, n_cols=None):
    """
    `describe()` output extended with 'skew' and 'kurtosis' rows.

    :param x: pandas Series or DataFrame
    :param n_cols: if truthy, describe a random sample of this many columns
        of `x` instead of all of `x`
    :return: the describe() result with the two extra rows appended
    """
    # a falsy n_cols (None or 0) means "use everything"
    target = x.sample(n_cols, axis=1) if n_cols else x
    summary = target.describe()
    summary.loc['skew'] = target.skew()
    summary.loc['kurtosis'] = target.kurtosis()
    return summary
def CBB(s, blocksize, N_paths):
    """
    Simulate return paths with arch's CircularBlockBootstrap.

    :param s: return series to resample
    :param blocksize: block size passed to CircularBlockBootstrap
    :param N_paths: number of bootstrap replications to draw
    :return: DataFrame with one simulated path per column; rows and columns
        are relabelled with fresh integer positions
    """
    sampler = CircularBlockBootstrap(blocksize, s)
    # each draw is indexed as draw[0][0] to get the resampled series; its
    # original index labels are discarded
    paths = [
        draw[0][0].reset_index(drop=True)
        for draw in tqdm(sampler.bootstrap(N_paths))
    ]
    return pd.concat(paths, axis=1, ignore_index=True)
def MBB(s, blocksize, N_paths):
    """
    Simulate return paths with arch's MovingBlockBootstrap.

    :param s: return series to resample
    :param blocksize: block size passed to MovingBlockBootstrap
    :param N_paths: number of bootstrap replications to draw
    :return: DataFrame with one simulated path per column; rows and columns
        are relabelled with fresh integer positions
    """
    sampler = MovingBlockBootstrap(blocksize, s)
    # each draw is indexed as draw[0][0] to get the resampled series; its
    # original index labels are discarded
    paths = [
        draw[0][0].reset_index(drop=True)
        for draw in tqdm(sampler.bootstrap(N_paths))
    ]
    return pd.concat(paths, axis=1, ignore_index=True)
def IIDB(s, N_paths):
    """
    Simulate return paths with arch's IIDBootstrap.

    :param s: return series to resample
    :param N_paths: number of bootstrap replications to draw
    :return: DataFrame with one simulated path per column; rows and columns
        are relabelled with fresh integer positions
    """
    sampler = IIDBootstrap(s)
    # each draw is indexed as draw[0][0] to get the resampled series; its
    # original index labels are discarded
    paths = [
        draw[0][0].reset_index(drop=True)
        for draw in tqdm(sampler.bootstrap(N_paths))
    ]
    return pd.concat(paths, axis=1, ignore_index=True)
def compare_stats(x, y, n_cols=None):
    """
    Put the extended describe() tables of `x` and `y` side by side.

    Side effect: sets the global pandas float display format to four
    decimals with thousands separators.

    :param x: first dataset, described in full
    :param y: second dataset, described via cdescribe(y, n_cols=n_cols)
    :param n_cols: forwarded to cdescribe for `y`
    :return: column-wise concatenation of the two summaries
    """
    pd.options.display.float_format = '{:,.4f}'.format
    stats_x = cdescribe(x)
    stats_y = cdescribe(y, n_cols=n_cols)
    return pd.concat([stats_x, stats_y], axis=1)
def view_all(real, sims, n_cols=20, cmap=None):
    """
    Show the full set of diagnostics for a real series and its simulations.

    Displays the summary-statistics comparison, the simulated stat
    distributions, the autocorrelation panels of the real series, and the
    autocorrelation panels of one randomly chosen simulated path.

    :param real: observed return series; its `.name` is used in titles
    :param sims: simulated returns, one path per column; columns are looked
        up by integer position, so they are expected to be labelled 0..n-1
    :param n_cols: number of simulated columns sampled for the stats table
    :param cmap: colormap passed to plt.set_cmap
    :return: None
    """
    plt.set_cmap(cmap)
    # honour the caller's n_cols instead of a hard-coded 20
    display(compare_stats(real, sims, n_cols=n_cols))
    plot_stat_dist(real, sims)
    plot_autocorr(real, title=f'{real.name} Real Returns')
    rand_col = np.random.randint(len(sims.columns), size=1)[0]
    plot_autocorr(sims[rand_col],
                  title=f'Simulated Return Path {rand_col}')
def plot_realizations(real, sims, start,
                      n_plot_paths=50, figsize=(10,7), cmap=None):
    """
    Plot a random subset of simulated price paths against the real path.

    Returns are converted to price indices with to_price_index(); the real
    series has its index reset so it is plotted against integer positions.

    :param real: observed return series; its `.name` is used in the title
    :param sims: simulated returns, one path per column
    :param start: scaling level passed to to_price_index
    :param n_plot_paths: number of simulated columns sampled for plotting
    :param figsize: figure size in inches
    :param cmap: colormap passed to plt.set_cmap
    :return: None
    """
    plt.set_cmap(cmap)
    simulated_prices = to_price_index(sims, start=start)
    fig, ax = plt.subplots(figsize=figsize)

    chosen_paths = simulated_prices.sample(n_plot_paths, axis=1)
    chosen_paths.plot(legend=False, alpha=0.7, lw=1., ax=ax)

    # real path is drawn as a thick dashed black line
    actual_prices = to_price_index(real.reset_index(drop=True), start=start)
    actual_prices.plot(legend=True, ax=ax, lw=5, ls='--', color='k')

    plt.title(f'{real.name} {n_plot_paths} simulated price paths')
def cprint(df, nrows=None, sample=False):
    """
    custom print function to view pandas dataframes

    Prints a banner, a few rows of the data, and the `DataFrame.info()`
    report. Objects that are not DataFrames are converted with
    `to_frame()` first.

    :param df: dataframe, or an object with a `to_frame()` method (e.g. Series)
    :param nrows: number of rows to show; falsy values (None, 0) mean 5
    :param sample: bool, show a random sample instead of the last rows
    :return: None
    :raises ValueError: if `df` is not a DataFrame and `to_frame()` fails
    """
    if not isinstance(df, pd.DataFrame):
        try:
            df = df.to_frame()
        except Exception as err:
            # keep the original failure attached as the cause
            raise ValueError('object cannot be coerced to df') from err
    if not nrows:
        nrows = 5
    print('-' * 79)
    print('dataframe information')
    print('-' * 79)
    if sample:
        print(df.sample(nrows))
    else:
        print(df.tail(nrows))
    print('-' * 50)
    # info() writes its report itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line)
    df.info()
    print('-' * 79)
    print()
# Load the NQ and GC data from CSV files in the working directory; the first
# CSV column becomes the index.
nq = pd.read_csv('NQ.csv', index_col=0)
gc = pd.read_csv('GC.csv', index_col=0)
# Column 'C' is used as the price series (presumably the close — confirm
# against the data files).
nq_price = nq['C']
gc_price = gc['C']
# to_returns() already drops missing values, so the trailing .dropna() is a no-op.
nq_returns = to_returns(nq_price).dropna()
gc_returns = to_returns(gc_price).dropna()
# The series names are used in the plot titles produced by the helpers above.
nq_returns.name = "NQ - NASDAQ 100 Futures"
gc_returns.name = "GC - Gold Futures"
# Print the tail and info() summary of each series.
cprint(nq_returns)
cprint(nq_price)
cprint(gc_returns)
cprint(gc_price)
# Bootstrap settings: number of simulated paths and the block size handed to CBB.
N_paths = 1000
block_size = 4000
# Simulate NQ return paths with the circular block bootstrap and show the diagnostics.
nq_sim_cbb = CBB(nq_returns, blocksize=block_size, N_paths=N_paths)
view_all(nq_returns, nq_sim_cbb, cmap=None)
def plot_prices(real, sims, start, price_index,
                n_plot_paths=50, figsize=(10,7), cmap=None):
    """
    Plot simulated price paths against the real path on a supplied index.

    Same idea as plot_realizations, but both the simulated and the real
    price indices are relabelled with `price_index`, and the full simulated
    price table is displayed before plotting.

    :param real: observed return series; its `.name` is used in the title
    :param sims: simulated returns, one path per column
    :param start: scaling level passed to to_price_index
    :param price_index: index assigned to both price tables; its length must
        match the number of rows
    :param n_plot_paths: number of simulated columns sampled for plotting
    :param figsize: figure size in inches
    :param cmap: colormap passed to plt.set_cmap
    :return: None
    """
    plt.set_cmap(cmap)
    simulated_prices = to_price_index(sims, start=start)
    simulated_prices.index = price_index
    display(simulated_prices)

    fig, ax = plt.subplots(figsize=figsize)
    chosen_paths = simulated_prices.sample(n_plot_paths, axis=1)
    chosen_paths.plot(legend=False, alpha=0.7, lw=1., ax=ax)

    # real path is drawn as a thick dashed black line on the same index
    actual_prices = to_price_index(real.reset_index(drop=True), start=start)
    actual_prices.index = price_index
    actual_prices.plot(legend=True, ax=ax, lw=4, ls='--', color='k')

    plt.title(f'{real.name} {n_plot_paths} simulated price paths')
    plt.xticks(rotation=10)
# Plot 30 simulated NQ price paths against the real path, scaled with start=7500.
# nq.index[1:] skips the first label, presumably because to_returns() drops
# the first observation — assumes no other rows were dropped; confirm.
plot_prices(nq_returns, nq_sim_cbb, start=7500, price_index=nq.index[1:], n_plot_paths=30, cmap='nanex_cmap')