#!/usr/bin/env python
# coding: utf-8

# In[1]:


import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import pandas_datareader.data as web
import scipy.stats as scs

import matplotlib as mpl
from matplotlib import cm
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator
import seaborn as sns

import os, gc, sys
get_ipython().run_line_magic('matplotlib', 'inline')

import sklearn.mixture as mix
from markovchain_plot import MarkovChain
from hmmlearn.hmm import GaussianHMM
from tqdm import tqdm
import datetime

SEED = 515
np.set_printoptions(precision=8, suppress=True, threshold=np.inf)


# ## Markov sample visualization

# In[2]:


P = np.array([[0.5, 0.5],
              [0.1, 0.9]])
mc = MarkovChain(P, ['A', 'B'])
mc.draw()


# In[3]:


P = np.array([[0.3, 0.3, 0.2, 0.2],
              [0.1, 0.05, 0.05, 0.8],
              [0.5, 0.1, 0.2, 0.2],
              [0.4, 0.1, 0.45, 0.05]])
mc = MarkovChain(P, ['A', 'B', 'C', 'D'])
mc.draw()
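# Each row of a transition matrix must sum to one. As a quick numerical
# companion to the diagrams, the sketch below (an addition to the original
# notebook) computes the stationary distribution of the 2-state chain above,
# i.e. the row vector pi satisfying pi P = pi, via the left eigenvector of P
# with eigenvalue 1.

# In[ ]:


# Stationary distribution of the 2-state chain drawn above.
P2 = np.array([[0.5, 0.5],
               [0.1, 0.9]])
evals, evecs = np.linalg.eig(P2.T)            # left eigenvectors of P2
pi = np.real(evecs[:, np.argmax(np.isclose(evals, 1.0))])
pi /= pi.sum()                                # normalize to a probability vector
print('stationary distribution:', np.round(pi, 4))   # ~ [0.1667, 0.8333]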
# ## Markov chain application
#
# The example is a credit-risk application for a loan portfolio. Assuming that we have one year of portfolio performance, we can estimate each transition probability (e.g. P(Good Loan -> Bad Loan) = 4%, the last entry of the first row below) and assemble them into a transition matrix. In this example the portfolio states are Good Loans (GL), Risky Loans (RL), Paid-Up (PU), and Bad Loans (BL); PU and BL are absorbing states, so a loan that reaches them stays there.
#

# In[4]:


P = np.array([[0.75, 0.06, 0.15, 0.04],
              [0.05, 0.58, 0.02, 0.35],
              [0.0, 0.0, 1.0, 0.0],
              [0.0, 0.0, 0.0, 1.0]])
mc = MarkovChain(P, ['GL', 'RL', 'PU', 'BL'])
mc.draw()


# Now let's simulate three different portfolios.

# In[5]:


portA = np.array([0.9, 0.1, 0, 0])
portB = np.array([0.5, 0.5, 0, 0])
portC = np.array([0.2, 0.8, 0, 0])


# In[6]:


def simulate_loan_result(port: np.ndarray, n_year: int) -> None:
    """Propagate a starting portfolio mix through the chain, one year per step."""
    print(f'Starting port consists of Good loans: {port[0]}, Risky loans: {port[1]}')
    _out = port.astype(float)
    for i in range(1, n_year + 1):
        _out = _out @ P
        probs = np.round(_out, 6)
        print(f'Year {i}: probs are {probs}; Paid up: {probs[2]}, Bad loan: {probs[3]}')


# In[7]:


simulate_loan_result(portA, 10)
simulate_loan_result(portB, 10)
simulate_loan_result(portC, 10)
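# Because PU and BL are absorbing, the long-run outcome of each portfolio can
# also be obtained in closed form. With Q the transient-to-transient block of P
# (GL, RL) and R the transient-to-absorbing block (PU, BL), the fundamental
# matrix N = (I - Q)^-1 gives absorption probabilities B = N R. The sketch
# below is an addition to the original notebook; it reproduces the limits the
# simulations above converge to.

# In[ ]:


Q = P[:2, :2]                      # transient -> transient (GL, RL)
R = P[:2, 2:]                      # transient -> absorbing (PU, BL)
N = np.linalg.inv(np.eye(2) - Q)   # fundamental matrix
B = N @ R                          # B[i, j] = P(absorbed in j | start in i)
print('P(PU), P(BL) from GL:', np.round(B[0], 4))           # ~ [0.6294, 0.3706]
print('P(PU), P(BL) from RL:', np.round(B[1], 4))           # ~ [0.1225, 0.8775]
print('portA long-run split:', np.round(portA[:2] @ B, 4))  # ~ [0.5787, 0.4213]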
# ## Hidden Markov model for Regime Detection
#
# Let's see how an HMM can be used in quantitative finance. The largest issue we face when trying to apply predictive techniques to asset returns is that they form __non-stationary time series__: the expected mean and volatility of returns change over time. Most time series models and techniques assume that the data is stationary, which is a major weakness of these models.
#
# <br>
#
# Instead, let's frame the problem differently: we know that time series exhibit temporary periods where the expected means and variances are stable through time. __These periods, or regimes, can be linked to the hidden states of an HMM__.
#
# If that is the case, then all we need are observable variables whose behavior allows us to _infer_ the true hidden states. If we can better estimate an asset's most likely regime, including the associated means and variances, then our predictive models become more adaptable and will likely improve. We can also use the estimated regime parameters for better scenario analysis.
#
# <br>
#
# In this example, the observable variables I use are: the underlying asset returns, the ICE BofA US High Yield Index Total Return Index, the TED spread, the 10-year minus 2-year constant maturity spread, and the 10-year minus 3-month constant maturity spread.

# Get the stock data from 2010 onwards.

# In[8]:


mkt = 'GE'                  # stock to watch
f1 = 'TEDRATE'              # TED spread (ref: https://fred.stlouisfed.org/series/TEDRATE)
f2 = 'T10Y2Y'               # 10-year minus 2-year constant maturity spread
f3 = 'T10Y3M'               # 10-year minus 3-month constant maturity spread
f4 = 'BAMLHYH0A0HYM2TRIV'   # ICE BofA US High Yield Index Total Return Index Value

start = datetime.date(2010, 1, 1)
end = datetime.date.today()

mkt_df = web.DataReader([mkt], 'yahoo', start, end)['Adj Close'] \
    .assign(sret=lambda x: np.log(x[mkt] / x[mkt].shift(1))) \
    .dropna()

data_df = web.DataReader([f1, f2, f3, f4], 'fred', start, end) \
    .join(mkt_df, how='inner') \
    .dropna()

display(data_df.head(5))

plt.figure(figsize=(10, 6))
plt.plot(data_df[mkt])
plt.show()
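# Note: the Yahoo endpoint in pandas_datareader has been unreliable for some
# time. If the call above fails, a minimal fallback sketch using the separate
# yfinance package (an assumption -- it is not part of this notebook's imports
# and must be installed, e.g. `pip install yfinance`) could look like this:

# In[ ]:


import yfinance as yf   # assumption: installed separately

# auto_adjust=True makes 'Close' the adjusted price, matching 'Adj Close' above.
raw = yf.download(mkt, start=str(start), end=str(end), auto_adjust=True)
# Newer yfinance versions return MultiIndex columns even for a single ticker.
if isinstance(raw.columns, pd.MultiIndex):
    raw.columns = raw.columns.get_level_values(0)
mkt_df = (raw[['Close']]
          .rename(columns={'Close': mkt})
          .assign(sret=lambda x: np.log(x[mkt] / x[mkt].shift(1)))
          .dropna())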
# In this notebook, I will try two methods: sklearn's `GaussianMixture` and hmmlearn's `GaussianHMM`.
#
# <br>
#
# Both models require us to specify the number of components to fit to the time series; these components can be thought of as regimes. For this example, we will use 3 components, assumed to correspond to high-, neutral-, and low-volatility regimes. A quick sanity check of that choice follows below.
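# Rather than fixing the number of components purely by assumption, we can
# compare candidate counts with an information criterion. The sketch below is
# an addition to the original workflow: it fits mixtures with k = 1..5 and
# prints the BIC, where lower is better. (For brevity it uses the full sample;
# the modeling below fits on a train split only.)

# In[ ]:


X_all = data_df[[f1, f2, f3, f4, 'sret']].values
for k in range(1, 6):
    gm = mix.GaussianMixture(n_components=k, covariance_type="full",
                             n_init=10, random_state=SEED).fit(X_all)
    print(f'{k} components: BIC = {gm.bic(X_all):,.1f}')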
# In[9]:


N_COMPONENTS = 3
COL_ = 'sret'
FT_COLS = [f1, f2, f3, f4, COL_]

def plot_in_sample_hidden_states(hmm_model, df, stock_col: str = mkt):
    """
    Plot the adjusted closing prices masked by the in-sample hidden states
    as a mechanism to understand the market regimes.
    """
    X = df[FT_COLS].values
    hidden_states = hmm_model.predict(X)
    fig, axs = plt.subplots(hmm_model.n_components, sharex=True, sharey=True,
                            figsize=(18, 14))
    colours = cm.rainbow(np.linspace(0, 1, hmm_model.n_components))
    for i, (ax, colour) in enumerate(zip(axs, colours)):
        mask = hidden_states == i
        ax.plot_date(df.index[mask], df.loc[:, stock_col][mask],
                     ".", linestyle='none', c=colour)
        ax.set_title("Hidden State #%s" % i)
        ax.xaxis.set_major_locator(YearLocator())
        ax.xaxis.set_minor_locator(MonthLocator())
        ax.grid(True)
    plt.show()


# ### Gaussian Mixture
#
# We can use sklearn's `GaussianMixture` to fit a model that estimates these regimes. Mixture models implement a closely related, unsupervised form of density estimation, using the expectation-maximization algorithm to estimate the means and covariances of the hidden states.

# In[10]:


train_df = data_df.loc[:'2019-01-01'].dropna()
test_df = data_df.loc['2019-01-01':].dropna()

X_train = train_df[FT_COLS].values
X_test = test_df[FT_COLS].values

model = mix.GaussianMixture(n_components=N_COMPONENTS,
                            covariance_type="full",
                            n_init=100,
                            random_state=SEED).fit(X_train)

# Predict the optimal sequence of hidden states
hidden_states = model.predict(X_train)

print("Means and vars of each hidden state")
for i in range(model.n_components):
    print(f'{i}th hidden state')
    print('mean: ', model.means_[i])
    print('var: ', np.diag(model.covariances_[i]))
    print()

sns.set(font_scale=1.25)
style_kwds = {'xtick.major.size': 3, 'ytick.major.size': 3, 'legend.frameon': True}
sns.set_style('white', style_kwds)

fig, axs = plt.subplots(model.n_components, sharex=True, sharey=True, figsize=(12, 9))
colors = cm.rainbow(np.linspace(0, 1, model.n_components))

for i, (ax, color) in enumerate(zip(axs, colors)):
    # Use fancy indexing to plot data in each state.
    mask = hidden_states == i
    ax.plot_date(train_df.index.values[mask], train_df[COL_].values[mask],
                 ".-", c=color)
    ax.set_title("{0}th hidden state".format(i), fontsize=16, fontweight='demi')
    # Format the ticks.
    ax.xaxis.set_major_locator(YearLocator())
    ax.xaxis.set_minor_locator(MonthLocator())

sns.despine(offset=10)
plt.tight_layout()


# The last position in the `mean` and `var` vectors corresponds to the stock's log return, i.e. the daily expected mean and variance of GE returns.
#
# <br>
#
# It appears the first hidden state represents the neutral-volatility regime, with the largest expected return and medium variance. The last hidden state represents the high-volatility regime, with the highest variance and negative expected returns.

# In[11]:


plot_in_sample_hidden_states(model, train_df)


# In[12]:


sns.set(font_scale=1.5)
states = (pd.DataFrame(hidden_states, columns=['states'], index=train_df.index)
          .join(train_df, how='inner')
          .assign(mkt_cret=train_df.sret.cumsum())
          .reset_index(drop=False)
          .rename(columns={'index': 'Date'}))
display(states.head())

sns.set_style('white', style_kwds)
order = np.arange(model.n_components)
fg = sns.FacetGrid(data=states, hue='states', hue_order=order,
                   palette=colors, aspect=1.31, height=12)
fg.map(plt.scatter, 'Date', mkt, alpha=0.8).add_legend()
sns.despine(offset=10)
fg.fig.suptitle(f'Historical {mkt} Regimes', fontsize=24, fontweight='demi')
plt.show()


# In[13]:


hidden_states_test = model.predict(X_test)

sns.set(font_scale=1.5)
states = (pd.DataFrame(hidden_states_test, columns=['states'], index=test_df.index)
          .join(test_df, how='inner')
          .assign(mkt_cret=test_df.sret.cumsum())
          .reset_index(drop=False)
          .rename(columns={'index': 'Date'}))
display(states.head())

sns.set_style('white', style_kwds)
order = np.arange(model.n_components)
fg = sns.FacetGrid(data=states, hue='states', hue_order=order,
                   palette=colors, aspect=1.31, height=12)
fg.map(plt.scatter, 'Date', mkt, alpha=0.8).add_legend()
sns.despine(offset=10)
fg.fig.suptitle(f'{mkt} Regimes (out-of-sample)', fontsize=24, fontweight='demi')
plt.show()
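# The hard state labels above hide how confident the model is on any given day.
# `GaussianMixture.predict_proba` returns the posterior probability of each
# regime; the sketch below (an addition) plots them for the test period using
# the fitted `model` and `X_test` from above.

# In[ ]:


probs = model.predict_proba(X_test)   # shape: (n_samples, n_components)
prob_df = pd.DataFrame(probs, index=test_df.index,
                       columns=[f'regime_{i}' for i in range(model.n_components)])
prob_df.plot(figsize=(12, 4), title='GMM posterior regime probabilities')
plt.show()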
# ### HMMLearn Gaussian HMM
#
# For more information on the library, please refer to this [link](https://hmmlearn.readthedocs.io/en/latest/).

# In[14]:


hmm_model = GaussianHMM(n_components=N_COMPONENTS, covariance_type="full",
                        n_iter=1000, random_state=SEED).fit(X_train)
hidden_states = hmm_model.predict(X_train)

print("Means and vars of each hidden state")
for i in range(hmm_model.n_components):
    print(f'{i}th hidden state')
    print('mean: ', hmm_model.means_[i])
    print('var: ', np.diag(hmm_model.covars_[i]))
    print()

sns.set(font_scale=1.25)
style_kwds = {'xtick.major.size': 3, 'ytick.major.size': 3, 'legend.frameon': True}
sns.set_style('white', style_kwds)

fig, axs = plt.subplots(hmm_model.n_components, sharex=True, sharey=True, figsize=(12, 9))
colors = cm.rainbow(np.linspace(0, 1, hmm_model.n_components))

for i, (ax, color) in enumerate(zip(axs, colors)):
    # Use fancy indexing to plot data in each state.
    mask = hidden_states == i
    ax.plot_date(train_df.index.values[mask], train_df[COL_].values[mask],
                 ".-", c=color)
    ax.set_title("{0}th hidden state".format(i), fontsize=16, fontweight='demi')
    # Format the ticks.
    ax.xaxis.set_major_locator(YearLocator())
    ax.xaxis.set_minor_locator(MonthLocator())

sns.despine(offset=10)
plt.tight_layout()


# In[15]:


sns.set(font_scale=1.5)
states = (pd.DataFrame(hidden_states, columns=['states'], index=train_df.index)
          .join(train_df, how='inner')
          .assign(mkt_cret=train_df.sret.cumsum())
          .reset_index(drop=False)
          .rename(columns={'index': 'Date'}))
display(states.head())

sns.set_style('white', style_kwds)
order = np.arange(hmm_model.n_components)
fg = sns.FacetGrid(data=states, hue='states', hue_order=order,
                   palette=colors, aspect=1.31, height=12)
fg.map(plt.scatter, 'Date', mkt, alpha=0.8).add_legend()
sns.despine(offset=10)
fg.fig.suptitle(f'Historical {mkt} Regimes', fontsize=24, fontweight='demi')
plt.show()


# In[16]:


plot_in_sample_hidden_states(hmm_model, train_df)


# In[17]:


hidden_states_test = hmm_model.predict(X_test)

sns.set(font_scale=1.5)
states = (pd.DataFrame(hidden_states_test, columns=['states'], index=test_df.index)
          .join(test_df, how='inner')
          .assign(mkt_cret=test_df.sret.cumsum())
          .reset_index(drop=False)
          .rename(columns={'index': 'Date'}))

sns.set_style('white', style_kwds)
order = np.arange(hmm_model.n_components)
fg = sns.FacetGrid(data=states, hue='states', hue_order=order,
                   palette=colors, aspect=1.31, height=12)
fg.map(plt.scatter, 'Date', mkt, alpha=0.8).add_legend()
sns.despine(offset=10)
fg.fig.suptitle(f'{mkt} Regimes (out-of-sample)', fontsize=24, fontweight='demi')
plt.show()
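# Unlike the plain mixture, the HMM also estimates how regimes persist over
# time. The sketch below (an addition) inspects the fitted transition matrix
# and the implied expected regime durations, 1 / (1 - p_ii) days for a
# first-order chain.

# In[ ]:


print('Transition matrix (row i: P(state i -> state j) for the next day):')
print(np.round(hmm_model.transmat_, 4))

for i in range(hmm_model.n_components):
    p_stay = hmm_model.transmat_[i, i]
    print(f'State {i}: expected duration ~ {1.0 / (1.0 - p_stay):.1f} days')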