#!/usr/bin/env python
# coding: utf-8

# Replicate [Dynamic Return Dependencies Across Industries: A Machine Learning Approach](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3120110&download=yes) by David Rapach, Jack Strauss, Jun Tu and Guofu Zhou.
#
# 1) Use industry returns from [Ken French](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html)
#
# 2) Forecast (for example) this month's Chemicals industry return using last month's returns from all 30 industries
#
# 3) Use LASSO for predictor subset selection over the entire 1960-2016 period, to determine that e.g. Beer is predicted by Food, Clothing, Coal
#
# 4) Use the LASSO-selected predictors and simple linear regression to predict returns
#
# 5) Generate portfolios and run backtests.
#
# - Predictor selection: finds the same predictors except for 2 industries. Possibly due to the paper's use of AICc instead of AIC (no sklearn implementation appears to support AICc)
#
# - Prediction by industry: R-squareds line up closely
#
# - Portfolio performance: similar ballpark results. Differences may stem from AICc vs. AIC; the paper also standardizes predictors, which is standard practice. Finally, their reported mean returns don't line up with annualized geometric means; they appear to be calculating something different.
#
# - Replicating exactly is hard, but the results replicate closely and perform well
#
# 6) Run various sklearn regressors to see which performs best, and study which metrics predict performance. MSE does not predict Sharpe. Kendall's tau, i.e. the correlation of predicted vs. actual rankings, performs better.
#
# 7) Tune ElasticNet to get slightly better performance than LASSO/OLS
#
# 8) Run Keras NNs. They don't improve on LASSO/OLS or ElasticNet.

# In[1]:

import os
import sys
import warnings
import numpy as np
import pandas as pd
import time
import copy
import random
from itertools import product

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # hide messy TensorFlow warnings
warnings.filterwarnings("ignore")  # hide messy numpy warnings

import sklearn
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix, mean_squared_error, explained_variance_score, r2_score, accuracy_score
from sklearn.linear_model import LinearRegression, Lasso, lasso_path, lars_path, LassoLarsIC
# note: private module paths like sklearn.ensemble.forest were removed in scikit-learn >= 0.22
from sklearn.ensemble.forest import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.utils.testing import all_estimators
import xgboost

from scipy.stats import chisquare, kendalltau

import tensorflow as tf
tf.set_random_seed(1764)
print(tf.__version__)

# confirm GPU is in use
with tf.device('/gpu:0'):
    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
    c = tf.matmul(a, b)
with tf.Session() as sess:
    print(sess.run(c))

import keras
from keras.layers.core import Dense, Activation
from keras.layers import Input
from keras.models import Model
from keras.layers.recurrent import LSTM, GRU
from keras.regularizers import l1
from keras.models import Sequential
from keras.models import load_model

#import ffn

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn as sns

# this would make dataframes display as sortable widgets
# commented out because sortable tables are not viewable using nbviewer
# from beakerx import *

import plotly
# print(plotly.__version__)  # requires version >= 1.9.0
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *
import plotly.figure_factory as ff
init_notebook_mode(connected=True)

random.seed(1764)
np.random.seed(1764)
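# A hedged aside (not in the paper or the run below): sklearn's LassoLarsIC supports only AIC and BIC, not the AICc the paper uses. A minimal sketch of AICc selection along the LASSO path, assuming centered X and y and the textbook criterion AIC ~ n*log(RSS/n) + 2k (sklearn scales its AIC differently, so values won't match LassoLarsIC's).

# In[ ]:

def lasso_aicc_selection(X, y):
    """Sketch: pick the LASSO path step minimizing AICc = AIC + 2k(k+1)/(n-k-1)."""
    n = len(y)
    alphas, active, coefs = lars_path(X, y, method='lasso')  # coefs: (n_features, n_steps)
    best_aicc, best_cols = np.inf, []
    for step in range(coefs.shape[1]):
        beta = coefs[:, step]
        k = np.sum(beta != 0) + 1  # +1 for the estimated noise variance
        resid = y - X @ beta
        rss = resid @ resid
        if rss <= 0 or n - k - 1 <= 0:
            continue  # AICc undefined at these steps
        aic = n * np.log(rss / n) + 2 * k
        aicc = aic + 2 * k * (k + 1) / (n - k - 1)
        if aicc < best_aicc:
            best_aicc, best_cols = aicc, list(np.flatnonzero(beta))
    return best_cols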
# ### 1. Replicate paper

# In[2]:

print("Loading data...")
data = pd.read_csv("30_Industry_Portfolios.csv")
data = data.set_index('yyyymm')
industries = list(data.columns)
# map industry names to col nums
ind_reverse_dict = dict([(industries[i], i) for i in range(len(industries))])

rfdata = pd.read_csv("F-F_Research_Data_Factors.csv")
rfdata = rfdata.set_index('yyyymm')
data['rf'] = rfdata['RF']

# subtract risk-free rate
for ind in industries:
    data[ind] = data[ind] - data['rf']

#for ind in industries:
#    data[ind+".3m"] = pd.rolling_mean(data[ind],3)
#for ind in industries:
#    data[ind+".6m"] = pd.rolling_mean(data[ind],6)
#for ind in industries:
#    data[ind+".12m"] = pd.rolling_mean(data[ind],12)

# create a response variable led by 1 period to predict
for ind in industries:
    data[ind+".lead"] = data[ind].shift(-1)

data = data.loc[data.index[data.index > 195911]]
data = data.drop(columns=['rf'])
data = data.dropna(axis=0, how='any')

nresponses = len(industries)
npredictors = data.shape[1]-nresponses
predictors = list(data.columns[:npredictors])
predictor_reverse_dict = dict([(predictors[i], i) for i in range(len(predictors))])
responses = list(data.columns[-nresponses:])
response_reverse_dict = dict([(responses[i], i) for i in range(len(responses))])

print(data.shape)
data[['Food', 'Food.lead']]

# In[3]:

# exclude 2017 and later to tie to the paper
data = data.loc[data.index[data.index < 201701]]
data = data.loc[data.index[data.index > 195911]]
data

# In[4]:

data.to_csv("data.csv")
desc = data.describe()
desc
# min, max line up with Table 1

# In[5]:

# annualized returns don't match Table 1, oddly
# geometric mean, annualized
pd.DataFrame((np.prod(data/100 + 1)**(12.0/len(data))-1)[:30], columns=['Mean Ann. Return'])

# In[6]:

# try this way, arithmetic mean then annualize (not very correct)
#print(pd.DataFrame(((desc.loc['mean']/100+1)**12-1)[:30]))  # nope
# same
pd.DataFrame(((1 + np.mean(data, axis=0)/100)**12 - 1)[:30], columns=['Mean Ann. Return'])

# In[7]:

# annualized volatility
pd.DataFrame((desc.loc['std']*np.sqrt(12))[:30].round(2))
# lines up with Table 1

# In[8]:

# Run LASSO, then OLS on the selected variables
# skip the last row to better match the published R-squared:
# it looks like they forecast actuals 1960-2016 using 1959m12 to 2016m11
# not exact matches to Table 2 R-squared, but almost within rounding error
X = data.values[:-1,:npredictors]
Y = data.values[:-1,-nresponses:]
nrows = X.shape[0]
X.shape
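# A toy illustration (not part of the replication) of the shift(-1) alignment above: row t holds month t's excess return and, in the ".lead" column, month t+1's return, so a model fit on (X[t], Y[t]) learns to predict next month from this month.

# In[ ]:

toy = pd.DataFrame({'Food': [1.0, 2.0, 3.0]}, index=[200001, 200002, 200003])
toy['Food.lead'] = toy['Food'].shift(-1)
print(toy)
#         Food  Food.lead
# 200001   1.0        2.0
# 200002   2.0        3.0
# 200003   3.0        NaN   <- the row dropped by dropna()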
# In[9]:

def subset_selection(X, Y, model_aic, verbose=False, responses=responses, predictors=predictors):
    nrows, npreds = X.shape
    nrows, nresps = Y.shape
    coef_dict = []
    for response_index in range(nresps):
        y = Y[:,response_index]
        model_aic.fit(X, y)
        predcols = [i for i in range(npreds) if model_aic.coef_[i] != 0]
        #y_response = model_aic.predict(X)
        #print("In-sample LASSO R-squared: %.6f" % r2_score(y, y_response))
        if verbose and responses:
            print("LASSO variables selected for %s: " % responses[response_index])
            print([predictors[i] for i in predcols])
        if not predcols:
            if verbose and responses:
                print("No coefs selected for " + responses[response_index] + ", using all")
                print("---")
            predcols = list(range(npreds))
        # fit OLS vs. selected vars, a better fit w/o LASSO penalties
        # than the in-sample R-squared using LASSO coeffs
        coef_dict.append(predcols)
        if verbose and responses and predictors:
            print("Running OLS for " + responses[response_index] + " against " + str([predictors[i] for i in predcols]))
            # col nums of selected predictors
            model_ols = LinearRegression()
            model_ols.fit(X[:, predcols], y)
            y_pred = model_ols.predict(X[:, predcols])
            print("In-sample OLS R-squared: %.2f%%" % (100 * r2_score(y, y_pred)))
            print("---")
    return coef_dict

#coef_dict = subset_selection(X, Y, LassoLarsIC(criterion='aic'))
coef_dict = subset_selection(X, Y, LassoLarsIC(criterion='aic'), verbose=True, responses=responses, predictors=predictors)
print(coef_dict)
# These subsets line up closely with Table 2,
# except Clths and Whlsl, where we get different predictors
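# Hedged variant (an assumption, not the paper's exact procedure): the paper standardizes predictors before running LASSO, which can change which variables are selected since the L1 penalty then treats all scales equally. A minimal sketch that z-scores X first:

# In[ ]:

def subset_selection_standardized(X, Y, criterion='aic'):
    """Sketch: LassoLarsIC selection on z-scored predictors."""
    Xz = StandardScaler().fit_transform(X)
    coef_dict = []
    for j in range(Y.shape[1]):
        model = LassoLarsIC(criterion=criterion)
        model.fit(Xz, Y[:, j])
        coef_dict.append([i for i in range(X.shape[1]) if model.coef_[i] != 0])
    return coef_dict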
# In[10]:

# the same predictors are selected for all but 2 response vars
# use the predictors from the paper to match its results
if True:  # turn off/on
    coef_dict_temp = {}
    coef_dict_temp['Food.lead'] = ['Clths', 'Coal', 'Util', 'Rtail']
    coef_dict_temp['Beer.lead'] = ['Food', 'Clths', 'Coal']
    coef_dict_temp['Smoke.lead'] = ['Txtls', 'Carry', 'Mines', 'Coal', 'Oil', 'Util', 'Telcm', 'Servs', 'Paper', 'Trans', 'Fin']
    coef_dict_temp['Games.lead'] = ['Books', 'Clths', 'Coal', 'Fin']
    coef_dict_temp['Books.lead'] = ['Games', 'Books', 'Coal', 'Oil', 'Util', 'Servs', 'BusEq', 'Rtail', 'Fin']
    coef_dict_temp['Hshld.lead'] = ['Clths', 'Coal', 'Rtail']
    coef_dict_temp['Clths.lead'] = ['Books', 'Clths', 'Chems', 'Steel', 'ElcEq', 'Carry', 'Coal', 'Oil', 'Util', 'Telcm', 'Servs', 'BusEq', 'Rtail']
    # our selection: Running OLS for Clths against ['Clths', 'Coal', 'Oil', 'Servs', 'Rtail']
    coef_dict_temp['Hlth.lead'] = ['Books', 'Mines', 'Coal', 'Util']
    coef_dict_temp['Chems.lead'] = ['Clths']
    coef_dict_temp['Txtls.lead'] = ['Clths', 'Autos', 'Coal', 'Oil', 'Rtail', 'Fin']
    coef_dict_temp['Cnstr.lead'] = ['Clths', 'Coal', 'Oil', 'Util', 'Trans', 'Rtail', 'Fin']
    coef_dict_temp['Steel.lead'] = ['Fin']
    coef_dict_temp['FabPr.lead'] = ['Trans', 'Fin']
    coef_dict_temp['ElcEq.lead'] = ['Fin']
    coef_dict_temp['Autos.lead'] = ['Hshld', 'Clths', 'Coal', 'Oil', 'Util', 'BusEq', 'Rtail', 'Fin']
    coef_dict_temp['Carry.lead'] = ['Trans']
    coef_dict_temp['Mines.lead'] = []
    coef_dict_temp['Coal.lead'] = ['Beer', 'Smoke', 'Books', 'Autos', 'Coal', 'Oil', 'Paper', 'Rtail']
    coef_dict_temp['Oil.lead'] = ['Beer', 'Hlth', 'Carry']
    coef_dict_temp['Util.lead'] = ['Food', 'Beer', 'Smoke', 'Hshld', 'Hlth', 'Cnstr', 'FabPr', 'Carry', 'Mines', 'Oil', 'Util', 'Telcm', 'BusEq', 'Whlsl', 'Fin', 'Other']
    coef_dict_temp['Telcm.lead'] = ['Beer', 'Smoke', 'Books', 'Hshld', 'Cnstr', 'Autos', 'Carry', 'Mines', 'Coal', 'Oil', 'Util', 'Servs', 'BusEq', 'Rtail', 'Meals', 'Fin']
    coef_dict_temp['Servs.lead'] = ['Smoke', 'Books', 'Steel', 'Oil', 'Util', 'Fin']
    coef_dict_temp['BusEq.lead'] = ['Smoke', 'Books', 'Util']
    coef_dict_temp['Paper.lead'] = ['Clths', 'Coal', 'Oil', 'Rtail', 'Fin']
    coef_dict_temp['Trans.lead'] = ['Fin']
    coef_dict_temp['Whlsl.lead'] = ['Food', 'Beer', 'Smoke', 'Books', 'Hlth', 'Carry', 'Coal', 'Oil', 'Util', 'Telcm', 'Servs', 'BusEq', 'Fin', 'Other']
    # our selection: Running OLS for Whlsl against ['Food', 'Smoke', 'Books', 'Carry', 'Coal', 'Oil', 'Util', 'Servs', 'Fin', 'Other']
    coef_dict_temp['Rtail.lead'] = ['Rtail']
    coef_dict_temp['Meals.lead'] = ['Smoke', 'Books', 'Clths', 'Steel', 'Carry', 'Coal', 'Oil', 'Util', 'Servs', 'BusEq', 'Meals', 'Fin']
    coef_dict_temp['Fin.lead'] = ['Fin']
    coef_dict_temp['Other.lead'] = ['Clths', 'Fin']

    coef_dict_paper = []
    for response in responses:
        print(response, " -> ", coef_dict_temp[response])
        coef_dict_paper.append([predictor_reverse_dict[jstr] for jstr in coef_dict_temp[response]])
    print(coef_dict_paper)

# In[11]:

def predict_with_subsets(X, Y, create_model, coef_dict, verbose=False):
    """evaluate subset selection: pass a model factory and subsets, compute avg R-squared"""
    global responses
    nrows, ncols = Y.shape
    model = create_model()
    scores = []
    for response_col in range(ncols):
        y = Y[:,response_col]
        #print("LASSO variables selected for %s: " % responses[response_col])
        #print(coef_dict[response_col])
        if not coef_dict[response_col]:
            if verbose:
                print("No coefs selected for " + responses[response_col])
                #print("---")
            continue
        # fit the model vs. the selected vars, a better fit w/o LASSO penalties
        # than the in-sample R-squared using LASSO coeffs
        # col nums of selected predictors
        predcols = coef_dict[response_col]
        model.fit(X[:, predcols], y)
        y_pred = model.predict(X[:, predcols])
        score = r2_score(y, y_pred)
        scores.append(score)
        if verbose:
            print("In-sample R-squared: %.2f%% for %s against %s" % (score*100, responses[response_col], str([predictors[i] for i in coef_dict[response_col]])))
            #print("---")
    if verbose:
        print("Mean R-squared: %.2f%%" % (100 * np.mean(np.array(scores))))
    return np.mean(np.array(scores))

predict_with_subsets(X, Y, LinearRegression, coef_dict_paper, verbose=True)

# In[12]:

# use all predictors - higher in-sample R-squared
coef_dict_all = []
for _ in responses:
    coef_dict_all.append(range(len(predictors)))
predict_with_subsets(X, Y, LinearRegression, coef_dict_all, verbose=False)

# In[13]:

# the first iteration trains on the first 121 months, rows 0:120 (195912-196912);
# the first prediction goes in P[121] (the 122nd row, the 197001 row), which uses
# 197001 returns to forecast the 197002 return, so 197002 is the first month
# of portfolio performance
FIRST_TRAIN_MONTHS = 121
FIRST_PREDICT_MONTH = FIRST_TRAIN_MONTHS  # this is stupid but keeps my head straight
print(X[FIRST_TRAIN_MONTHS])
print(data.iloc[FIRST_TRAIN_MONTHS][:30])
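# A quick assertion-style sanity check (mine, not the paper's), assuming the CSV spans 1959m12 onward as loaded above, confirming the walk-forward bookkeeping: the first fit covers 195912-196912 and the first prediction row is 197001.

# In[ ]:

assert data.index[0] == 195912
assert data.index[FIRST_TRAIN_MONTHS - 1] == 196912
assert data.index[FIRST_TRAIN_MONTHS] == 197001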
# In[14]:

class PredictWrapper():
    """Wrap an sklearn model (e.g. LinearRegression) to fit and predict all
    response vars as a vector, matching the way our Keras model will do it"""

    def __init__(self, create_model, coef_dict, fit_missing=None):
        self.create_model = create_model
        self.coef_dict = coef_dict
        self.fit_missing = fit_missing
        self.models = []

    def fit(self, X_fit, Y_fit, verbose=False):
        self.nrows, self.ycols = Y_fit.shape
        self.models = []
        # fit a model for each column
        for responsecol in range(self.ycols):
            if self.coef_dict[responsecol]:
                # column indexes to fit against
                predcols = self.coef_dict[responsecol]
            else:  # no columns selected
                if not self.fit_missing:
                    # default: don't fit if no predictors selected
                    self.models.append(None)
                    #print(self.models[-1])
                    continue
                elif self.fit_missing == "mean":
                    # append a numeric value to use
                    self.models.append(np.mean(Y_fit[:,responsecol]))
                    #print(self.models[-1])
                    continue
                elif self.fit_missing == "all":
                    # predcols = all columns
                    predcols = range(X_fit.shape[1])
            model = self.create_model()  # create 1 sklearn model for each column
            model.fit(X_fit[:, predcols], Y_fit[:,responsecol])
            if verbose:
                print("fit on " + str(X_fit[:, predcols].shape) + str(predcols))
                print(model.coef_)
            self.models.append(model)
            #debug
            #print(responsecol)
            #print(X_fit[:, predcols])
            #print("=====")
            #print(Y_fit[:,responsecol])
            #print("=====")
            #print(self.model.coef_)
            #print(self.model.intercept_)
            #print("=====")

    def predict(self, X_predict, verbose=False):
        predictions = []
        nrows = X_predict.shape[0]
        for responsecol in range(self.ycols):
            #print(type(self.models[responsecol]))
            if type(self.models[responsecol]) == np.float64:
                # historical-mean fallback
                pred = np.array([self.models[responsecol]] * nrows).reshape(nrows,1)
                predictions.append(pred)
                sys.stdout.write('\010#')
            elif not self.models[responsecol]:  # don't predict
                predictions.append(np.array([np.nan] * nrows).reshape(nrows,1))
                sys.stdout.write('\010N')
            else:
                predcols = self.coef_dict[responsecol]
                y_pred = self.models[responsecol].predict(X_predict[:,predcols])
                if verbose:
                    print("predict on " + str(X_predict[:, predcols].shape) + str(predcols))
                    print(y_pred)
                predictions.append(y_pred.reshape(nrows,1))
        return np.hstack(predictions)
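# A minimal usage sketch for PredictWrapper (illustrative only): fit one OLS per response on its LASSO-selected columns, then predict one full row of industry returns. With fit_missing='mean', the empty Mines.lead subset falls back to the historical mean.

# In[ ]:

wrapper = PredictWrapper(LinearRegression, coef_dict_paper, fit_missing='mean')
wrapper.fit(X[:FIRST_TRAIN_MONTHS], Y[:FIRST_TRAIN_MONTHS])
row_pred = wrapper.predict(X[FIRST_TRAIN_MONTHS:FIRST_TRAIN_MONTHS + 1])
print(row_pred.shape)  # (1, 30): one prediction per industry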
# ### BacktestModel class to abstract the walk-forward validation and backtesting workflow from modeling, portfolio construction, reporting

# In[15]:

# typical pipeline
# backtestmodel = BacktestModel(X,  # predictors
#                               Y,  # responses
#                               create_model=LinearRegression,  # create_model returns a model (needed for 'timestep', which creates a different model each timestep)
#                               # or model = someKerasModel, an initialized model that supports fit(X,Y), predict(X), and predicts an entire row
#                               coef_dict_param=coef_dict_paper,  # how to map predictors to responses ("all", "timestep", or a list of lists)
#                               startindex=FIRST_TRAIN_MONTHS,  # initial training period for the backtest
#                               fit_missing='mean',  # what to do when no predictors are selected in coef_dict_param - use all predictors, the historical mean, or np.nan
#                               scaler=None)  # scaling class like MinMaxScaler
# backtestmodel.gen_predictions(verbose=False)  # starting from startindex, step through X,Y month by month: fit up to the current month, predict the next month, store the prediction in self.P
# backtestmodel.walkforward_xval(n_splits=5, verbose=True)  # calls gen_predictions with a large step, fitting and predicting one large fold at a time (useful to cross-validate quickly)
# backtestmodel.evaluate_predictions()  # report metrics on the predictions: MSE etc. TODO: support custom calcs before/after/instead of the default calcs
# backtestmodel.gen_returns(calc_returns, verbose=True)  # takes a function that computes portfolio returns based on self.P, stores them in self.R
# backtestmodel.report_returns(start_date=start_date_str, freq='M')  # calc cumulative perf and report (TODO: allow it to take a reporting function to run before/after/in place of the default report)
# backtestmodel.evaluate_quantiles(chart=True, verbose=True)  # report quantile metrics. TODO: make this a custom calc passed into evaluate_predictions

class BacktestModel():

    def __init__(self,
                 X,  # predictors
                 Y,  # responses
                 model=None,  # model that supports fit(X,Y), predict(X), predicts an entire row
                 create_model=None,  # or create_model, which returns a model (needed for 'timestep', but slower, so pass model if dynamic creation isn't needed)
                 coef_dict_param="all",  # mapping of predictors to responses ("all", "timestep", or a list of lists)
                 startindex=FIRST_TRAIN_MONTHS,
                 scaler=None,
                 fit_missing=None):
        self.Xrows, self.Xcols = X.shape
        self.Yrows, self.Ycols = Y.shape
        if self.Xrows != self.Yrows:
            raise ValueError("Shapes differ: X %s, Y %s" % (str(X.shape), str(Y.shape)))
        self.X = X
        self.Y = Y
        self.Xscale = X.copy()
        self.Yscale = Y.copy()
        if scaler:
            print("scaler: %s " % str(scaler))
            # by rows
            # MinMaxScaler: each row (min->0, max->1)
            # StandardScaler: each row (mean->0, SD->1)
            # self.Xscale = scaler().fit_transform(self.Xscale.transpose()).transpose()
            # self.Yscale = scaler().fit_transform(self.Yscale.transpose()).transpose()
            # by cols
            # MinMaxScaler: each col (min->0, max->1)
            # StandardScaler: each col (mean->0, SD->1)
            self.Xscale = scaler().fit_transform(self.Xscale)
            self.Yscale = scaler().fit_transform(self.Yscale)
        self.model = model
        self.create_model = create_model
        self.coef_dict_param = coef_dict_param
        self.startindex = startindex
        self.fit_missing = fit_missing

    def fit_predict(self, ntrain, npredict=1, verbose=False):
        """for the backtest, train the model of Y v. X on the first ntrain rows:
        if ntrain=121, fit rows 0:120.
        predict the following npredict rows:
        if npredict=1, predict row 121;
        if npredict=12, predict rows 121-132
        """
        # fit the first ntrain rows
        X_fit = self.Xscale[:ntrain]  # e.g. rows 0:120
        Y_fit = self.Yscale[:ntrain]
        # predict npredict rows
        X_predict = self.Xscale[ntrain:ntrain+npredict]  # e.g. row 121
        X_predict = X_predict.reshape(npredict, self.Xcols)

        # if coef_dict_param == "timestep", select predictors into coef_dict now
        if self.coef_dict_param == "timestep":
            msg = "Performing subset selection"
            coef_dict = subset_selection(X_fit, Y_fit, LassoLarsIC(criterion='aic'))
        # if coef_dict_param == "all", use all predictors for each response
        elif self.coef_dict_param == 'all':
            msg = "Using all predictors"
            coef_dict = [range(self.Xcols) for _ in range(self.Ycols)]
        else:  # should check for a valid list of lists
            msg = "Using coef_dict predictors"
            coef_dict = self.coef_dict_param
        if verbose:
            print(msg)

        if self.create_model:
            self.model = PredictWrapper(self.create_model, coef_dict, fit_missing=self.fit_missing)
        self.model.fit(X_fit, Y_fit, verbose=verbose)
        return self.model.predict(X_predict, verbose=verbose)
    # predict all months:
    # initial train_months = 120 -> train the first model on 120 rows,
    # so the first prediction will be in P[120] (the 121st row);
    # step = 6 -> predict the following 6 rows, then step forward 6 months at a time.
    # initialize the predictions matrix self.P.
    # use either step or splits:
    # with step, do range(self.startindex, nrows, step);
    # with splits, at each fold train 0:startfold, predict startfold+1:endfold.
    # store only out-of-sample predictions in P, and calc out-of-sample MSE.
    # a step > 1 or splits is useful for quicker xval, or to speed things up by not re-estimating the model at each timestep
    def gen_predictions(self, step=1, splits=None, verbose=False):
        self.P = np.zeros_like(self.Y)
        progress_i = 0
        self.nrows, self.ycols = self.Y.shape

        if splits:
            month_indexes = splits[:-1]  # last index is nrows
        else:
            # create the list of steps
            month_indexes = list(range(self.startindex, self.nrows, step))
        steps = [month_indexes[i+1]-month_indexes[i] for i in range(len(month_indexes)-1)]
        # last step -> end
        steps.append(self.nrows - month_indexes[-1])

        if verbose:
            print("Steps: " + str(month_indexes))

        for month_index, forecast_rows in zip(month_indexes, steps):
            if verbose:
                print("Training on first %d rows (%d:%d), putting predictions in rows %s" % (month_index, 0, month_index-1, str(range(month_index, month_index+forecast_rows))))
            predictions = self.fit_predict(month_index, forecast_rows, verbose=False)
            first_pred_row = month_index
            for row_index in range(forecast_rows):
                self.P[first_pred_row+row_index] = predictions[row_index]
            sys.stdout.write('.')
            progress_i += 1
            if progress_i % 80 == 0:
                print("")
                print("%s Still training, step %d of %d" % (time.strftime("%H:%M:%S"), progress_i, len(month_indexes)))
            sys.stdout.flush()
        print("")

    def evaluate_predictions(self):
        # evaluate predictions (could move to a separate function)
        msetemp = (self.P[self.startindex:]-self.Yscale[self.startindex:])**2
        # remove nans
        msetemp = msetemp[~np.isnan(msetemp)]
        self.mse = np.mean(msetemp)
        print("OOS MSE across all predictions: %.4f" % self.mse)
        self.model.fit(self.Xscale, self.Yscale)
        Y_pred = self.model.predict(self.Xscale)
        self.in_sample_mse = np.mean((Y_pred - self.Yscale) ** 2)
        print("In-sample MSE: %.4f" % self.in_sample_mse)
        # force unpredicted ys to be nans, then remove nans
        vartemp = self.Yscale[self.startindex:] - self.P[self.startindex:] + self.P[self.startindex:]
        vartemp = vartemp[~np.isnan(vartemp)]
        y_variance = np.var(vartemp)
        print("Variance: %.4f" % (y_variance))
        print("R-squared: %.4f" % (1 - self.mse/y_variance))
        return(self.mse)

    def evaluate_quantiles(self, chart=False, verbose=False):
        self.P_quantiles = np.zeros_like(self.P)
        self.Y_quantiles = np.zeros_like(self.Y)
        self.kendalltaus = []
        self.ktpvals = []
        # compute a score for predicted quantiles vs. actual (expected) quantiles
        N_QUANTILES = 5
        for row in range(self.startindex, self.P_quantiles.shape[0]):
            #print(self.P[row])
            self.P_quantiles[row] = pd.qcut(self.P[row], N_QUANTILES, range(N_QUANTILES))
            self.Y_quantiles[row] = pd.qcut(self.Y[row], N_QUANTILES, range(N_QUANTILES))
            kt, p_val = kendalltau(self.P[row], self.Y[row])
            self.kendalltaus.append(kt)
            self.ktpvals.append(p_val)
        self.kendalltau = np.mean(self.kendalltaus)
        self.kendalltau_pvalue = np.mean(self.ktpvals)
        print("Avg rank correlation (Kendall's tau): %.4f (Expected: 0)" % (self.kendalltau))

        pred_quantiles = self.P_quantiles[self.startindex:]
        true_quantiles = self.Y_quantiles[self.startindex:]
        nrows, ncols = pred_quantiles.shape
        pred_quantiles = pred_quantiles.reshape(nrows*ncols)
        true_quantiles = true_quantiles.reshape(nrows*ncols)
        self.quintile_accuracy = accuracy_score(pred_quantiles, true_quantiles)
        print("5-quintile accuracy: %.4f (Expected: 0.2)" % (self.quintile_accuracy))

        pred_direction = np.zeros(nrows*ncols)
        true_direction = np.zeros(nrows*ncols)
        for i in range(nrows*ncols):
            if pred_quantiles[i] == 4:
                pred_direction[i] = 1
            elif pred_quantiles[i] == 0:
                pred_direction[i] = -1
            if true_quantiles[i] == 4:
                true_direction[i] = 1
            elif true_quantiles[i] == 0:
                true_direction[i] = -1
        self.directional_accuracy = accuracy_score(pred_direction, true_direction)
        print("Long/short/flat accuracy: %.4f (Expected: 0.44)" % (self.directional_accuracy))

        nrows = nrows * ncols
        conf_mat_expected = np.array([[0.64, 0.16], [0.16, 0.04]])*nrows
        myscores = []
        for q in range(5):
            temp_pred = pred_quantiles == q
            temp_actual = true_quantiles == q
            conf_mat5 = confusion_matrix(temp_pred, temp_actual)
            diff_mat = conf_mat5 - conf_mat_expected
            if verbose:
                print("Confusion matrix for quantile %d" % q)
                print(conf_mat5)
                cstmp, cspvtmp = chisquare(conf_mat5.reshape(4), conf_mat_expected.reshape(4))
                print("Chi-square: %.4f (p-value: %.8f)" % (cstmp, cspvtmp))
            # probably no valid statistical interpretation, but
            # measures the improvement in true positives over chance
            myscore = diff_mat[1][1]
            myscores.append(myscore)
        # sum of excess true positives for the top and bottom quintiles
        self.excess_tp = myscores[0] + myscores[4]
        print("Excess true positives in quintiles 1 + 5: %f" % (self.excess_tp))

        conf_mat = confusion_matrix(pred_quantiles, true_quantiles)
        if chart:
            fig, ax = plt.subplots(figsize=(10,10))
            sns.heatmap(conf_mat, annot=True, fmt='d')
            plt.ylabel('Actual Quintile')
            plt.xlabel('Predicted Quintile')
            plt.show()
        return None

    def walkforward_xval(self, n_splits=5, verbose=False):
        """quick and dirty cross-validation: gen_predictions with one large step per fold"""
        # generate k-folds
        kf = KFold(n_splits=n_splits)
        kf.get_n_splits(self.X)
        last_indexes = []
        for train_index, test_index in kf.split(self.X):
            # use the end of each test fold as a training cutoff
            last_index = test_index[-1] + 1
            last_indexes.append(last_index)
        print("%s Generate splits %s" % (time.strftime("%H:%M:%S"), str([i for i in last_indexes])))
        # override startindex
        self.startindex = last_indexes[0]
        return self.gen_predictions(splits=last_indexes, verbose=verbose)

    def gen_returns(self, port_returns_func, verbose=False):
        self.R = np.zeros(self.P.shape[0])
        first_pred_month = self.startindex
        indcount = [0] * self.Ycols
        longcount = [0] * self.Ycols
        shortcount = [0] * self.Ycols
        for month_index in range(first_pred_month, self.P.shape[0]-1):
            return_month = month_index + 1
            port_return, long_indexes, short_indexes = port_returns_func(self.P[month_index], self.X[return_month])
            self.R[return_month] = port_return
            for i in long_indexes:
                indcount[i] += 1
                longcount[i] += 1
            for i in short_indexes:
                indcount[i] += 1
                shortcount[i] += 1
        if verbose:
            for i in range(len(responses)):
                print("%s: long %d times, short %d times, total %d times" % (predictors[i], longcount[i], shortcount[i], indcount[i]))
        return self.R

    def report_returns(self, start_date='01/01/1970', freq='M'):
        first_pred_month = self.startindex
        self.results = self.R[first_pred_month:]
        nmonths = self.results.shape[0]
        nyears = nmonths/12.0
        index = pd.date_range(start_date, periods=nmonths, freq=freq)
        perfdata = pd.DataFrame(self.results, index=index, columns=['Returns'])
        perfdata['Equity'] = 100 * np.cumprod(1 + self.results / 100)
        self.cumulative_return = perfdata['Equity']
        self.mean_monthly_return_annualized = np.mean(1 + self.results/100) ** 12 - 1
        self.mean_return = (self.cumulative_return[-1]/100) ** (1.0/nyears) - 1
        self.annualized_vol = np.std(self.results/100) * np.sqrt(12.0)
        self.sharpe = self.mean_monthly_return_annualized/self.annualized_vol
        print("Mean return: %.3f%%" % (self.mean_return * 100))
        #print("Mean monthly annualized return: %.3f%%" % (self.mean_monthly_return_annualized * 100))
        #print("Monthly annualized volatility: %.3f%%" % (self.annualized_vol * 100))
        print("Monthly Sharpe ratio: %.3f" % (self.sharpe))

# In[16]:

# return calculation passed to gen_returns
NUMSTOCKS = 6  # top quintile (and bottom)

def calc_returns(prediction_row, return_row, numstocks=NUMSTOCKS, verbose=False):
    # ensure nan sorts to the top for shorts
    short_sort_array = [999999 if np.isnan(x) else x for x in prediction_row]
    # pick the bottom numstocks
    select_array = np.argsort(short_sort_array)
    short_indexes = select_array[:numstocks]
    # ensure nan sorts to the bottom for longs
    long_sort_array = [-999999 if np.isnan(x) else x for x in prediction_row]
    # pick the top numstocks
    select_array = np.argsort(long_sort_array)
    long_indexes = select_array[-numstocks:]
    if verbose:
        print("Longs: %s" % (str([(i, prediction_row[i]) for i in long_indexes])))
        print("Shorts: %s" % (str([(i, prediction_row[i]) for i in short_indexes])))
    # compute the equal-weighted long/short return
    return np.mean(return_row[long_indexes])/2 - np.mean(return_row[short_indexes])/2, long_indexes, short_indexes

# In[17]:

start_date_int = data.index[FIRST_TRAIN_MONTHS]
start_year, start_month = start_date_int // 100, start_date_int % 100
start_date_str = "%02d/%02d/%d" % (start_month, 1, start_year)
start_date_str

# In[18]:

# test fit_predict
backtestmodel = BacktestModel(X, Y,
                              create_model=LinearRegression,
                              coef_dict_param=coef_dict_all,
                              startindex=FIRST_TRAIN_MONTHS)
print(backtestmodel.fit_predict(121, npredict=3, verbose=False))

# In[19]:

# test the model on our calculated coef_dict
# bad methodology that introduces snooping:
# it selects LASSO vars over the whole timespan, then
# uses the selected vars to run an OLS backtest.
# included as an example of what not to do
backtestmodel = BacktestModel(X, Y,
                              create_model=LinearRegression,
                              coef_dict_param=coef_dict_paper,
                              startindex=FIRST_TRAIN_MONTHS,
                              fit_missing='mean',
                              scaler=None)
backtestmodel.gen_predictions(verbose=False)
backtestmodel.gen_returns(calc_returns, verbose=True)
backtestmodel.report_returns(start_date=start_date_str, freq='M')
backtestmodel.evaluate_predictions()
backtestmodel.evaluate_quantiles(chart=True, verbose=True)
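# A toy check (mine, not part of the backtest) of calc_returns: with numstocks=1 the portfolio goes long the highest prediction and short the lowest, earning half the spread between the two realized returns.

# In[ ]:

toy_pred = np.array([0.5, -1.0, 2.0])
toy_real = np.array([1.0, -2.0, 4.0])
ret, longs, shorts = calc_returns(toy_pred, toy_real, numstocks=1)
print(ret, longs, shorts)  # 3.0 [2] [1]: long asset 2 (+4/2), short asset 1 (-(-2)/2)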
# ### Functions using Plotly to generate charts for performance, scatters, heatmaps

# In[20]:

# chart performance
def mychart(args, names=None, title=""):
    x_coords = np.linspace(1970, 2016, args[0].shape[0])
    plotdata = []
    for i in range(len(args)):
        tracelabel = "Trace %d" % i
        if names:
            tracelabel = names[i]
        plotdata.append(Scatter(x=x_coords, y=args[i].values.reshape(-1),
                                mode='lines', name=tracelabel))
    layout = Layout(
        title=title,
        autosize=False,
        width=900,
        height=600,
        yaxis=dict(
            type='log',
            autorange=True
        )
    )
    fig = Figure(data=plotdata, layout=layout)
    return iplot(fig)

# In[21]:

def myscatter(arg1, arg2, names=None, title=""):
    plotdata = []
    plotdata.append(Scatter(
        x=arg1,
        y=arg2,
        mode='markers'
    ))
    layout = dict(
        title=title,
        autosize=False,
        width=600,
        height=480,
        yaxis=dict(
            # type='log',
            autorange=True
        )
    )
    # py.iplot(data, filename='basic-scatter')
    fig = Figure(data=plotdata, layout=layout)
    return iplot(fig)

# In[22]:

def plot_matrix(lossframe, x_labels, y_labels, x_suffix="", y_suffix=""):
    """plot a heat map of a matrix"""
    pivot = lossframe.pivot_table(index=[y_labels], columns=[x_labels], values=['mse'])
    # print(pivot)
    # specify labels as strings, to force plotly to use a discrete axis
    # print(pivot.columns.levels[1].values)
    # print(lossframe[x_labels].dtype)
    if lossframe[x_labels].dtype == np.float64 or lossframe[x_labels].dtype == np.float32:
        xaxis = ["%f %s" % (i, x_suffix) for i in pivot.columns.levels[1].values]
    else:
        xaxis = ["%d %s" % (i, x_suffix) for i in pivot.columns.levels[1].values]
    if lossframe[y_labels].dtype == np.float64 or lossframe[y_labels].dtype == np.float32:
        yaxis = ["%f %s" % (i, y_suffix) for i in pivot.index.values]
    else:
        yaxis = ["%d %s" % (i, y_suffix) for i in pivot.index.values]
    # print(xaxis, yaxis)
    chart_width = 640
    chart_height = 480
    layout = dict(
        title="%s v. %s" % (x_labels, y_labels),
        height=chart_height,
        width=chart_width,
        margin=dict(
            l=150,
            r=30,
            b=120,
            t=100,
        ),
        xaxis=dict(
            title=x_labels,
            tickfont=dict(
                family='Arial, sans-serif',
                size=10,
                color='black'
            ),
        ),
        yaxis=dict(
            title=y_labels,
            tickfont=dict(
                family='Arial, sans-serif',
                size=10,
                color='black'
            ),
        ),
    )
    data = [Heatmap(z=pivot.values,
                    x=xaxis,
                    y=yaxis,
                    colorscale=[[0, 'rgb(0,0,255)'], [1, 'rgb(255,0,0)']],
                    )]
    fig = Figure(data=data, layout=layout)
    return iplot(fig, link_text="")

# ### Compare LASSO, OLS models

# In[23]:

perf_post_LASSO = backtestmodel.cumulative_return
mychart([perf_post_LASSO], ["Post-LASSO"], title="Post-LASSO")

# In[24]:

# do LASSO subset selection at each timestep
backtestmodel = BacktestModel(X, Y,
                              create_model=LinearRegression,
                              coef_dict_param="timestep",
                              startindex=FIRST_TRAIN_MONTHS,
                              fit_missing='mean',
                              scaler=None)
backtestmodel.gen_predictions(verbose=False)
backtestmodel.gen_returns(calc_returns, verbose=False)
backtestmodel.report_returns(start_date=start_date_str, freq='M')
backtestmodel.evaluate_predictions()
backtestmodel.evaluate_quantiles(chart=True, verbose=True)
perf_LASSO_each_timestep = backtestmodel.cumulative_return
mychart([perf_LASSO_each_timestep], ["LASSO each timestep"], title="LASSO each timestep")

# In[25]:

# use all predictors at each timestep
backtestmodel = BacktestModel(X, Y,
                              create_model=LinearRegression,
                              coef_dict_param="all",
                              startindex=FIRST_TRAIN_MONTHS,
                              fit_missing='mean',
                              scaler=None)
backtestmodel.gen_predictions(verbose=False)
backtestmodel.gen_returns(calc_returns, verbose=False)
backtestmodel.report_returns(start_date=start_date_str, freq='M')
backtestmodel.evaluate_predictions()
backtestmodel.evaluate_quantiles(chart=True, verbose=False)
perf_all_preds = backtestmodel.cumulative_return
mychart([perf_all_preds], ["All preds"], title="OLS all predictors")
# In[26]:

# pure LASSO (not LASSO followed by OLS on the selected subset)
def create_model_lasso():
    return LassoLarsIC(criterion='aic')

# use all predictors at each timestep
backtestmodel = BacktestModel(X, Y,
                              create_model=create_model_lasso,
                              coef_dict_param="all",
                              startindex=FIRST_TRAIN_MONTHS,
                              fit_missing='mean',
                              scaler=None)
backtestmodel.gen_predictions(verbose=False)
backtestmodel.gen_returns(calc_returns, verbose=False)
backtestmodel.report_returns(start_date=start_date_str, freq='M')
backtestmodel.evaluate_predictions()
backtestmodel.evaluate_quantiles(chart=True, verbose=False)
perf_LASSO_only = backtestmodel.cumulative_return
mychart([perf_LASSO_only], ["LASSO only"], title="LASSO only")

# In[27]:

mychart([perf_LASSO_each_timestep, perf_all_preds, perf_LASSO_only], ["LASSO / OLS", "OLS only", "LASSO only"])

# ### Use walk-forward cross-validation to evaluate a variety of sklearn regression algos

# In[46]:

# test walk-forward xval, get a baseline MSE for LinearRegression: ~41
backtestmodel = BacktestModel(X, Y,
                              create_model=LinearRegression,
                              coef_dict_param="timestep",
                              startindex=FIRST_TRAIN_MONTHS,
                              fit_missing='mean',
                              scaler=None)
backtestmodel.walkforward_xval(n_splits=5, verbose=True)
backtestmodel.gen_returns(calc_returns, verbose=False)
backtestmodel.report_returns(start_date=start_date_str, freq='M')
backtestmodel.evaluate_predictions()
backtestmodel.evaluate_quantiles(chart=False, verbose=False)

# In[47]:

# try all the sklearn regression estimators
estimators = []
for name, class_ in all_estimators():
    if not issubclass(class_, sklearn.base.ClassifierMixin):
        if hasattr(class_, 'predict'):
            print(class_)
            estimators.append(class_)

# In[48]:

# eliminate some that didn't work or gave errors or nans; some may just need the right parameters
estimators = [
    sklearn.ensemble.bagging.BaggingRegressor,
    sklearn.ensemble.forest.ExtraTreesRegressor,
    sklearn.ensemble.forest.RandomForestRegressor,
    sklearn.ensemble.gradient_boosting.GradientBoostingRegressor,
    #sklearn.ensemble.iforest.IsolationForest,
    sklearn.ensemble.weight_boosting.AdaBoostRegressor,
    #sklearn.gaussian_process.gpr.GaussianProcessRegressor,
    sklearn.kernel_ridge.KernelRidge,
    sklearn.linear_model.base.LinearRegression,
    #sklearn.linear_model.bayes.ARDRegression,  # takes a couple of hours
    sklearn.linear_model.bayes.BayesianRidge,
    sklearn.linear_model.coordinate_descent.ElasticNet,
    sklearn.linear_model.coordinate_descent.ElasticNetCV,
    sklearn.linear_model.coordinate_descent.Lasso,
    sklearn.linear_model.coordinate_descent.LassoCV,
    sklearn.linear_model.huber.HuberRegressor,
    sklearn.linear_model.least_angle.Lars,
    sklearn.linear_model.least_angle.LarsCV,
    sklearn.linear_model.least_angle.LassoLars,
    sklearn.linear_model.least_angle.LassoLarsCV,
    sklearn.linear_model.least_angle.LassoLarsIC,
    sklearn.linear_model.omp.OrthogonalMatchingPursuit,
    sklearn.linear_model.omp.OrthogonalMatchingPursuitCV,
    sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor,
    sklearn.linear_model.ransac.RANSACRegressor,
    sklearn.linear_model.ridge.Ridge,
    sklearn.linear_model.ridge.RidgeCV,
    sklearn.linear_model.stochastic_gradient.SGDRegressor,
    #sklearn.linear_model.theil_sen.TheilSenRegressor,  # very slow
    #sklearn.mixture.bayesian_mixture.BayesianGaussianMixture,
    #sklearn.mixture.gaussian_mixture.GaussianMixture,
    sklearn.neighbors.regression.KNeighborsRegressor,
    #sklearn.neighbors.regression.RadiusNeighborsRegressor,  # returned nan
    #sklearn.neural_network.multilayer_perceptron.MLPRegressor,  # slow, tried above
    sklearn.svm.classes.LinearSVR,
    sklearn.svm.classes.NuSVR,
    #sklearn.svm.classes.OneClassSVM,
    sklearn.svm.classes.SVR,
    sklearn.tree.tree.DecisionTreeRegressor,
    sklearn.tree.tree.ExtraTreeRegressor,
    xgboost.XGBRegressor,
]
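# An alternative sketch (not the approach used above): all_estimators can filter to regressors directly via type_filter='regressor' (subclasses of RegressorMixin), which avoids picking up outlier detectors and mixture models that merely expose predict().

# In[ ]:

regressors = [cls for name, cls in all_estimators(type_filter='regressor')]
print(len(regressors))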
# In[49]:

# run all the models
sharpe_list = []
mse_list = []
kendalltau_list = []
excess_tp_list = []
quintile_accuracy_list = []
directional_accuracy_list = []
keys = []

for estimator in estimators:
    print(str(estimator)[8:-2])
    backtestmodel = BacktestModel(X, Y,
                                  create_model=estimator,
                                  coef_dict_param="all",
                                  startindex=FIRST_TRAIN_MONTHS,
                                  fit_missing='mean',
                                  scaler=None)
    backtestmodel.gen_predictions(verbose=False)
    backtestmodel.gen_returns(calc_returns, verbose=False)
    backtestmodel.report_returns(start_date=start_date_str, freq='M')
    backtestmodel.evaluate_predictions()
    backtestmodel.evaluate_quantiles(chart=True, verbose=False)
    keys.append(str(estimator)[8:-2])
    sharpe_list.append(backtestmodel.sharpe)
    mse_list.append(backtestmodel.mse)
    kendalltau_list.append(backtestmodel.kendalltau)
    excess_tp_list.append(backtestmodel.excess_tp)
    quintile_accuracy_list.append(backtestmodel.quintile_accuracy)
    directional_accuracy_list.append(backtestmodel.directional_accuracy)
    print("---")

# In[50]:

# make a dataframe with all the metrics
est_names = [str(estimator)[8:-2] for estimator in estimators]
skmetricsframe = pd.DataFrame(list(zip(est_names, mse_list, sharpe_list, quintile_accuracy_list, directional_accuracy_list, kendalltau_list, excess_tp_list)),
                              columns=['name', 'mse', 'sharpe', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score'])
skmetricsframe.to_csv("sklearn_metrics.csv")

# In[28]:

skmetricsframe = pd.read_csv("sklearn_metrics.csv")[['name', 'mse', 'sharpe', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score']]
skmetricsframe

# In[42]:

with open('sklearn_table.html', 'w') as fo:
    skmetricsframe.sort_values(['kendalltau'], ascending=False).iloc[:12,:].to_html(fo, float_format=lambda x: '%.3f' % x)

# In[43]:

# SGDRegressor diverged to an MSE of 6.309236364118325e+23; omit out-of-range values from the scatter charts
mse_list = skmetricsframe['mse'].values
mse_list = [x if x < 75 else np.nan for x in mse_list]
mse_list[24] = np.nan
sharpe_list = skmetricsframe['sharpe'].values
kendalltau_list = skmetricsframe['kendalltau'].values
excess_tp_list = skmetricsframe['truepositive_score'].values
quintile_accuracy_list = skmetricsframe['quantile_score'].values
directional_accuracy_list = skmetricsframe['direction_score'].values
# exclude > 100
#mse_list[20] = np.nan
#mse_list[21] = np.nan

# In[44]:

myscatter(mse_list, sharpe_list, title="Sharpe vs. MSE")
myscatter(quintile_accuracy_list, sharpe_list, title="Sharpe vs. Quantile Accuracy")
myscatter(directional_accuracy_list, sharpe_list, title="Sharpe vs. Directional Accuracy")
myscatter(kendalltau_list, sharpe_list, title="Sharpe vs. Kendall's Tau")
myscatter(excess_tp_list, sharpe_list, title="Sharpe vs. True Positives")
True Positives") # In[45]: # 4-way plot for blog post fig = plotly.tools.make_subplots(rows=2, cols=2, subplot_titles=('MSE', 'Quintile Accuracy','Direction Accuracy', "Kendall's tau")) fig.append_trace(Scatter( x = mse_list, y = sharpe_list, mode = 'markers' ), row=1, col=1) fig.append_trace(Scatter( x = quintile_accuracy_list, y = sharpe_list, mode = 'markers' ), row=1, col=2) fig.append_trace(Scatter( x = directional_accuracy_list, y = sharpe_list, mode = 'markers' ), row=2, col=1) fig.append_trace(Scatter( x = kendalltau_list, y = sharpe_list, mode = 'markers' ), row=2, col=2) fig['layout'].update(height=1000, width=1000, title='Sharpe ratio scatters vs.') iplot(fig) # In[35]: corrmatrix = skmetricsframe[['sharpe', 'mse', 'kendalltau', 'quantile_score', 'direction_score', 'truepositive_score']].corr() corrmatrix # In[47]: with open('sklearn_corr.html', 'w') as fo: corrmatrix.to_html(fo, float_format=lambda x: '%.3f' % x) # In[48]: fig, ax = plt.subplots(figsize=(10,10)) sns.heatmap(corrmatrix, annot=True, fmt='.3f') plt.show() # ### Perform crossvalidation to find optimal regularization parameters for ElasticNet # # In[71]: sharpe_list = [] mse_list = [] kendalltau_list = [] excess_tp_list = [] quintile_accuracy_list = [] directional_accuracy_list = [] keys = [] def create_model(params): alpha, l1_ratio = params def myclosure(): return sklearn.linear_model.ElasticNet(alpha=alpha, l1_ratio=l1_ratio) return myclosure alphas = [1, 3, 10, 30, 100] l1_ratios = [.01, .05, .1, .5, .667, .75, .9, .95, .99, 1] # 1 = LASSO, 0 = Ridge params_list = list(product(alphas, l1_ratios)) model_list = list(map(create_model, params_list)) for i in range(len(params_list)): print(params_list[i]) estimator = model_list[i] backtestmodel = BacktestModel(X, Y, create_model=estimator, coef_dict_param="all", startindex=FIRST_TRAIN_MONTHS, fit_missing='mean', scaler=None) #backtestmodel.walkforward_xval(n_splits=5, verbose=False) backtestmodel.gen_predictions(verbose=False) backtestmodel.gen_returns(calc_returns, verbose=False) backtestmodel.report_returns(start_date=start_date_str, freq='M') backtestmodel.evaluate_predictions() backtestmodel.evaluate_quantiles(chart=False, verbose=False) keys.append(str(estimator)[8:-2]) sharpe_list.append(backtestmodel.sharpe) mse_list.append(backtestmodel.mse) kendalltau_list.append(backtestmodel.kendalltau) excess_tp_list.append(backtestmodel.excess_tp) quintile_accuracy_list.append(backtestmodel.quintile_accuracy) directional_accuracy_list.append(backtestmodel.directional_accuracy) print("---") # In[72]: # make a dataframe with all the metrics flatlist = [] for key, mse, sharpe, quantile_score, directional_score, kt, excess_tp in zip(params_list, mse_list, sharpe_list, quintile_accuracy_list, directional_accuracy_list, kendalltau_list, excess_tp_list): flatlist.append([key[0], key[1], mse, sharpe, quantile_score, directional_score, kt, excess_tp]) metricsframe = pd.DataFrame(flatlist, columns = ['alpha', 'l1_ratio', 'mse', 'sharpe', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score']) metricsframe.to_csv("ElasticNetmetrics.csv") #metricsframe.sort_values(['kendalltau'], ascending=False)[['alpha','l1_ratio']].values[:12] # In[29]: metricsframe = pd.read_csv("ElasticNetmetrics.csv")[['alpha','l1_ratio','mse','sharpe','quantile_score','direction_score','kendalltau','truepositive_score']] metricsframe # In[50]: with open('elastic_table.html', 'w') as fo: metricsframe.sort_values(['sharpe'], ascending=False).iloc[:5,:].to_html(fo, float_format=lambda x: '%.3f' 
# ### Perform cross-validation to find optimal regularization parameters for ElasticNet

# In[71]:

sharpe_list = []
mse_list = []
kendalltau_list = []
excess_tp_list = []
quintile_accuracy_list = []
directional_accuracy_list = []
keys = []

def create_model(params):
    alpha, l1_ratio = params
    def myclosure():
        return sklearn.linear_model.ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    return myclosure

alphas = [1, 3, 10, 30, 100]
l1_ratios = [.01, .05, .1, .5, .667, .75, .9, .95, .99, 1]  # 1 = LASSO, 0 = Ridge
params_list = list(product(alphas, l1_ratios))
model_list = list(map(create_model, params_list))

for i in range(len(params_list)):
    print(params_list[i])
    estimator = model_list[i]
    backtestmodel = BacktestModel(X, Y,
                                  create_model=estimator,
                                  coef_dict_param="all",
                                  startindex=FIRST_TRAIN_MONTHS,
                                  fit_missing='mean',
                                  scaler=None)
    #backtestmodel.walkforward_xval(n_splits=5, verbose=False)
    backtestmodel.gen_predictions(verbose=False)
    backtestmodel.gen_returns(calc_returns, verbose=False)
    backtestmodel.report_returns(start_date=start_date_str, freq='M')
    backtestmodel.evaluate_predictions()
    backtestmodel.evaluate_quantiles(chart=False, verbose=False)
    keys.append(str(estimator)[8:-2])
    sharpe_list.append(backtestmodel.sharpe)
    mse_list.append(backtestmodel.mse)
    kendalltau_list.append(backtestmodel.kendalltau)
    excess_tp_list.append(backtestmodel.excess_tp)
    quintile_accuracy_list.append(backtestmodel.quintile_accuracy)
    directional_accuracy_list.append(backtestmodel.directional_accuracy)
    print("---")

# In[72]:

# make a dataframe with all the metrics
flatlist = []
for key, mse, sharpe, quantile_score, directional_score, kt, excess_tp in zip(params_list, mse_list, sharpe_list, quintile_accuracy_list, directional_accuracy_list, kendalltau_list, excess_tp_list):
    flatlist.append([key[0], key[1], mse, sharpe, quantile_score, directional_score, kt, excess_tp])
metricsframe = pd.DataFrame(flatlist, columns=['alpha', 'l1_ratio', 'mse', 'sharpe', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score'])
metricsframe.to_csv("ElasticNetmetrics.csv")
#metricsframe.sort_values(['kendalltau'], ascending=False)[['alpha','l1_ratio']].values[:12]

# In[29]:

metricsframe = pd.read_csv("ElasticNetmetrics.csv")[['alpha', 'l1_ratio', 'mse', 'sharpe', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score']]
metricsframe

# In[50]:

with open('elastic_table.html', 'w') as fo:
    metricsframe.sort_values(['sharpe'], ascending=False).iloc[:5,:].to_html(fo, float_format=lambda x: '%.3f' % x)

# In[74]:

# do a full backtest using the selected parameters for ElasticNet
backtestmodel = BacktestModel(X, Y,
                              create_model=create_model((10, 0.1)),
                              coef_dict_param="all",
                              startindex=FIRST_TRAIN_MONTHS,
                              fit_missing='mean',
                              scaler=None)
#backtestmodel.walkforward_xval(n_splits=5, verbose=False)
backtestmodel.gen_predictions(verbose=False)
backtestmodel.gen_returns(calc_returns, verbose=False)
backtestmodel.report_returns(start_date=start_date_str, freq='M')
backtestmodel.evaluate_predictions()
backtestmodel.evaluate_quantiles(chart=False, verbose=False)
mychart([backtestmodel.cumulative_return, perf_LASSO_each_timestep], ["ElasticNet", "LASSO/OLS"], title="ElasticNet v. LASSO/OLS")

# In[75]:

# try 'timestep' instead of 'all' - no improvement
sharpe_list = []
mse_list = []
kendalltau_list = []
excess_tp_list = []
quintile_accuracy_list = []
directional_accuracy_list = []
keys = []

def create_model(params):
    alpha, l1_ratio = params
    def myclosure():
        return sklearn.linear_model.ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    return myclosure

alphas = [1, 3, 10, 30, 100]
l1_ratios = [.01, .05, .1, .5, .667, .75, .9, .95, .99, 1]  # 1 = LASSO, 0 = Ridge
params_list = list(product(alphas, l1_ratios))
model_list = list(map(create_model, params_list))

for i in range(len(params_list)):
    print(params_list[i])
    estimator = model_list[i]
    backtestmodel = BacktestModel(X, Y,
                                  create_model=estimator,
                                  coef_dict_param="timestep",
                                  startindex=FIRST_TRAIN_MONTHS,
                                  fit_missing='mean',
                                  scaler=None)
    #backtestmodel.walkforward_xval(n_splits=5, verbose=False)
    backtestmodel.gen_predictions(verbose=False)
    backtestmodel.gen_returns(calc_returns, verbose=False)
    backtestmodel.report_returns(start_date=start_date_str, freq='M')
    backtestmodel.evaluate_predictions()
    backtestmodel.evaluate_quantiles(chart=False, verbose=False)
    keys.append(str(estimator)[8:-2])
    sharpe_list.append(backtestmodel.sharpe)
    mse_list.append(backtestmodel.mse)
    kendalltau_list.append(backtestmodel.kendalltau)
    excess_tp_list.append(backtestmodel.excess_tp)
    quintile_accuracy_list.append(backtestmodel.quintile_accuracy)
    directional_accuracy_list.append(backtestmodel.directional_accuracy)
    print("---")

# In[76]:

# make a dataframe with all the metrics
flatlist = []
for key, mse, sharpe, quantile_score, directional_score, kt, excess_tp in zip(params_list, mse_list, sharpe_list, quintile_accuracy_list, directional_accuracy_list, kendalltau_list, excess_tp_list):
    flatlist.append([key[0], key[1], mse, sharpe, quantile_score, directional_score, kt, excess_tp])
elasticframe2 = pd.DataFrame(flatlist, columns=['alpha', 'l1_ratio', 'mse', 'sharpe', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score'])
elasticframe2.to_csv("ElasticNetmetrics2.csv")
#metricsframe.sort_values(['kendalltau'], ascending=False)[['alpha','l1_ratio']].values[:12]

# In[30]:

elasticframe2 = pd.read_csv("ElasticNetmetrics2.csv")
elasticframe2

# In[78]:

# enumerate classifiers - didn't run these, maybe next time
estimators = []
for name, class_ in all_estimators():
    if issubclass(class_, sklearn.base.ClassifierMixin):
        if hasattr(class_, 'predict_proba'):
            print(class_)
            estimators.append(class_)

# In[79]:

estimators
# ### Perform cross-validation on Keras neural networks to select the best architecture

# In[29]:

# use keras instead of sklearn's MLPRegressor
# wrap the keras model in a class
# multi-output: predict all 30 industries simultaneously to speed things up
# fit takes a list of response ys; predict returns a list of y_predict arrays
# no coef_dict

INPUT_DIM = X.shape[1]
print(INPUT_DIM)
OUTPUT_DIM = len(responses)  # 30
BATCH_SIZE = 137
EPOCHS = 1000
# the epoch count is arbitrary, which bugs me.
# one improvement would be to backtest and walk-forward xval with an xval set:
# leave 30 months out of the training fold, train until the error on those 30 months is
# minimized, and eval on the following fold.
# the other thing that bugs me: higher epoch counts take longer and yield worse results,
# so there's some 'regularization through under-training' going on. but training is
# stochastic and never proceeds at the same speed, so a chosen model might need more
# or less training in production

class KerasBacktestModel(object):

    def __init__(self, n_hidden_layers=2, hidden_layer_size=32, reg_penalty=0.0001, epochs=EPOCHS, verbose=True):
        self.epochs = epochs
        main_input = Input(shape=(INPUT_DIM,), dtype='float32', name='main_input')
        lastlayer = main_input
        for i in range(n_hidden_layers):
            if verbose:
                print("layer %d size %d, reg_penalty %.8f" % (i + 1, hidden_layer_size, reg_penalty))
            # note: the positional arg to l1_l2 sets only the l1 penalty; l2 keeps its default
            lastlayer = Dense(units=hidden_layer_size,
                              activation='relu',
                              kernel_initializer=keras.initializers.glorot_uniform(),
                              kernel_regularizer=keras.regularizers.l1_l2(reg_penalty),
                              name="Dense%02d" % i)(lastlayer)
        outputs = []
        for i in range(OUTPUT_DIM):  # OUTPUT_DIM output heads
            outputs.append(Dense(1, activation='linear', name='output%02d' % i)(lastlayer))
        self.model = Model(inputs=[main_input], outputs=outputs)
        if verbose:
            print(self.model.summary())
        self.model.compile(loss="mse", metrics=["mse"], optimizer="adam", loss_weights=[1.]*OUTPUT_DIM)

    def fit(self, X, Y, epochs=None, verbose=False):
        # convert Y to a list of ys
        Y_list = [Y[:,i] for i in range(OUTPUT_DIM)]
        if epochs:
            self.epochs = epochs
        retval = self.model.fit(X, Y_list, batch_size=BATCH_SIZE, epochs=self.epochs, verbose=verbose)
        self.history = retval.history
        self.last_loss_array = self.history['loss']
        self.last_loss = self.history['loss'][-1]  # includes regularization loss
        return retval

    def predict(self, X, verbose=False):
        y_list = self.model.predict(X)
        # convert the list of ys back to a Y array
        npreds = len(y_list[0])
        Y_pred = [y.reshape(npreds) for y in y_list]
        Y_pred = np.array(Y_pred).transpose()
        return Y_pred

    def save(self, modelname):
        self.model.save("%s.h5" % modelname)
        self.model.save_weights("%s_weights.h5" % modelname)

# In[29]:

# no hidden layer + linear activation = LinearRegression
# confirm similar results to sklearn's LinearRegression
keras_model = KerasBacktestModel(n_hidden_layers=0, reg_penalty=0.0, verbose=True, epochs=2000)

# In[30]:

backtestmodel = BacktestModel(X, Y,
                              model=keras_model,
                              coef_dict_param=coef_dict_all,
                              startindex=FIRST_TRAIN_MONTHS)
print(backtestmodel.fit_predict(121, npredict=3, verbose=True))
# should match the linear regression model pretty closely
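# An illustrative round-trip (toy shapes, mine) of the multi-output plumbing in KerasBacktestModel: fit() splits Y into one vector per industry head, and predict() stacks the per-head outputs back into an (n, 30) matrix.

# In[ ]:

Y_toy = np.arange(6.0).reshape(2, 3)      # 2 months, 3 "industries"
y_list = [Y_toy[:, i] for i in range(3)]  # what fit() feeds the output heads
Y_back = np.array([y.reshape(2) for y in y_list]).transpose()
assert (Y_toy == Y_back).all()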
# In[31]:

# perform cross-validation on many Keras NN architectures
MODELPREFIX = "NN"

n_hiddens = [1, 2, 3]
layer_sizes = [1, 2, 4, 8, 16]
reg_penalties = [0.0, 0.1, 0.3, 1, 3, 10, 30, 100]
hyperparameter_combos = list(product(n_hiddens, layer_sizes, reg_penalties))

def create_keras_model(n_hidden_layers, layer_size, reg_penalty, verbose=False):
    return KerasBacktestModel(n_hidden_layers=n_hidden_layers,
                              hidden_layer_size=layer_size,
                              reg_penalty=reg_penalty,
                              verbose=verbose)

print("%s Running %d experiments" % (time.strftime("%H:%M:%S"), len(hyperparameter_combos)))

# should really just use a list and convert to a dataframe
keys = []
mses = []
sharpes = []
quantile_scores = []
kendalltaus = []
excess_true_positives = []
directional_accuracies = []

for counter, param_list in enumerate(hyperparameter_combos):
    n_hidden_layers, layer_size, reg_penalty = param_list
    print("%s Running experiment %d of %d" % (time.strftime("%H:%M:%S"), counter+1, len(hyperparameter_combos)))
    key = (n_hidden_layers, layer_size, reg_penalty)
    print("%s n_hidden_layers = %d, hidden_layer_size = %d, reg_penalty = %.6f" % (time.strftime("%H:%M:%S"), n_hidden_layers, layer_size, reg_penalty))
    experiment_model = BacktestModel(X, Y,
                                     create_keras_model(n_hidden_layers, layer_size, reg_penalty),
                                     coef_dict_param="all",
                                     startindex=FIRST_TRAIN_MONTHS,
                                     fit_missing='mean',
                                     scaler=None)
    experiment_model.walkforward_xval(n_splits=5)
    experiment_model.model.save("%s_%d%d_%.6f" % (MODELPREFIX, n_hidden_layers, layer_size, reg_penalty))
    experiment_model.gen_returns(calc_returns, verbose=False)
    experiment_model.report_returns(start_date=start_date_str, freq='M')
    experiment_model.evaluate_predictions()
    experiment_model.evaluate_quantiles(chart=False, verbose=False)
    print("---")
    keys.append(key)
    mses.append(experiment_model.mse)
    quantile_scores.append(experiment_model.quintile_accuracy)
    directional_accuracies.append(experiment_model.directional_accuracy)
    kendalltaus.append(experiment_model.kendalltau)
    excess_true_positives.append(experiment_model.excess_tp)
    sharpes.append(experiment_model.sharpe)

# In[33]:

# review metrics - not very good
flatlist = []
for key, mse, sharpe, quantile_score, directional_score, kt, excess_tp in zip(keys, mses, sharpes, quantile_scores, directional_accuracies, kendalltaus, excess_true_positives):
    flatlist.append([key[0], key[1], key[2], mse, sharpe, quantile_score, directional_score, kt, excess_tp])
metricsframe = pd.DataFrame(flatlist, columns=['layers', 'layer_size', 'reg_penalty', 'mse', 'sharpe', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score'])
metricsframe.to_csv("KerasNeuralNetMetrics.csv")
#metricsframe.sort_values(['kendalltau'], ascending=False)[['alpha','l1_ratio']].values[:12]

# In[2]:

metricsframe = pd.read_csv("KerasNeuralNetMetrics.csv")[['layers', 'layer_size', 'reg_penalty', 'mse', 'sharpe', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score']]
metricsframe

# In[53]:

with open('nn_by_mse.html', 'w') as fo:
    metricsframe.sort_values(['mse'], ascending=True).iloc[:8,:].to_html(fo, float_format=lambda x: '%.3f' % x)
with open('nn_by_sharpe.html', 'w') as fo:
    metricsframe.sort_values(['sharpe'], ascending=False).iloc[:8,:].to_html(fo, float_format=lambda x: '%.3f' % x)

# In[36]:

plot_matrix(metricsframe, "layers", "layer_size", x_suffix=" layers", y_suffix=" units")
plot_matrix(metricsframe, "layers", "reg_penalty", x_suffix=" layers", y_suffix=" p")
plot_matrix(metricsframe, "reg_penalty", "layer_size", x_suffix=" p", y_suffix=" units")

# In[32]:

pd.DataFrame(metricsframe.groupby(['layers'])[['sharpe', 'mse', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score']].mean())

# In[33]:

pd.DataFrame(metricsframe.groupby(['layer_size'])[['sharpe', 'mse', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score']].mean())

# In[34]:

pd.DataFrame(metricsframe.groupby(['reg_penalty'])[['sharpe', 'mse', 'quantile_score', 'direction_score', 'kendalltau', 'truepositive_score']].mean())

# In[40]:

myscatter(mses, sharpes, title="Sharpe vs. MSE")
myscatter(quantile_scores, sharpes, title="Sharpe vs. Quantile Accuracy")
myscatter(directional_accuracies, sharpes, title="Sharpe vs. Directional Accuracy")
myscatter(kendalltaus, sharpes, title="Sharpe vs. Kendall's Tau")
myscatter(excess_true_positives, sharpes, title="Sharpe vs. True Positives")
True Positives") # In[95]: # run selected model experiment_model = BacktestModel(X, Y, create_keras_model(1,16,3.0), coef_dict_param="all", startindex=FIRST_TRAIN_MONTHS, fit_missing='mean', scaler=None) experiment_model.gen_predictions(verbose=False) #experiment_model.walkforward_xval(n_splits=5) #experiment_model.model.save("%s_%d%d_%.6f" % (MODELPREFIX, n_hidden_layers,layer_size,reg_penalty)) experiment_model.gen_returns(calc_returns, verbose=False) experiment_model.report_returns(start_date=start_date_str, freq='M') experiment_model.evaluate_predictions() experiment_model.evaluate_quantiles(chart=False, verbose=False) experiment_model.model.save("%s_%d%d_%.6f" % (MODELPREFIX, 1, 16, 3) # In[ ]: # backtest 2, 16, 0.3 with walk-forward MSE of 116.058 and walk-forward Sharpe of 0.779 # full backtest Sharpe is 0.157 def create_keras_model(n_hidden_layers, layer_size, reg_penalty, verbose=False): return KerasBacktestModel(n_hidden_layers = n_hidden_layers, hidden_layer_size = layer_size, reg_penalty = reg_penalty, verbose=verbose) # run selected model experiment_model = BacktestModel(X, Y, create_keras_model(2,16,0.3), coef_dict_param="all", startindex=FIRST_TRAIN_MONTHS, fit_missing='mean', scaler=None) experiment_model.gen_predictions(verbose=False) #experiment_model.walkforward_xval(n_splits=5) #experiment_model.model.save("%s_%d%d_%.6f" % (MODELPREFIX, n_hidden_layers,layer_size,reg_penalty)) experiment_model.gen_returns(calc_returns, verbose=False) experiment_model.report_returns(start_date=start_date_str, freq='M') experiment_model.evaluate_predictions() experiment_model.evaluate_quantiles(chart=False, verbose=False) experiment_model.model.save("%s_%d%d_%.6f" % (MODELPREFIX, 2, 16, 0.3) #Mean return: 0.835% #Monthly Sharpe ratio: 0.157 #OOS MSE across all predictions: 125.1283 #In-sample MSE: 14.3808 #Variance: 39.4097 #R-squared: -2.1751 #Avg rank correlation (Kendall's tau): 0.0159 (Expected: 0) #5-quintile accuracy: 0.2184 (Expected: 0.2) #Long/short/flat accuracy: 0.4707 (Expected: 0.44) #Excess true positive in quintiles 1 + 5: 207.800000 # In[30]: # backtest 1, 16, 30 with walk-forward MSE of 40.933 and walk-forward Sharpe of 0.411 def create_keras_model(n_hidden_layers, layer_size, reg_penalty, verbose=False): return KerasBacktestModel(n_hidden_layers = n_hidden_layers, hidden_layer_size = layer_size, reg_penalty = reg_penalty, verbose=verbose) # run selected model experiment_model = BacktestModel(X, Y, create_keras_model(1,16,30), coef_dict_param="all", startindex=FIRST_TRAIN_MONTHS, fit_missing='mean', scaler=None) experiment_model.gen_predictions(verbose=False) #experiment_model.walkforward_xval(n_splits=5) #experiment_model.model.save("%s_%d%d_%.6f" % (MODELPREFIX, n_hidden_layers,layer_size,reg_penalty)) experiment_model.gen_returns(calc_returns, verbose=False) experiment_model.report_returns(start_date=start_date_str, freq='M') experiment_model.evaluate_predictions() experiment_model.evaluate_quantiles(chart=False, verbose=False) experiment_model.model.save("%s_%d%d_%.6f" % (MODELPREFIX, 1, 16, 30) # In[ ]: ..................................... 12:32