In [1]:

from statsmodels.tsa.statespace.varmax import VARMAX
from statsmodels.tsa import stattools
from sklearn.metrics import mean_squared_error

import pandas as pd
import numpy as np

%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

# https://www.kaggle.com/thebrownviking20/everything-you-can-do-with-a-time-series
plt.style.use('fivethirtyeight') 
# Above is a special style template for matplotlib, highly useful for visualizing time series data

We have app data that for each user pairs reported app metrics and app name:¶

In [2]:

appDf = pd.read_csv("app.data", names=["user", "date", "app", "metric"])
appDf['date'] = pd.to_datetime(appDf['date'])
appDf.info()
print(appDf.user.unique(), appDf.app.unique())
appDf.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 497 entries, 0 to 496
Data columns (total 4 columns):
user      497 non-null object
date      497 non-null datetime64[ns]
app       497 non-null object
metric    497 non-null float64
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 15.6+ KB
['user_2' 'user_1' 'user_3'] [' app_2' ' app_1' ' app_3']

Out[2]:

	user	date	app	metric
0	user_2	2017-08-28 02:41:48	app_2	0.00
1	user_1	2017-08-28 11:01:01	app_1	0.01
2	user_3	2017-08-28 16:41:55	app_1	0.10
3	user_3	2017-08-29 02:43:39	app_3	0.09
4	user_1	2017-08-29 07:00:25	app_1	0.02

In [3]:

plt.figure(figsize=(50,20))
plt.tick_params(axis='both', which='major', labelsize=25)

plt.axhline(y=0, linewidth=1, linestyle='--', color='grey')

user_colors = ['r', 'g', 'b']

for i, user in enumerate(appDf.user.unique()):
    userDf = appDf[appDf['user'] == user]

    base_color = user_colors[i]
    
    plt.subplot(1,len(appDf.user.unique()),i+1)
    
    for app in userDf.app.unique():
        uaDf = userDf[userDf['app'] == app]
        
        e95 = 1.96/np.sqrt(len(uaDf))
        line_color = matplotlib.colors.to_rgb(base_color)
                
        plt.axhline(y=e95, linewidth=1, linestyle='--', color=1 - np.array(line_color)*0.66)
        plt.axhline(y=-1 * e95, linewidth=1, linestyle='--', color=np.array(line_color)*0.66)
        plt.axhspan(e95, -1 * e95, facecolor=np.array(line_color)*0.66, alpha=0.05)

        plt.plot(stattools.acf(uaDf['metric'], fft=True, nlags=140), label=user + " " + app, color=line_color)
        
    plt.legend(prop={'size': 35})

plt.show()

Interstingly, in the above we see that there is autocorrelation for user_2's app metric, but user_1 and user_3 just show a positive trend.

We have location data that uses keywords for location and indicates a change of location:¶

In [4]:

locationDf = pd.read_csv("location.data", names=["user", "date", "location"])
locationDf['date'] = pd.to_datetime(locationDf['date'])

cross_l = pd.crosstab([locationDf.date, locationDf.user], locationDf.location)

cross_l.head(3)

Out[4]:

	location	bar	girlfriends	grocers	home	lunch	park	relatives	work
date	user
2017-08-28 00:00:01	user_2	0	0	0	1	0	0	0	0
2017-08-28 00:00:01	user_3	0	0	0	1	0	0	0	0
2017-08-28 19:25:16	user_3	0	0	0	0	0	0	1	0

In [5]:

plt.figure(figsize=(50,20))
plt.tick_params(axis='both', which='major', labelsize=25)

plt.axhline(y=0, linewidth=1, linestyle='--', color='grey')

user_colors = ['r', 'g', 'b']

unsorted = cross_l.copy()
unsorted.reset_index(inplace=True)

for i, user in enumerate(unsorted.user.unique()):
    userDf = unsorted[unsorted['user'] == user]
    sourceDf = locationDf[locationDf['user'] == user]

    base_color = user_colors[i]
    line_color = np.array(matplotlib.colors.to_rgb(base_color))
    
    plt.subplot(1,len(unsorted.user.unique()),i+1)
    
    for location in sourceDf.location.unique():
        e95 = 1.96/np.sqrt(len(userDf))
        line_color = line_color * 0.9
                
        plt.axhline(y=e95, linewidth=1, linestyle='--', color=1 - line_color*0.66)
        plt.axhline(y=-1 * e95, linewidth=1, linestyle='--', color=line_color*0.66)
        plt.axhspan(e95, -1 * e95, facecolor=line_color*0.66, alpha=0.05)

        plt.plot(stattools.acf(userDf[location], fft=True, nlags=350), label=user + " " + location, color=line_color)
        
    plt.legend(prop={'size': 35})

plt.show()

plt.figure(figsize=(50,20))
plt.tick_params(axis='both', which='major', labelsize=25)

plt.axhline(y=0, linewidth=1, linestyle='--', color='grey')

user_colors = ['r', 'g', 'b']

unsorted = cross_l.copy()
unsorted.reset_index(inplace=True)

for i, user in enumerate(unsorted.user.unique()):
    userDf = unsorted[unsorted['user'] == user]
    sourceDf = locationDf[locationDf['user'] == user]

    base_color = user_colors[i]
    line_color = np.array(matplotlib.colors.to_rgb(base_color))
    
    plt.subplot(1,len(unsorted.user.unique()),i+1)
    
    for location in sourceDf.location.unique():
        e95 = 1.96/np.sqrt(len(userDf))
        line_color = line_color * 0.9
                
        plt.axhline(y=e95, linewidth=1, linestyle='--', color=1 - line_color*0.66)
        plt.axhline(y=-1 * e95, linewidth=1, linestyle='--', color=line_color*0.66)
        plt.axhspan(e95, -1 * e95, facecolor=line_color*0.66, alpha=0.05)

        plt.plot(stattools.acf(userDf[location], fft=True, nlags=50), label=user + " " + location, color=line_color)
        
    plt.legend(prop={'size': 35})

plt.show()

Here we see both longer and shorter ACF. I chose to add the shorter time frame because, perhaps by coincidence, by and large there seems to be a significant peak in all data at about lag = 18. I also was interested in lag ≈ 40. However, at these long intervals, the 95% confidence we are plotting in should have a few false positives.

Seasonality¶

Location data includes seasonality at actual seasonal levels (summer, winter), as well as weekday/weekend behavior. Here this is demonstrated with cumsum:

In [6]:

cross = cross_l.copy()

l2 = cross.groupby(['user']).cumsum().copy()
l2.reset_index(inplace=True)

plotaxis = plt.figure(figsize=(50,20)).gca()
for key, grp in l2.groupby(['user']):
    my_ts = [ts.to_julian_date() - 1721424.5 for ts in grp['date']]
    plt.plot(my_ts, grp.drop('user', axis=1).drop('date', axis=1), label=key)

plotaxis.xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%d/%m/%y')
)
xlabels = plotaxis.get_xticklabels()
plt.setp(xlabels, rotation=85, fontsize=25)
ylabels = plotaxis.get_yticklabels()
plt.setp(ylabels, fontsize=25)

plt.legend(bbox_to_anchor=(.02, 0.52, 1., .102), loc=3,
           ncol=2, borderaxespad=0., prop={'size': 26})    

#l3 = l2[l2['user'] == 'user_1'].copy()
#l3['month']=l3['date'].dt.month
#grouped = l3.groupby(l3['month'])
#
#for m in grouped.groups.keys():
#    print(l3[l3['month'] == m].tail(1))
#l2.groupby(['user']).sum()

'User/Locations'

Out[6]:

'User/Locations'

The some metric data is set to have positive or negative correlation in terms of growth, to location data. Excluding shorter term seasonality, the metric data should trend upwards:

In [7]:

#appDf.groupby(['user', 'app']).plot(x="date", y="metric", subplots=True)

plotaxis = plt.figure(figsize=(50,20)).gca()
for key, grp in appDf.groupby(['user', 'app']):
    print(key)
    my_ts = [ts.to_julian_date() - 1721424.5 for ts in grp['date']]
    plt.plot(my_ts, grp['metric'], label='%s@%s' % ("metric", key))

plotaxis.xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%d/%m/%y')
)
xlabels = plotaxis.get_xticklabels()
plt.setp(xlabels, rotation=85, fontsize=25)
ylabels = plotaxis.get_yticklabels()
plt.setp(ylabels, fontsize=25)
'Users/Apps'

('user_1', ' app_1')
('user_1', ' app_2')
('user_2', ' app_2')
('user_3', ' app_1')
('user_3', ' app_3')

Out[7]:

'Users/Apps'

In the next plot, we can see in fine detail user_3's metric trend and location data. Looking at the very beginning at increased zoom, you might notice a visual correlation between the first few gaps and the faster-rising app data. Frequency of use should also positively correlate.

In [8]:

currentUser = 'user_1'

cross = cross_l.copy()

cross.reset_index(inplace=True)

apps = appDf[appDf['user'] == currentUser].groupby('user')['app'].unique()[0]

u = cross[cross['user'] == currentUser]

a = appDf[appDf['user'] == currentUser]

ua = []
for app in apps:
    ua.append(a[a['app'] == app].head(5))

plotaxis = plt.figure(figsize=(100,10)).gca()

for key, grp in a.groupby(['app']):
    my_ts = [ts.to_julian_date() for ts in grp['date']]
    plt.plot(grp['date'], grp['metric'], '.-', label='%s@%s' % ("metric", key))    

for key, grp in u.groupby(['user']):
    my_ts = [ts.to_julian_date() for ts in grp['date']]
    plt.plot(grp['date'], grp.drop('user', axis=1).drop('date', axis=1), label=key)
    
plotaxis.xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%d/%m/%y')
)

xlabels = plotaxis.get_xticklabels()
plt.setp(xlabels, rotation=85, fontsize=25)
ylabels = plotaxis.get_yticklabels()
plt.setp(ylabels, fontsize=25)
plt.legend(bbox_to_anchor=(.02, 1.1, 1., .102), loc=3,
           ncol=2, borderaxespad=0., prop={'size': 50})    
True

Out[8]:

True

Preparing the data for ARIMA¶

We want to produce classical machine learning/statistical modeling as a baseline to justify RNN approaches. We will bin at 15 minute intervals so we have little risk of multiple location changes or app metrics per bin.

We could leave it as a very sparse data set with only the initial location change events highlighted. However, that might leave little immediacy between the most recent location change event and the metric. When the metric occurs, it is very often at some location, but this data won't show that. Another option is to set the location value at 1 for all bins until the next location change event.

A third option is to have the location value have a half-life (in order to lessen autocorrelation).

First, we will follow this strategy in preparation of an ARIMA model:

resample to 15m intervals (data set would be insufficient to form the ARIMA model as the majority are errant occluding/masking/hiding the underlying predictive signal/model/equation/weighted average necessary to provide the baseline, so we will interpolate at the end)
fill rows in both set by date, so the data sets can be combined.
convert metric data to metric values per app: first creating a column for each app.
get column categories for location
divide data into per user charts.
combine app/metric and location data.
interpolate metric data per user to get curves we can predict from.

In [332]:

locDf = locationDf.copy()
locDf.set_index('date', inplace=True)

locDfs = {}
for user, user_loc_dc in locDf.groupby('user'):
    locDfs[user] = user_loc_dc.resample('15T').agg('max').bfill()
    
aDf = appDf.copy()
aDf.set_index('date', inplace=True)

userLocAppDfs = {}
for user, a2_df in aDf.groupby('user'):
    userDf = a2_df.resample('15T').agg('max')
                
    userDf.reset_index(inplace=True)
    userDf = pd.crosstab(index=userDf['date'], columns=userDf['app'], values=userDf['metric'], aggfunc=np.mean).fillna(np.nan, downcast='infer')

    userDf['user'] = user
        
    userDf.reset_index(inplace=True)
    userDf.set_index('date', inplace=True)

    userLocAppDfs[user] = userDf.resample('15T').agg('max')
            
    loDf = locDfs[user]
    loDf.reset_index(inplace=True)
    loDf = pd.crosstab([loDf.date, loDf.user], loDf.location)
    loDf.reset_index(inplace=True)

    loDf.set_index('date', inplace=True)
    loDf.drop('user', axis=1, inplace=True)
    
    userLocAppDfs[user] = userLocAppDfs[user].join(loDf, how='outer')
    userLocAppDfs[user] = userLocAppDfs[user].resample('15T').agg('max')
    userLocAppDfs[user]['user'].fillna(user, inplace=True)
    
    for loc in locationDf[locationDf['user'] == user].location.unique():
        userLocAppDfs[user][loc] = userLocAppDfs[user][loc].replace(np.nan, 0)
        
        a = userLocAppDfs[user][loc] != 0
        userLocAppDfs[user][loc] = a.cumsum()-a.cumsum().where(~a).ffill().fillna(0).astype(int)
        
        userLocAppDfs[user][loc]=(userLocAppDfs[user][loc]-userLocAppDfs[user][loc].min())/(userLocAppDfs[user][loc].max()-userLocAppDfs[user][loc].min())
            
    for app in a2_df['app'].unique():
        userLocAppDfs[user][app].interpolate(method='linear', limit_area='inside', inplace=True)       
        userLocAppDfs[user][app].fillna(value=0, inplace=True)
        
#userLocAppDfs['user_1'].tail(5)

At this point, we have our data set per user.

Sample data¶

Here's what one user's data looks like:

In [335]:

currentUser = 'user_1'
u = userLocAppDfs[currentUser].copy()
lonames = locDf[locDf['user'] == currentUser]['location'].unique()
# https://stackoverflow.com/questions/11927715/how-to-give-a-pandas-matplotlib-bar-graph-custom-colors
locations = [(x/8.75, x/40.0, 0.85) for x in range(lonames.size)] # color grad

u[lonames].plot(color=locations, figsize=(15,10))

appColors = ['r', 'orange', 'yellow']

for i, app in enumerate(appDf[appDf['user'] == currentUser]['app'].unique()):
    u[app].plot(color=appColors[i])

#u3[135:140]

In [337]:

#u1.to_csv('user1.data')

print(u.info())
u[4*24*30:4*24*30+10].plot()
u[4*24*30:4*24*30+10]

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 15011 entries, 2017-08-28 11:00:00 to 2018-01-31 19:30:00
Freq: 15T
Data columns (total 10 columns):
 app_1        15011 non-null float64
 app_2        15011 non-null float64
user          15011 non-null object
 bar          15011 non-null float64
 grocers      15011 non-null float64
 home         15011 non-null float64
 lunch        15011 non-null float64
 park         15011 non-null float64
 relatives    15011 non-null float64
 work         15011 non-null float64
dtypes: float64(9), object(1)
memory usage: 1.3+ MB
None

Out[337]:

	app_1	app_2	user	bar	grocers	home	lunch	park	relatives	work
date
2017-09-27 11:00:00	0.167934	0.080388	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.458333
2017-09-27 11:15:00	0.167851	0.080340	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.468750
2017-09-27 11:30:00	0.167769	0.080291	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.479167
2017-09-27 11:45:00	0.167686	0.080243	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.489583
2017-09-27 12:00:00	0.167603	0.080194	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.500000
2017-09-27 12:15:00	0.167521	0.080146	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.510417
2017-09-27 12:30:00	0.167438	0.080097	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.520833
2017-09-27 12:45:00	0.167355	0.080049	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.531250
2017-09-27 13:00:00	0.167273	0.080000	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.541667
2017-09-27 13:15:00	0.167190	0.081034	user_1	0.0	0.0	0.0	0.0	0.0	0.0	0.552083

okay, let's fit a model to data through oct 3 to see if we can predict the down-trend in app_1.

In [346]:

import statsmodels.tsa.stattools as ts
from statsmodels.tsa.stattools import grangercausalitytests as gc

gc(u[[' app_1',' home']].dropna(),1)

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=39.0717 , p=0.0000  , df_denom=15007, df_num=1
ssr based chi2 test:   chi2=39.0795 , p=0.0000  , df=1
likelihood ratio test: chi2=39.0287 , p=0.0000  , df=1
parameter F test:         F=39.0717 , p=0.0000  , df_denom=15007, df_num=1

Out[346]:

{1: ({'ssr_ftest': (39.071672024882346, 4.1957245732610127e-10, 15007.0, 1),
   'ssr_chi2test': (39.07948271429893, 4.068977362162096e-10, 1),
   'lrtest': (39.02869789255783, 4.176211994187053e-10, 1),
   'params_ftest': (39.07167202458451, 4.1957245739007573e-10, 15007.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c4ed71ac8>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c4ed71208>,
   array([[0., 1., 0.]])])}

In [338]:

DAYS = 4 * 24
size = DAYS*54 # hoping

endog = apps.tolist()
exog = lonames.tolist() # all possible exog variables -- will be reset to remove constants later

columns = endog.copy()
columns.extend(exog)

series = u[columns].copy()

endogdiffs = []
for app in endog:
    min_nonzero = series[series[app] > 0].min()[app]
    print(app, min_nonzero, series[series[app] > 0][app].iloc[0])
    series.loc[series[app] == 0, app] = min_nonzero# - 0.00001
    series[app + '_log_diff'] = np.log(series[app]).diff()
    endogdiffs.append(app + '_log_diff')

series = series.replace([np.inf, -np.inf], np.nan).dropna()  
series[:size][endogdiffs].plot()

 app_1 0.01 0.01
 app_2 0.02 0.02

Out[338]:

<matplotlib.axes._subplots.AxesSubplot at 0x1c4deb0f60>

In [339]:

train, test = series[:size], series[size:size+(14*DAYS)]

train = train.loc[:, (train != train.iloc[0]).any()] # https://stackoverflow.com/questions/20209600/panda-dataframe-remove-constant-column
test = test.loc[:, (test != test.iloc[0]).any()]

ete = [x for x in list(test.loc[:, (test != test.iloc[0]).any()]) if ((x in columns) and (x not in endog))]
etr = [x for x in list(train.loc[:, (train != train.iloc[0]).any()]) if ((x in columns) and (x not in endog))]
exog = list(set(ete) & set(etr))
print(exog)

#print(train.var(), X.info())

[' work', ' grocers', ' lunch', ' bar', ' relatives', ' home']

In [356]:

# train autoregression
#model = VARMAX(train[endog], order=(1,0), exog=train[exog])
model = VARMAX(train[endogdiffs], order=(40,1,40), trend='nc', exog=train[exog])

/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/statespace/varmax.py:152: EstimationWarning: Estimation of VARMA(p,q) models is not generically robust, due especially to identification issues.
  EstimationWarning)

In [ ]:

model_fit = model.fit(model='newton')
#model_fit = model.fit(model='cg')
#print(model_fit.mle_retvals)

model_fit.plot_diagnostics()

##window = model_fit.k_ar
coef = model_fit.params

predictions = pd.DataFrame()
predictions = model_fit.forecast(steps=len(test), exog=test[exog])

y = predictions.copy()
print(y.head(3))

In [355]:

# plot
plotaxis = plt.figure(figsize=(15,10)).gca()

xlabels = plotaxis.get_xticklabels()
plt§.setp(xlabels, rotation=85, fontsize=10)
ylabels = plotaxis.get_yticklabels()
plt.setp(ylabels, fontsize=10)

for i, column in enumerate(exog):
    plt.plot(train[column], color=locations[i], linewidth=1.0)
    plt.plot(test[column], color=locations[i], linewidth=1.0)

appColors = ['r', 'orange', 'yellow']
for i, app in enumerate(endog):
    print(app, train[app].first_valid_index(), train[app].iloc[0], u[columns][app][train[app].first_valid_index()])
    plt.plot(np.exp(train[app + '_log_diff'].append(y[app + '_log_diff']).cumsum()), color=[(i/10)+0.5, (i/10)+0.5, (i/10)+0.5])
    plt.plot(np.exp(train[app + '_log_diff'].append(test[app + '_log_diff']).cumsum()), color=appColors[i])
    
plt.show()

 app_1 2017-08-28 11:15:00 0.010125 0.010125
 app_2 2017-08-28 11:15:00 0.02 0.0

Okay so this isn't a great fit, but it is a start. We see that in general the model expects the numbers to stay lower, this is probably because we are capturing the rise from start at 0, without sufficient stationary momentum.

On the negative, the deep dive at the start on the value at 1 suggests that we probably should stop using data for a day or so after 1.0 is reached, the ceiling effects the results.

On the positive, some of the trend moments are easy to see in the prediction, and is set at the right time windows given exog data. Provided we can actually predict exogenous data with sufficiency, we can hope to get good results predicting preceipitous drops. This is all the more true since the app can likely expect to be run more than every few days, so the prediction only needs to consider a short term. The big drop we were hoping to see in day two is barely a blip, but if there is a cutoff slope for signficance in the trend, we can make notification predictions even without accurate metric.

To see if a neural network can easily bring added value, we need to run a prediction without provided exogenous data.

In [145]:

import tensorflow as tf
from tensorflow.python.ops import math_ops
from ESN import EchoStateRNNCell

# takes only current needed GPU memory
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# random numbers
random_seed = 1
rng = np.random.RandomState(random_seed)

In [156]:

# Global variables
batches = 1
stime = len(train)
num_units = len(columns)
num_inputs = len(columns)
rnn_init_state = np.zeros([1, num_units], dtype="float32")

rnn_cols = np.concatenate([endogdiffs, exog])

rnn_inputs = np.zeros((batches, stime, num_inputs), dtype="float32")
rnn_inputs = train[rnn_cols].values.reshape((1, stime, num_inputs))

plt.plot(rnn_inputs[0,:,:], linewidth=1.0)
plt.show()

rnniDf = pd.DataFrame(rnn_inputs.reshape(stime, num_inputs))
plt.plot(np.exp(rnniDf[[0, 1]].cumsum()), linewidth=1.0)
#plt.ylim(-0.01, 0.01)
plt.show()

activation = lambda x: math_ops.tanh(x)

In [172]:

tf.reset_default_graph()
dynamic_graph = tf.Graph()
with dynamic_graph.as_default() as g:
    
    rng = np.random.RandomState(random_seed)

    # Init the ESN cell
    cell = EchoStateRNNCell(num_units=num_units, 
                            num_inputs=num_inputs,
                            activation=activation, 
                            decay=0.002, 
                            epsilon=1e-10,
                            alpha=0.5,
                            optimize=True,
                            optimize_vars=["rho", "decay","alpha", "sw"],
                            rng=rng)
    
    inputs = tf.placeholder(tf.float32, [batches, stime, num_inputs])
    init_state = tf.placeholder(tf.float32, [1, num_units])

    # Build the graph using the API
    states, final_state = tf.nn.dynamic_rnn(cell, 
                                            inputs, 
                                            initial_state=init_state)
    
    # tf.nn.dynamic_rnn returns the state, not the output of the cell
    outputs = tf.reshape(activation(states), [stime, num_units])
     
    # Run the simulation
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
                 
        stored_outputs = sess.run(outputs, feed_dict={inputs:rnn_inputs,
                                                   init_state:rnn_init_state})
                
        plt.plot(np.squeeze(stored_outputs), linewidth=1.0)       
        plt.show()
        
        esnoDf = pd.DataFrame(np.squeeze(stored_outputs).reshape(stime, num_inputs))
        plt.plot(esnoDf[0], linewidth=1.0, color='yellow')
        plt.plot(esnoDf[1], linewidth=1.0, color='black')
        plt.plot(rnniDf[[0, 1]], linewidth=1.0)
        #plt.ylim(0, 1)
        plt.show()

In [95]:

tf.reset_default_graph()

graph = tf.Graph()
with graph.as_default() as g:
    
    rng = np.random.RandomState(random_seed)
    lr = 0.01
    
    # Build the graph
    
    inputs = tf.placeholder(tf.float32, [batches, stime, num_inputs])
    target = tf.placeholder(tf.float32, [stime, 1])
    init_state = tf.placeholder(tf.float32, [1, num_units])

    # Init the ESN cell
    print("Making ESN init graph ...")
    cell = EchoStateRNNCell(num_units=num_units, 
                            activation=activation, 
                            decay=0.1, 
                            alpha=0.5,
                            rng=rng, 
                            optimize=True,
                            optimize_vars=["rho", "decay","alpha", "sw"])
    print("Done")
    
    # cell spreading of activations
    print("Making ESN spreading graph ...")
    states = []
    state = init_state
    for t in range(stime):
        state,_ = cell(inputs=inputs[0,t:(t+1),:], state=state)
        states.append(state)
    outputs = tf.reshape(states, [stime, num_units])   
    print("Done")

    # ridge regression
    print("Making regression graph ...")
    # do the regression on a training subset of the timeseries
    begin =  0
    end = stime
    
    # optimize also lambda
    lmb = tf.get_variable("lmb", initializer=0.1, 
                          dtype=tf.float32, trainable=True)
    
    output_slice = outputs[begin:end,:]

    Wout = tf.matmul( 
            tf.matrix_inverse(tf.matmul(tf.transpose(output_slice), output_slice) +
                lmb*tf.eye(num_units)),
            tf.matmul(tf.transpose(output_slice), target[begin:end,:]) )
    print("Done")
    
    # readout
    print("Making readout spreading graph ...")
    readouts = tf.matmul(outputs, Wout)
    print("Done") 
    
    # train graph
    print("Making training graph ...")    
    # calculate the loss over all the timeseries (escluded the beginning)
    clip_rho = cell.rho.assign(tf.clip_by_value(cell.rho, 0.0, 1.0))
    clip_alpha = cell.alpha.assign(tf.clip_by_value(cell.alpha, 0.0, 1.0))
    clip_decay = cell.decay.assign(tf.clip_by_value(cell.decay, 0.0, 1.0))
    clip_sw = cell.decay.assign(tf.clip_by_value(cell.sw, 0.0001, 0.5))
    clip_lmb = cell.decay.assign(tf.clip_by_value(lmb, 0.0001, 0.5))
    clip = tf.group(clip_rho, clip_alpha, clip_decay,clip_sw, clip_lmb)
    loss = NRMSE(target[begin:end,:], readouts[begin:end,:]) 
    try: # if optimize == True
        optimizer = tf.train.GradientDescentOptimizer(lr)
        train = optimizer.minimize(loss)
    except ValueError: # if optimize == False
        train = tf.get_variable("trial", (), dtype=None)
    print("Done")

Making ESN init graph ...
Done
Making ESN spreading graph ...

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1625   try:
-> 1626     c_op = c_api.TF_FinishOperation(op_desc)
   1627   except errors.InvalidArgumentError as e:

InvalidArgumentError: Dimensions must be equal, but are 8 and 1 for 'echo_state_rnn_cell/MatMul' (op: 'MatMul') with input shapes: [1,8], [1,8].

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-95-4604f6391adb> in <module>()
     29     state = init_state
     30     for t in range(stime):
---> 31         state,_ = cell(inputs=inputs[0,t:(t+1),:], state=state)
     32         states.append(state)
     33     outputs = tf.reshape(states, [stime, num_units])

/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
    231         setattr(self, scope_attrname, scope)
    232       with scope:
--> 233         return super(RNNCell, self).__call__(inputs, state)
    234 
    235   def _rnn_get_variable(self, getter, *args, **kwargs):

/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
    362 
    363       # Actually call layer
--> 364       outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
    365 
    366     if not context.executing_eagerly():

/anaconda3/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
    767 
    768       if not in_deferred_mode:
--> 769         outputs = self.call(inputs, *args, **kwargs)
    770         if outputs is None:
    771           raise ValueError('A layer\'s `call` method should return a Tensor '

~/Public/Harvard/Thesis/data/generateFakedData/ESN.py in call(self, inputs, state)
    119         new_state = state + self.decay*(
    120                 self._activation(
--> 121                     tf.matmul(inputs, self.W * self.sw) +
    122                     tf.matmul(self._activation(state), self.U * self.rho_one * self.rho)
    123                 )

/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py in matmul(a, b, transpose_a, transpose_b, adjoint_a, adjoint_b, a_is_sparse, b_is_sparse, name)
   2051     else:
   2052       return gen_math_ops.mat_mul(
-> 2053           a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
   2054 
   2055 

/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py in mat_mul(a, b, transpose_a, transpose_b, name)
   4558     _, _, _op = _op_def_lib._apply_op_helper(
   4559         "MatMul", a=a, b=b, transpose_a=transpose_a, transpose_b=transpose_b,
-> 4560         name=name)
   4561     _result = _op.outputs[:]
   4562     _inputs_flat = _op.inputs

/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    785         op = g.create_op(op_type_name, inputs, output_types, name=scope,
    786                          input_types=input_types, attrs=attr_protos,
--> 787                          op_def=op_def)
    788       return output_structure, op_def.is_stateful, op
    789 

/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
    486                 'in a future version' if date is None else ('after %s' % date),
    487                 instructions)
--> 488       return func(*args, **kwargs)
    489     return tf_decorator.make_decorator(func, new_func, 'deprecated',
    490                                        _add_deprecated_arg_notice_to_docstring(

/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in create_op(***failed resolving arguments***)
   3270           input_types=input_types,
   3271           original_op=self._default_original_op,
-> 3272           op_def=op_def)
   3273       self._create_op_helper(ret, compute_device=compute_device)
   3274     return ret

/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
   1788           op_def, inputs, node_def.attr)
   1789       self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1790                                 control_input_ops)
   1791 
   1792     # Initialize self._outputs.

/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1627   except errors.InvalidArgumentError as e:
   1628     # Convert to ValueError for backwards compatibility.
-> 1629     raise ValueError(str(e))
   1630 
   1631   return c_op

ValueError: Dimensions must be equal, but are 8 and 1 for 'echo_state_rnn_cell/MatMul' (op: 'MatMul') with input shapes: [1,8], [1,8].

In [ ]: