from statsmodels.tsa.statespace.varmax import VARMAX
from statsmodels.tsa import stattools
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Plot styling borrowed from:
# https://www.kaggle.com/thebrownviking20/everything-you-can-do-with-a-time-series
# 'fivethirtyeight' is a matplotlib style template that suits time series plots.
plt.style.use('fivethirtyeight')

# Read the app-metric events; the file has no header row, so column names are
# supplied here, and the timestamp column is parsed afterwards.
appDf = pd.read_csv(
    "app.data",
    names=["user", "date", "app", "metric"],
)
appDf['date'] = pd.to_datetime(appDf['date'])

# Quick sanity checks: schema, distinct users/apps, first rows.
appDf.info()
print(appDf['user'].unique(), appDf['app'].unique())
appDf.head(5)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 497 entries, 0 to 496 Data columns (total 4 columns): user 497 non-null object date 497 non-null datetime64[ns] app 497 non-null object metric 497 non-null float64 dtypes: datetime64[ns](1), float64(1), object(2) memory usage: 15.6+ KB ['user_2' 'user_1' 'user_3'] [' app_2' ' app_1' ' app_3']
user | date | app | metric | |
---|---|---|---|---|
0 | user_2 | 2017-08-28 02:41:48 | app_2 | 0.00 |
1 | user_1 | 2017-08-28 11:01:01 | app_1 | 0.01 |
2 | user_3 | 2017-08-28 16:41:55 | app_1 | 0.10 |
3 | user_3 | 2017-08-29 02:43:39 | app_3 | 0.09 |
4 | user_1 | 2017-08-29 07:00:25 | app_1 | 0.02 |
# Autocorrelation (ACF) of every (user, app) metric series, one panel per user.
plt.figure(figsize=(50, 20))
user_colors = ['r', 'g', 'b']
users = appDf['user'].unique()
for i, user in enumerate(users):
    userDf = appDf[appDf['user'] == user]
    # Cycle through the palette so more than three users do not raise IndexError.
    base_color = user_colors[i % len(user_colors)]
    line_color = matplotlib.colors.to_rgb(base_color)
    band_color = np.array(line_color) * 0.66

    ax = plt.subplot(1, len(users), i + 1)
    # Tick size and the zero reference line are configured on each panel.
    # Setting them before the subplots exist only affects an implicit
    # full-figure axes, not the panels that actually hold the curves.
    ax.tick_params(axis='both', which='major', labelsize=25)
    ax.axhline(y=0, linewidth=1, linestyle='--', color='grey')

    for app in userDf['app'].unique():
        uaDf = userDf[userDf['app'] == app]
        # +/- 1.96 / sqrt(N): the approximate 95% band drawn around zero.
        e95 = 1.96 / np.sqrt(len(uaDf))
        ax.axhline(y=e95, linewidth=1, linestyle='--', color=1 - band_color)
        ax.axhline(y=-1 * e95, linewidth=1, linestyle='--', color=band_color)
        ax.axhspan(e95, -1 * e95, facecolor=band_color, alpha=0.05)
        ax.plot(
            stattools.acf(uaDf['metric'], fft=True, nlags=140),
            label=user + " " + app,
            color=line_color,
        )
    ax.legend(prop={'size': 35})
plt.show()
Interestingly, in the plot above we see that there is autocorrelation for user_2's app metric, but user_1 and user_3 just show a positive trend.
# Read the location-change events (no header row) and parse the timestamps.
locationDf = pd.read_csv(
    "location.data",
    names=["user", "date", "location"],
)
locationDf['date'] = pd.to_datetime(locationDf['date'])

# One row per (timestamp, user), one count column per location.
cross_l = pd.crosstab(
    [locationDf['date'], locationDf['user']],
    locationDf['location'],
)
cross_l.head(3)
location | bar | girlfriends | grocers | home | lunch | park | relatives | work | |
---|---|---|---|---|---|---|---|---|---|
date | user | ||||||||
2017-08-28 00:00:01 | user_2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
user_3 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | |
2017-08-28 19:25:16 | user_3 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
def _plot_location_acf(nlags):
    """Plot the ACF of each location indicator, one panel per user.

    Reads the notebook-level ``unsorted`` (flattened crosstab), ``locationDf``
    and ``user_colors``.

    Parameters
    ----------
    nlags : int
        Number of lags passed to ``stattools.acf``.
    """
    users = unsorted['user'].unique()
    plt.figure(figsize=(50, 20))
    for i, user in enumerate(users):
        userDf = unsorted[unsorted['user'] == user]
        sourceDf = locationDf[locationDf['user'] == user]
        # Cycle through the palette so more than three users do not raise.
        base_color = user_colors[i % len(user_colors)]
        line_color = np.array(matplotlib.colors.to_rgb(base_color))
        # +/- 1.96 / sqrt(N): approximate 95% band; identical for every
        # location of this user, so it is computed once per panel.
        e95 = 1.96 / np.sqrt(len(userDf))

        ax = plt.subplot(1, len(users), i + 1)
        # Configure ticks and the zero line on the panel itself; doing it
        # before the subplots exist only touches an implicit full-figure axes.
        ax.tick_params(axis='both', which='major', labelsize=25)
        ax.axhline(y=0, linewidth=1, linestyle='--', color='grey')

        for location in sourceDf['location'].unique():
            # Darken the user's colour a little for each successive location.
            line_color = line_color * 0.9
            ax.axhline(y=e95, linewidth=1, linestyle='--', color=1 - line_color * 0.66)
            ax.axhline(y=-1 * e95, linewidth=1, linestyle='--', color=line_color * 0.66)
            ax.axhspan(e95, -1 * e95, facecolor=line_color * 0.66, alpha=0.05)
            ax.plot(
                stattools.acf(userDf[location], fft=True, nlags=nlags),
                label=user + " " + location,
                color=line_color,
            )
        ax.legend(prop={'size': 35})
    plt.show()


user_colors = ['r', 'g', 'b']
# Flatten the (date, user) MultiIndex into ordinary columns.
unsorted = cross_l.copy()
unsorted.reset_index(inplace=True)

# Long horizon first, then a zoomed-in view of the first 50 lags.
_plot_location_acf(nlags=350)
_plot_location_acf(nlags=50)
Here we see both a longer and a shorter ACF. I chose to add the shorter time frame because, perhaps by coincidence, there seems by and large to be a significant peak in all data at about lag = 18. I was also interested in lag ≈ 40. However, over this many lags, a 95% confidence band should be expected to produce a few false positives.
Location data includes seasonality at actual seasonal levels (summer, winter), as well as weekday/weekend behavior. Here this is demonstrated with cumsum:
# Cumulative visit counts per location, accumulated separately for each user.
cross = cross_l.copy()
l2 = cross.groupby(['user']).cumsum().copy()
l2.reset_index(inplace=True)

plotaxis = plt.figure(figsize=(50, 20)).gca()
# Grouping by the bare column name keeps ``key`` a plain string label.
for key, grp in l2.groupby('user'):
    # Plot the datetimes directly and let matplotlib convert them. The previous
    # ``to_julian_date() - 1721424.5`` offset reproduces matplotlib's pre-3.3
    # date epoch, so the DateFormatter below would mislabel the axis on any
    # release that uses the current default epoch.
    plotaxis.plot(grp['date'], grp.drop(['user', 'date'], axis=1), label=key)

plotaxis.xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%d/%m/%y')
)
xlabels = plotaxis.get_xticklabels()
plt.setp(xlabels, rotation=85, fontsize=25)
ylabels = plotaxis.get_yticklabels()
plt.setp(ylabels, fontsize=25)
plt.legend(bbox_to_anchor=(.02, 0.52, 1., .102), loc=3,
           ncol=2, borderaxespad=0., prop={'size': 26})
#l3 = l2[l2['user'] == 'user_1'].copy()
#l3['month']=l3['date'].dt.month
#grouped = l3.groupby(l3['month'])
#
#for m in grouped.groups.keys():
# print(l3[l3['month'] == m].tail(1))
#l2.groupby(['user']).sum()
'User/Locations'
'User/Locations'
Some of the metric data is set to have a positive or negative correlation, in terms of growth, with the location data. Excluding shorter-term seasonality, the metric data should trend upwards:
#appDf.groupby(['user', 'app']).plot(x="date", y="metric", subplots=True)
# Raw metric over time, one line per (user, app) pair.
plotaxis = plt.figure(figsize=(50, 20)).gca()
for key, grp in appDf.groupby(['user', 'app']):
    print(key)
    # Plot the datetimes directly and let matplotlib convert them. The previous
    # ``to_julian_date() - 1721424.5`` offset only matches matplotlib's
    # pre-3.3 date epoch and mislabels the axis on newer releases.
    plotaxis.plot(grp['date'], grp['metric'], label='%s@%s' % ("metric", key))

plotaxis.xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%d/%m/%y')
)
xlabels = plotaxis.get_xticklabels()
plt.setp(xlabels, rotation=85, fontsize=25)
ylabels = plotaxis.get_yticklabels()
plt.setp(ylabels, fontsize=25)
'Users/Apps'
('user_1', ' app_1') ('user_1', ' app_2') ('user_2', ' app_2') ('user_3', ' app_1') ('user_3', ' app_3')
'Users/Apps'
In the next plot, we can see in fine detail user_3's metric trend and location data. Looking at the very beginning at increased zoom, you might notice a visual correlation between the first few gaps and the faster-rising app data. Frequency of use should also positively correlate.
# Overlay one user's app metrics with that user's location-change indicators.
currentUser = 'user_1'
cross = cross_l.copy()
cross.reset_index(inplace=True)

# Distinct apps of the selected user. ``.iloc[0]`` takes the first (only) group
# explicitly by position; plain ``[0]`` on this label-indexed Series relies on
# pandas' positional fallback.
apps = appDf[appDf['user'] == currentUser].groupby('user')['app'].unique().iloc[0]
u = cross[cross['user'] == currentUser]
a = appDf[appDf['user'] == currentUser]
# First five metric rows of each app (kept for ad-hoc inspection).
ua = [a[a['app'] == app].head(5) for app in apps]

plotaxis = plt.figure(figsize=(100, 10)).gca()
# Grouping by the bare column name keeps ``key`` a plain string in the labels.
for key, grp in a.groupby('app'):
    plt.plot(grp['date'], grp['metric'], '.-', label='%s@%s' % ("metric", key))
for key, grp in u.groupby('user'):
    plt.plot(grp['date'], grp.drop('user', axis=1).drop('date', axis=1), label=key)

plotaxis.xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%d/%m/%y')
)
xlabels = plotaxis.get_xticklabels()
plt.setp(xlabels, rotation=85, fontsize=25)
ylabels = plotaxis.get_yticklabels()
plt.setp(ylabels, fontsize=25)
plt.legend(bbox_to_anchor=(.02, 1.1, 1., .102), loc=3,
           ncol=2, borderaxespad=0., prop={'size': 50})
True
True
We want to produce classical machine learning/statistical modeling as a baseline to justify RNN approaches. We will bin at 15 minute intervals so we have little risk of multiple location changes or app metrics per bin.
We could leave it as a very sparse data set with only the initial location change events highlighted. However, that might leave little immediacy between the most recent location change event and the metric. When the metric occurs, it is very often at some location, but this data won't show that. Another option is to set the location value at 1 for all bins until the next location change event.
A third option is to have the location value have a half-life (in order to lessen autocorrelation).
First, we will follow this strategy in preparation of an ARIMA model:
# Build, for every user, one frame on a regular 15-minute grid that holds the
# app metrics (interpolated) and one scaled "run length" column per location.

# Per-user location events on the 15-minute grid; empty bins are back-filled
# from the next bin that has an event.
locDf = locationDf.copy()
locDf.set_index('date', inplace=True)
locDfs = {}
for user, user_loc_dc in locDf.groupby('user'):
    locDfs[user] = user_loc_dc.resample('15T').agg('max').bfill()

aDf = appDf.copy()
aDf.set_index('date', inplace=True)
userLocAppDfs = {}
for user, a2_df in aDf.groupby('user'):
    # App metrics: one column per app, one row per 15-minute bin.
    userDf = a2_df.resample('15T').agg('max').reset_index()
    userDf = pd.crosstab(index=userDf['date'], columns=userDf['app'],
                         values=userDf['metric'], aggfunc=np.mean)
    userDf['user'] = user
    binned = userDf.resample('15T').agg('max')

    # Location indicators: one column per location. ``reset_index()`` returns a
    # new frame, so the cached ``locDfs[user]`` is no longer modified in place.
    loDf = locDfs[user].reset_index()
    loDf = pd.crosstab([loDf.date, loDf.user], loDf.location)
    loDf = loDf.reset_index().set_index('date').drop('user', axis=1)

    merged = binned.join(loDf, how='outer').resample('15T').agg('max')
    # Assign results back instead of calling ``inplace=True`` on a column
    # selection, which may act on a temporary copy and leave the frame as is.
    merged['user'] = merged['user'].fillna(user)

    for loc in locationDf[locationDf['user'] == user].location.unique():
        present = merged[loc].replace(np.nan, 0) != 0
        running = present.cumsum()
        # Length of the current run of consecutive flagged bins; drops back to
        # zero in every bin where the location is not flagged.
        streak = running - running.where(~present).ffill().fillna(0).astype(int)
        # Min-max scale to [0, 1]. NOTE(review): a column that never changes
        # has max == min and becomes NaN here, exactly as before.
        merged[loc] = (streak - streak.min()) / (streak.max() - streak.min())

    for app in a2_df['app'].unique():
        # Interpolate linearly between observations only, then zero-fill the
        # bins before the first and after the last observation.
        filled = merged[app].interpolate(method='linear', limit_area='inside')
        merged[app] = filled.fillna(value=0)

    userLocAppDfs[user] = merged
#userLocAppDfs['user_1'].tail(5)
# Inspect the binned frame of one user: location columns in a blue-to-purple
# gradient, app metrics in warm colours.
currentUser = 'user_1'
u = userLocAppDfs[currentUser].copy()
lonames = locDf[locDf['user'] == currentUser]['location'].unique()
# https://stackoverflow.com/questions/11927715/how-to-give-a-pandas-matplotlib-bar-graph-custom-colors
locations = [(x/8.75, x/40.0, 0.85) for x in range(lonames.size)] # color grad
u[lonames].plot(color=locations, figsize=(15,10))
appColors = ['r', 'orange', 'yellow']
for i, app in enumerate(appDf[appDf['user'] == currentUser]['app'].unique()):
    # Cycle through the palette so a fourth app does not raise IndexError.
    u[app].plot(color=appColors[i % len(appColors)])
#u3[135:140]
#u1.to_csv('user1.data')
# ``info()`` prints the summary itself and returns None; wrapping it in
# ``print`` only appended a stray "None" to the output.
u.info()
# Ten consecutive bins starting 30 days (4 bins/hour * 24 hours * 30) in.
window = slice(4*24*30, 4*24*30 + 10)
u[window].plot()
u[window]
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 15011 entries, 2017-08-28 11:00:00 to 2018-01-31 19:30:00 Freq: 15T Data columns (total 10 columns): app_1 15011 non-null float64 app_2 15011 non-null float64 user 15011 non-null object bar 15011 non-null float64 grocers 15011 non-null float64 home 15011 non-null float64 lunch 15011 non-null float64 park 15011 non-null float64 relatives 15011 non-null float64 work 15011 non-null float64 dtypes: float64(9), object(1) memory usage: 1.3+ MB None
app_1 | app_2 | user | bar | grocers | home | lunch | park | relatives | work | |
---|---|---|---|---|---|---|---|---|---|---|
date | ||||||||||
2017-09-27 11:00:00 | 0.167934 | 0.080388 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.458333 |
2017-09-27 11:15:00 | 0.167851 | 0.080340 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.468750 |
2017-09-27 11:30:00 | 0.167769 | 0.080291 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.479167 |
2017-09-27 11:45:00 | 0.167686 | 0.080243 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.489583 |
2017-09-27 12:00:00 | 0.167603 | 0.080194 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.500000 |
2017-09-27 12:15:00 | 0.167521 | 0.080146 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.510417 |
2017-09-27 12:30:00 | 0.167438 | 0.080097 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.520833 |
2017-09-27 12:45:00 | 0.167355 | 0.080049 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.531250 |
2017-09-27 13:00:00 | 0.167273 | 0.080000 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.541667 |
2017-09-27 13:15:00 | 0.167190 | 0.081034 | user_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.552083 |
okay, let's fit a model to data through oct 3 to see if we can predict the down-trend in app_1.
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.stattools import grangercausalitytests as gc

# Granger-causality test at lag 1 on the (' app_1', ' home') column pair.
# The column names carry a leading space, as loaded from the data files.
gc(u.loc[:, [' app_1', ' home']].dropna(), maxlag=1)
Granger Causality number of lags (no zero) 1 ssr based F test: F=39.0717 , p=0.0000 , df_denom=15007, df_num=1 ssr based chi2 test: chi2=39.0795 , p=0.0000 , df=1 likelihood ratio test: chi2=39.0287 , p=0.0000 , df=1 parameter F test: F=39.0717 , p=0.0000 , df_denom=15007, df_num=1
{1: ({'ssr_ftest': (39.071672024882346, 4.1957245732610127e-10, 15007.0, 1), 'ssr_chi2test': (39.07948271429893, 4.068977362162096e-10, 1), 'lrtest': (39.02869789255783, 4.176211994187053e-10, 1), 'params_ftest': (39.07167202458451, 4.1957245739007573e-10, 15007.0, 1.0)}, [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c4ed71ac8>, <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c4ed71208>, array([[0., 1., 0.]])])}
# Number of 15-minute bins in one day, and the training window (54 days).
DAYS = 4 * 24
size = 54 * DAYS # hoping

# Endogenous series are the user's apps; every location starts out as a
# candidate exogenous variable (constant ones are removed later).
endog = apps.tolist()
exog = lonames.tolist() # all possible exog variables -- will be reset to remove constants later
columns = endog + exog
series = u[columns].copy()

endogdiffs = []
for app in endog:
    positive = series.loc[series[app] > 0, app]
    min_nonzero = positive.min()
    print(app, min_nonzero, positive.iloc[0])
    # Replace exact zeros by the smallest positive value so the log is finite.
    series.loc[series[app] == 0, app] = min_nonzero# - 0.00001
    diff_name = app + '_log_diff'
    series[diff_name] = np.log(series[app]).diff()
    endogdiffs.append(diff_name)

# Drop rows made unusable by the differencing (NaN) or by infinities.
series = series.replace([np.inf, -np.inf], np.nan).dropna()
series.iloc[:size][endogdiffs].plot()
app_1 0.01 0.01 app_2 0.02 0.02
<matplotlib.axes._subplots.AxesSubplot at 0x1c4deb0f60>
# Train on the first ``size`` bins and test on the following 14 days.
train, test = series[:size], series[size:size+(14*DAYS)]
# Drop columns that never change within each window.
train = train.loc[:, (train != train.iloc[0]).any()] # https://stackoverflow.com/questions/20209600/panda-dataframe-remove-constant-column
test = test.loc[:, (test != test.iloc[0]).any()]
# Exogenous variables are the location columns that vary in BOTH windows.
# They are listed in the order of ``columns`` so the result is deterministic;
# ``list(set(a) & set(b))`` can come out in a different order on every run,
# which changes plot colours and the RNN input column order downstream.
exog = [
    name for name in columns
    if name not in endog and name in train.columns and name in test.columns
]
print(exog)
#print(train.var(), X.info())
[' work', ' grocers', ' lunch', ' bar', ' relatives', ' home']
# train autoregression
#model = VARMAX(train[endog], order=(1,0), exog=train[exog])
# VARMAX takes ``order=(p, q)`` and has no differencing term. The earlier
# ARIMA-style ``(40, 1, 40)`` was read as p=40, q=1 with the third entry
# ignored, so the order is now spelled as what is actually fitted. The
# endogenous series are already log-differenced upstream.
model = VARMAX(train[endogdiffs], order=(40, 1), trend='nc', exog=train[exog])
/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/statespace/varmax.py:152: EstimationWarning: Estimation of VARMA(p,q) models is not generically robust, due especially to identification issues. EstimationWarning)
# The optimiser is chosen with the ``method`` keyword. The earlier spelling
# ``model='newton'`` was swallowed as an unused extra keyword, so the default
# optimiser ran instead.
# NOTE(review): results recorded below were presumably produced with that
# default; expect different (and slower) fits with Newton on a model this size.
model_fit = model.fit(method='newton')
#model_fit = model.fit(method='cg')
#print(model_fit.mle_retvals)
model_fit.plot_diagnostics()
##window = model_fit.k_ar
coef = model_fit.params
# Out-of-sample forecast over the test window, driven by the observed
# exogenous (location) columns of that window.
predictions = model_fit.forecast(steps=len(test), exog=test[exog])
y = predictions.copy()
print(y.head(3))
# plot: exogenous location signals, then forecast vs. actual app metrics
plotaxis = plt.figure(figsize=(15,10)).gca()
xlabels = plotaxis.get_xticklabels()
plt.setp(xlabels, rotation=85, fontsize=10)
ylabels = plotaxis.get_yticklabels()
plt.setp(ylabels, fontsize=10)
for i, column in enumerate(exog):
    plt.plot(train[column], color=locations[i], linewidth=1.0)
    plt.plot(test[column], color=locations[i], linewidth=1.0)
appColors = ['r', 'orange', 'yellow']
for i, app in enumerate(endog):
    diff_name = app + '_log_diff'
    print(app, train[app].first_valid_index(), train[app].iloc[0], u[columns][app][train[app].first_valid_index()])
    # exp(cumsum(log-diff)) undoes the log-differencing up to a constant factor.
    # ``pd.concat`` replaces ``Series.append``, which newer pandas removed.
    forecast_path = pd.concat([train[diff_name], y[diff_name]]).cumsum()
    actual_path = pd.concat([train[diff_name], test[diff_name]]).cumsum()
    # Forecast in grey (lighter for each successive app), actual in colour.
    plt.plot(np.exp(forecast_path), color=[(i/10)+0.5, (i/10)+0.5, (i/10)+0.5])
    plt.plot(np.exp(actual_path), color=appColors[i])
plt.show()
app_1 2017-08-28 11:15:00 0.010125 0.010125 app_2 2017-08-28 11:15:00 0.02 0.0
Okay so this isn't a great fit, but it is a start. We see that in general the model expects the numbers to stay lower, this is probably because we are capturing the rise from start at 0, without sufficient stationary momentum.
On the negative side, the deep dive at the start, where the value is at 1, suggests that we should probably stop using data for a day or so after 1.0 is reached; the ceiling affects the results.
On the positive side, some of the trend moments are easy to see in the prediction, and they fall in the right time windows given the exog data. Provided we can actually predict the exogenous data sufficiently well, we can hope to get good results predicting precipitous drops. This is all the more true since the app can likely be expected to run more often than every few days, so the prediction only needs to cover a short term. The big drop we were hoping to see in day two is barely a blip, but if there is a cutoff slope for significance in the trend, we can make notification predictions even without an accurate metric.
To see if a neural network can easily bring added value, we need to run a prediction without provided exogenous data.
import tensorflow as tf
from tensorflow.python.ops import math_ops
from ESN import EchoStateRNNCell
# takes only current needed GPU memory
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# random numbers
random_seed = 1
rng = np.random.RandomState(random_seed)
# Global variables
batches = 1
stime = len(train)
# Network inputs: the log-differenced app metrics followed by the location
# columns that survived the constant-column filter.
rnn_cols = np.concatenate([endogdiffs, exog])
# Size the network from the columns actually fed in. ``len(columns)`` counts
# every candidate location, so the reshape below failed whenever a constant
# location column had been dropped from ``exog``.
num_inputs = len(rnn_cols)
# Reservoir size is kept equal to the input width; a later reshape of the
# reservoir outputs relies on that.
num_units = num_inputs
rnn_init_state = np.zeros([1, num_units], dtype="float32")
rnn_inputs = train[rnn_cols].values.reshape((batches, stime, num_inputs))
plt.plot(rnn_inputs[0,:,:], linewidth=1.0)
plt.show()
rnniDf = pd.DataFrame(rnn_inputs.reshape(stime, num_inputs))
# The leading columns are the endogenous log-diffs; exp(cumsum) shows them
# back on the metric scale, up to a constant factor.
plt.plot(np.exp(rnniDf[list(range(len(endogdiffs)))].cumsum()), linewidth=1.0)
#plt.ylim(-0.01, 0.01)
plt.show()
# Reservoir non-linearity; the bare function replaces a lambda that only
# forwarded its argument to it.
activation = math_ops.tanh
tf.reset_default_graph()
dynamic_graph = tf.Graph()
with dynamic_graph.as_default() as g:
    rng = np.random.RandomState(random_seed)
    # Init the ESN cell
    cell = EchoStateRNNCell(num_units=num_units,
                            num_inputs=num_inputs,
                            activation=activation,
                            decay=0.002,
                            epsilon=1e-10,
                            alpha=0.5,
                            optimize=True,
                            optimize_vars=["rho", "decay","alpha", "sw"],
                            rng=rng)
    inputs = tf.placeholder(tf.float32, [batches, stime, num_inputs])
    init_state = tf.placeholder(tf.float32, [1, num_units])
    # Build the graph using the API
    states, final_state = tf.nn.dynamic_rnn(cell,
                                            inputs,
                                            initial_state=init_state)
    # tf.nn.dynamic_rnn returns the state, not the output of the cell
    outputs = tf.reshape(activation(states), [stime, num_units])
    # Run the simulation
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        stored_outputs = sess.run(outputs, feed_dict={inputs:rnn_inputs,
                                                      init_state:rnn_init_state})
plt.plot(np.squeeze(stored_outputs), linewidth=1.0)
plt.show()
# The reservoir output has ``num_units`` columns, so reshape with that width
# rather than ``num_inputs``.
esnoDf = pd.DataFrame(np.squeeze(stored_outputs).reshape(stime, num_units))
# First two reservoir units overlaid on the first two input series.
plt.plot(esnoDf[0], linewidth=1.0, color='yellow')
plt.plot(esnoDf[1], linewidth=1.0, color='black')
plt.plot(rnniDf[[0, 1]], linewidth=1.0)
#plt.ylim(0, 1)
plt.show()
# Build the ESN training graph: unrolled reservoir, ridge-regression readout
# and a gradient step on the reservoir hyper-parameters.
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default() as g:
    rng = np.random.RandomState(random_seed)
    lr = 0.01
    # Build the graph
    inputs = tf.placeholder(tf.float32, [batches, stime, num_inputs])
    target = tf.placeholder(tf.float32, [stime, 1])
    init_state = tf.placeholder(tf.float32, [1, num_units])
    # Init the ESN cell
    print("Making ESN init graph ...")
    # ``num_inputs`` must be passed explicitly, as in the dynamic_rnn cell.
    # Without it the cell's input weights did not match the input width, which
    # raised "Dimensions must be equal, but are 8 and 1" in the first MatMul.
    cell = EchoStateRNNCell(num_units=num_units,
                            num_inputs=num_inputs,
                            activation=activation,
                            decay=0.1,
                            alpha=0.5,
                            rng=rng,
                            optimize=True,
                            optimize_vars=["rho", "decay","alpha", "sw"])
    print("Done")
    # cell spreading of activations
    print("Making ESN spreading graph ...")
    states = []
    state = init_state
    # Unroll the reservoir by hand, one time step per iteration.
    for t in range(stime):
        state,_ = cell(inputs=inputs[0,t:(t+1),:], state=state)
        states.append(state)
    outputs = tf.reshape(states, [stime, num_units])
    print("Done")
    # ridge regression
    print("Making regression graph ...")
    # do the regression on a training subset of the timeseries
    begin = 0
    end = stime
    # optimize also lambda
    lmb = tf.get_variable("lmb", initializer=0.1,
                          dtype=tf.float32, trainable=True)
    output_slice = outputs[begin:end,:]
    # Closed-form ridge solution: (X^T X + lmb * I)^-1 X^T y
    Wout = tf.matmul(
        tf.matrix_inverse(tf.matmul(tf.transpose(output_slice), output_slice) +
                          lmb*tf.eye(num_units)),
        tf.matmul(tf.transpose(output_slice), target[begin:end,:]) )
    print("Done")
    # readout
    print("Making readout spreading graph ...")
    readouts = tf.matmul(outputs, Wout)
    print("Done")
    # train graph
    print("Making training graph ...")
    # Keep every optimised hyper-parameter inside its valid range. Each clip
    # writes back to its OWN variable; ``sw`` and ``lmb`` were previously
    # written into ``cell.decay``, which left them unclipped and overwrote
    # the decay.
    clip_rho = cell.rho.assign(tf.clip_by_value(cell.rho, 0.0, 1.0))
    clip_alpha = cell.alpha.assign(tf.clip_by_value(cell.alpha, 0.0, 1.0))
    clip_decay = cell.decay.assign(tf.clip_by_value(cell.decay, 0.0, 1.0))
    clip_sw = cell.sw.assign(tf.clip_by_value(cell.sw, 0.0001, 0.5))
    clip_lmb = lmb.assign(tf.clip_by_value(lmb, 0.0001, 0.5))
    clip = tf.group(clip_rho, clip_alpha, clip_decay,clip_sw, clip_lmb)
    # calculate the loss over the selected slice of the timeseries
    # NOTE(review): NRMSE is not defined in the visible part of this notebook;
    # confirm it is defined or imported before this cell runs.
    loss = NRMSE(target[begin:end,:], readouts[begin:end,:])
    # NOTE(review): ``train`` below rebinds the name of the training DataFrame
    # used by earlier cells; re-run the split cell before reusing that frame.
    try: # if optimize == True
        optimizer = tf.train.GradientDescentOptimizer(lr)
        train = optimizer.minimize(loss)
    except ValueError: # if optimize == False
        train = tf.get_variable("trial", (), dtype=None)
    print("Done")
Making ESN init graph ... Done Making ESN spreading graph ...
--------------------------------------------------------------------------- InvalidArgumentError Traceback (most recent call last) /anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs) 1625 try: -> 1626 c_op = c_api.TF_FinishOperation(op_desc) 1627 except errors.InvalidArgumentError as e: InvalidArgumentError: Dimensions must be equal, but are 8 and 1 for 'echo_state_rnn_cell/MatMul' (op: 'MatMul') with input shapes: [1,8], [1,8]. During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) <ipython-input-95-4604f6391adb> in <module>() 29 state = init_state 30 for t in range(stime): ---> 31 state,_ = cell(inputs=inputs[0,t:(t+1),:], state=state) 32 states.append(state) 33 outputs = tf.reshape(states, [stime, num_units]) /anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope) 231 setattr(self, scope_attrname, scope) 232 with scope: --> 233 return super(RNNCell, self).__call__(inputs, state) 234 235 def _rnn_get_variable(self, getter, *args, **kwargs): /anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs) 362 363 # Actually call layer --> 364 outputs = super(Layer, self).__call__(inputs, *args, **kwargs) 365 366 if not context.executing_eagerly(): /anaconda3/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs) 767 768 if not in_deferred_mode: --> 769 outputs = self.call(inputs, *args, **kwargs) 770 if outputs is None: 771 raise ValueError('A layer\'s `call` method should return a Tensor ' ~/Public/Harvard/Thesis/data/generateFakedData/ESN.py in call(self, inputs, state) 119 new_state = state + self.decay*( 120 self._activation( --> 121 tf.matmul(inputs, self.W * self.sw) + 122 tf.matmul(self._activation(state), self.U * self.rho_one * self.rho) 123 ) 
/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py in matmul(a, b, transpose_a, transpose_b, adjoint_a, adjoint_b, a_is_sparse, b_is_sparse, name) 2051 else: 2052 return gen_math_ops.mat_mul( -> 2053 a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name) 2054 2055 /anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py in mat_mul(a, b, transpose_a, transpose_b, name) 4558 _, _, _op = _op_def_lib._apply_op_helper( 4559 "MatMul", a=a, b=b, transpose_a=transpose_a, transpose_b=transpose_b, -> 4560 name=name) 4561 _result = _op.outputs[:] 4562 _inputs_flat = _op.inputs /anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords) 785 op = g.create_op(op_type_name, inputs, output_types, name=scope, 786 input_types=input_types, attrs=attr_protos, --> 787 op_def=op_def) 788 return output_structure, op_def.is_stateful, op 789 /anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs) 486 'in a future version' if date is None else ('after %s' % date), 487 instructions) --> 488 return func(*args, **kwargs) 489 return tf_decorator.make_decorator(func, new_func, 'deprecated', 490 _add_deprecated_arg_notice_to_docstring( /anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in create_op(***failed resolving arguments***) 3270 input_types=input_types, 3271 original_op=self._default_original_op, -> 3272 op_def=op_def) 3273 self._create_op_helper(ret, compute_device=compute_device) 3274 return ret /anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def) 1788 op_def, inputs, node_def.attr) 1789 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs, -> 1790 control_input_ops) 1791 1792 # Initialize self._outputs. 
/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs) 1627 except errors.InvalidArgumentError as e: 1628 # Convert to ValueError for backwards compatibility. -> 1629 raise ValueError(str(e)) 1630 1631 return c_op ValueError: Dimensions must be equal, but are 8 and 1 for 'echo_state_rnn_cell/MatMul' (op: 'MatMul') with input shapes: [1,8], [1,8].