# http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/
# http://learningtensorflow.com/index.html
# http://suriyadeepan.github.io/2016-12-31-practical-seq2seq/
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import pprint
# Pretty-printer used throughout to display numpy arrays and RNN outputs.
pp = pprint.PrettyPrinter(indent=4)
# Interactive session so tensors can be evaluated with .eval() directly.
sess = tf.InteractiveSession()
# One hot encoding for each char in 'hello'
# (vocabulary of 4 symbols: h, e, l, o — each a length-4 one-hot vector)
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]
with tf.variable_scope('one_cell') as scope:
    # One cell RNN input_dim (4) -> output_dim (2)
    # Single time step, single batch element: the simplest possible RNN run.
    hidden_size = 2
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    print(cell.output_size, cell.state_size)

    x_data = np.array([[h]], dtype=np.float32)  # x_data = [[[1,0,0,0]]], shape (1, 1, 4)
    pp.pprint(x_data)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
# Sample output (weights are randomly initialized, so values vary per run):
#   2 2
#   array([[[ 1., 0., 0., 0.]]], dtype=float32)
#   array([[[-0.42409304, 0.64651132]]], dtype=float32)   # shape (1, 1, 2)
with tf.variable_scope('two_sequances') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5
    # Same cell as before, but unrolled over the 5 characters of 'hello'.
    hidden_size = 2
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    x_data = np.array([[h, e, l, l, o]], dtype=np.float32)  # shape (1, 5, 4)
    print(x_data.shape)
    pp.pprint(x_data)
    outputs, states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
# Sample output (random init; values vary per run):
#   (1, 5, 4)
#   array of the one-hot inputs, then outputs of shape (1, 5, 2), e.g.
#   array([[[ 0.19709368, 0.24918222], ..., [-0.38886023, 0.47304463]]],
#         dtype=float32)
with tf.variable_scope('3_batches') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3
    # 3 batches 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)  # shape (3, 5, 4)
    pp.pprint(x_data)

    hidden_size = 2
    # Switch to an LSTM cell; state_is_tuple=True returns (c, h) as a tuple.
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
# Sample output (random init; values vary per run):
#   the (3, 5, 4) one-hot inputs, then outputs of shape (3, 5, 2).
with tf.variable_scope('3_batches_dynamic_length') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3
    # 3 batches 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)  # shape (3, 5, 4)
    pp.pprint(x_data)

    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    # sequence_length gives the valid length per batch element (5, 3, 4);
    # outputs beyond each length are zero-padded by dynamic_rnn.
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, sequence_length=[5, 3, 4], dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
# Sample output (random init; values vary per run): outputs of shape
# (3, 5, 2), with rows past each sequence_length set to [0., 0.] —
# batch 1 zeroed from step 3 on, batch 2 zeroed at step 4.
with tf.variable_scope('initial_state') as scope:
    batch_size = 3
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)  # shape (3, 5, 4)
    pp.pprint(x_data)

    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch: 3
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    # Explicitly pass the all-zero initial state (what dynamic_rnn would
    # build internally from dtype anyway) to show the initial_state arg.
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state,
                                         dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
# Sample output (random init; values vary per run):
#   the (3, 5, 4) one-hot inputs, then outputs of shape (3, 5, 2).
# Create input data: the integers 0..44 arranged as (batch, time, feature).
batch_size = 3
sequence_length = 5
input_dim = 3
x_data = np.arange(45, dtype=np.float32).reshape(batch_size, sequence_length, input_dim)
pp.pprint(x_data)  # batch, sequence_length, input_dim
# Output (deterministic):
#   array([[[  0.,  1.,  2.], [  3.,  4.,  5.], ..., [ 12., 13., 14.]],
#          [[ 15., 16., 17.], ...,               [ 27., 28., 29.]],
#          [[ 30., 31., 32.], ...,               [ 42., 43., 44.]]],
#         dtype=float32)   # shape (3, 5, 3)
with tf.variable_scope('generated_data') as scope:
    # One cell RNN input_dim (3) -> output_dim (5). sequence: 5, batch: 3
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state,
                                         dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
# Sample output (random init; values vary per run): outputs of shape
# (3, 5, 5); with the large unnormalized inputs the tanh/sigmoid gates
# saturate, so many activations approach 0 or +/-1.
with tf.variable_scope('MultiRNNCell') as scope:
    # Make a 3-layer stacked LSTM.
    # NOTE: each layer needs its OWN cell object. The original
    # `rnn.MultiRNNCell([cell] * 3)` reuses one cell for every layer,
    # which shares weights across layers and breaks because layer 0's
    # input size (input_dim=3) differs from layers 1-2's (hidden=5);
    # TensorFlow >= 1.1 raises an error for this pattern.
    cells = [rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
             for _ in range(3)]
    cell = rnn.MultiRNNCell(cells, state_is_tuple=True)  # 3 layers

    # rnn in/out
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch size, unrolling (time), hidden_size
# Sample output (random init; values vary per run):
#   dynamic rnn: Tensor("MultiRNNCell/rnn/transpose:0", shape=(3, 5, 5),
#                       dtype=float32)
#   then the evaluated (3, 5, 5) output array.
with tf.variable_scope('dynamic_rnn') as scope:
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32,
                                         sequence_length=[1, 3, 2])
    # length 1 for batch 0, length 3 for batch 1, length 2 for batch 2;
    # time steps past each length produce all-zero output rows.
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch size, unrolling (time), hidden_size
# Sample output (random init; values vary per run): a (3, 5, 5) array where
# batch 0 has 4 zero rows, batch 1 has 2, and batch 2 has 3.
with tf.variable_scope('bi-directional') as scope:
    # bi-directional rnn: one LSTM reads the sequence forward, the other
    # backward; outputs is a (forward, backward) tuple, as is states.
    cell_fw = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    cell_bw = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)

    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, x_data,
                                                      sequence_length=[2, 3, 1],
                                                      dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))
    pp.pprint(sess.run(states))
# Sample output (random init; values vary per run):
#   outputs: a tuple of two (3, 5, 5) arrays (forward, backward), each with
#            zero rows past the per-batch sequence_length;
#   states:  a tuple of two LSTMStateTuple(c=..., h=...) pairs, each of
#            shape (3, 5), holding the final cell and hidden states.
# Flatten-based softmax projection: collapse (batch, time) into one axis so a
# single matmul maps hidden vectors to class logits, then restore the shape.
hidden_size = 3
sequence_length = 5
batch_size = 3
num_classes = 5

pp.pprint(x_data)  # (batch_size=3, sequence_length=5, hidden_size=3)
# Merge batch and time axes: (3, 5, 3) -> (15, 3).
x_data = x_data.reshape(-1, hidden_size)
pp.pprint(x_data)

softmax_w = np.arange(15, dtype=np.float32).reshape(hidden_size, num_classes)
outputs = np.matmul(x_data, softmax_w)
outputs = outputs.reshape(-1, sequence_length, num_classes)  # batch, seq, class
pp.pprint(outputs)
# Output (deterministic, since x_data and softmax_w are arange-based):
#   the (3, 5, 3) input, its (15, 3) flattened form, and the projected
#   (3, 5, 5) logits, e.g. first row [ 25., 28., 31., 34., 37.] and last
#   row [ 655., 784., 913., 1042., 1171.].
# Compare tf.contrib.seq2seq.sequence_loss for three predictions against the
# same target sequence [1, 1, 1]: the closer the logits favor class 1, the
# lower the loss.
# [batch_size, sequence_length, emb_dim ]
prediction1 = tf.constant([[[0, 1], [0, 1], [0, 1]]], dtype=tf.float32)  # all correct
prediction2 = tf.constant([[[1, 0], [1, 0], [1, 0]]], dtype=tf.float32)  # all wrong
prediction3 = tf.constant([[[0, 1], [1, 0], [0, 1]]], dtype=tf.float32)  # 2 of 3 correct

# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])

# [batch_size * sequence_length] — uniform per-step weights
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)

sequence_loss1 = tf.contrib.seq2seq.sequence_loss(prediction1, y_data, weights)
sequence_loss2 = tf.contrib.seq2seq.sequence_loss(prediction2, y_data, weights)
sequence_loss3 = tf.contrib.seq2seq.sequence_loss(prediction3, y_data, weights)

sess.run(tf.global_variables_initializer())
print("Loss1: ", sequence_loss1.eval(),
      "Loss2: ", sequence_loss2.eval(),
      "Loss3: ", sequence_loss3.eval())
# Output (deterministic — no trainable variables involved):
#   Loss1: 0.313262 Loss2: 1.31326 Loss3: 0.646595