#!/usr/bin/env python
# coding: utf-8
"""Walk-through of basic RNN usage with the TensorFlow 1.x ``tf.contrib`` API.

Each ``# In[n]:`` section is one notebook cell.  The cells build small RNNs
(a single step, a sequence, a batch, variable sequence lengths, an explicit
initial state, a stacked cell, and a bidirectional RNN) on tiny hand-made
inputs and pretty-print the resulting output tensors.  The last two cells show
a flatten-based softmax projection in NumPy and ``sequence_loss``.

References:
    http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/
    http://learningtensorflow.com/index.html
    http://suriyadeepan.github.io/2016-12-31-practical-seq2seq/
"""

# In[1]:

import pprint

import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn

pp = pprint.PrettyPrinter(indent=4)
# InteractiveSession installs itself as the default session, which is what
# lets the cells below call ``tensor.eval()`` without passing a session.
sess = tf.InteractiveSession()


# In[2]:

# One hot encoding for each char in 'hello'
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]


# ![image](https://cloud.githubusercontent.com/assets/901975/23348727/cc981856-fce7-11e6-83ea-4b187473466b.png)
#

# In[3]:

with tf.variable_scope('one_cell') as scope:
    # One cell RNN input_dim (4) -> output_dim (2)
    hidden_size = 2
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    print(cell.output_size, cell.state_size)

    x_data = np.array([[h]], dtype=np.float32)  # x_data = [[[1,0,0,0]]]
    pp.pprint(x_data)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)

    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


# ![image](https://cloud.githubusercontent.com/assets/901975/23383634/649efd0a-fd82-11e6-925d-8041242743b0.png)

# In[4]:

# NOTE: the scope name below is misspelled ('sequances'), but it is part of
# the graph's variable names, so it is left untouched.
with tf.variable_scope('two_sequances') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5
    hidden_size = 2
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    x_data = np.array([[h, e, l, l, o]], dtype=np.float32)
    print(x_data.shape)
    pp.pprint(x_data)
    outputs, states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


# ![image](https://cloud.githubusercontent.com/assets/901975/23383681/9943a9fc-fd82-11e6-8121-bd187994e249.png)

# In[5]:

with tf.variable_scope('3_batches') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch: 3
    # 3 batches 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)

    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


# In[6]:

with tf.variable_scope('3_batches_dynamic_length') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch: 3
    # 3 batches 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)

    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    # One valid length per batch entry: 5, 3 and 4 time steps respectively.
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, sequence_length=[5, 3, 4], dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


# In[7]:

with tf.variable_scope('initial_state') as scope:
    batch_size = 3
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)

    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch: 3
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state,
                                         dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


# In[8]:

# Create input data
batch_size = 3
sequence_length = 5
input_dim = 3

x_data = np.arange(45, dtype=np.float32).reshape(
    batch_size, sequence_length, input_dim)
pp.pprint(x_data)  # batch, sequence_length, input_dim


# In[9]:

with tf.variable_scope('generated_data') as scope:
    # One cell RNN input_dim (3) -> output_dim (5). sequence: 5, batch: 3
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state,
                                         dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


# In[10]:

with tf.variable_scope('MultiRNNCell') as scope:
    # Make rnn: 3 stacked LSTM layers.
    # Each layer needs its OWN cell object.  Repeating one instance
    # (``[cell] * 3``) makes every layer share the same weights, which fails
    # on TF >= 1.1 because layer 0 sees input_dim 3 while layers 1-2 see 5.
    cells = [rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
             for _ in range(3)]
    cell = rnn.MultiRNNCell(cells, state_is_tuple=True)  # 3 layers

    # rnn in/out
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch size, unrolling (time), hidden_size


# In[11]:

with tf.variable_scope('dynamic_rnn') as scope:
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    # length 1 for sequence 1, length 3 for sequence 2, length 2 for sequence 3
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32,
                                         sequence_length=[1, 3, 2])

    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch size, unrolling (time), hidden_size


# In[12]:

with tf.variable_scope('bi-directional') as scope:
    # bi-directional rnn: separate forward and backward cells
    cell_fw = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    cell_bw = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)

    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, x_data,
        sequence_length=[2, 3, 1],
        dtype=tf.float32)

    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))
    pp.pprint(sess.run(states))


# In[13]:

# flatten based softmax
hidden_size = 3
sequence_length = 5
batch_size = 3
num_classes = 5

pp.pprint(x_data)  # batch_size=3, sequence_length=5, hidden_size=3
# Collapse batch and time into one axis so a single matmul projects every step.
x_data = x_data.reshape(-1, hidden_size)
pp.pprint(x_data)

softmax_w = np.arange(15, dtype=np.float32).reshape(hidden_size, num_classes)
outputs = np.matmul(x_data, softmax_w)
# Restore the time axis that was flattened away above.
outputs = outputs.reshape(-1, sequence_length, num_classes)  # batch, seq, class
pp.pprint(outputs)


# In[14]:

# logits: [batch_size, sequence_length, num_classes]
prediction1 = tf.constant([[[0, 1], [0, 1], [0, 1]]], dtype=tf.float32)
prediction2 = tf.constant([[[1, 0], [1, 0], [1, 0]]], dtype=tf.float32)
prediction3 = tf.constant([[[0, 1], [1, 0], [0, 1]]], dtype=tf.float32)

# targets: [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])

# weights: [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)

sequence_loss1 = tf.contrib.seq2seq.sequence_loss(prediction1, y_data, weights)
sequence_loss2 = tf.contrib.seq2seq.sequence_loss(prediction2, y_data, weights)
sequence_loss3 = tf.contrib.seq2seq.sequence_loss(prediction3, y_data, weights)

sess.run(tf.global_variables_initializer())
print("Loss1: ", sequence_loss1.eval(),
      "Loss2: ", sequence_loss2.eval(),
      "Loss3: ", sequence_loss3.eval())