#!/usr/bin/env python
# coding: utf-8

# # CS 20 : TensorFlow for Deep Learning Research
# ## Lecture 11 : Recurrent Neural Networks
# Simple example of many-to-one classification (word sentiment classification) by stacked GRU with dropout.
#
# ### Many to One Classification by Stacked GRU with Dropout
# - Creating the **data pipeline** with `tf.data`
# - Preprocessing word sequences (variable input sequence length) with a padding technique via a user function (`pad_seq`)
# - Using `tf.nn.embedding_lookup` to get the vector of a token (e.g. word, character), as sketched below
# - Creating the model as a **class**
# - Applying **dropout** to the model with `tf.contrib.rnn.DropoutWrapper`
# - Applying **stacking** to the model with `tf.contrib.rnn.MultiRNNCell`
# - Replacing the **RNN cell** with a **GRU cell**
# - Reference
#     - https://github.com/golbin/TensorFlow-Tutorials/blob/master/10%20-%20RNN/02%20-%20Autocomplete.py
#     - https://github.com/aisolab/TF_code_examples_for_Deep_learning/blob/master/Tutorial%20of%20implementing%20Sequence%20classification%20with%20RNN%20series.ipynb
#     - https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/

# ### Setup

# In[1]:

import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import string
get_ipython().run_line_magic('matplotlib', 'inline')
slim = tf.contrib.slim
print(tf.__version__)


# ### Prepare example data

# In[2]:

words = ['good', 'bad', 'amazing', 'so good', 'bull shit', 'awesome']
y = [[1., 0.], [0., 1.], [1., 0.], [1., 0.], [0., 1.], [1., 0.]]


# In[3]:

# Character quantization: 26 lowercase letters, a space, and '*' as the padding token
char_space = string.ascii_lowercase
char_space = char_space + ' ' + '*'
char_space


# In[4]:

char_dic = {char : idx for idx, char in enumerate(char_space)}
print(char_dic)


# ### Create pad_seq function

# In[5]:

def pad_seq(sequences, max_len, dic):
    seq_len, seq_indices = [], []
    for seq in sequences:
        seq_len.append(len(seq))
        seq_idx = [dic.get(char) for char in seq]
        seq_idx += (max_len - len(seq_idx)) * [dic.get('*')] # 27 is the idx of the padding token '*'
        seq_indices.append(seq_idx)
    return seq_len, seq_indices


# ### Apply pad_seq function to data

# In[6]:

max_length = 10
X_length, X_indices = pad_seq(sequences = words, max_len = max_length, dic = char_dic)


# In[7]:

print(X_length)
print(np.shape(X_indices))
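# A minimal sketch, not part of the original lecture code, of the one-hot lookup
# used inside the model below: `tf.nn.embedding_lookup` on an identity matrix maps
# index i to the i-th row, i.e. the one-hot vector of token i. The `demo_*` names
# are illustrative only; a throwaway graph keeps the demo out of the default graph.

# In[ ]:

with tf.Graph().as_default():
    demo_one_hot = tf.eye(len(char_dic), dtype = tf.float32)
    demo_batch = tf.nn.embedding_lookup(params = demo_one_hot, ids = X_indices)
    with tf.Session() as demo_sess:
        print(demo_sess.run(demo_batch).shape) # (6, 10, 28) : batch, max_length, vocab size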
# ### Define CharStackedGRU class

# In[8]:

class CharStackedGRU:
    def __init__(self, X_length, X_indices, y, n_of_classes, dic, hidden_dims = [32, 16]):

        # data pipeline
        with tf.variable_scope('input_layer'):
            self._X_length = X_length
            self._X_indices = X_indices
            self._y = y
            self._keep_prob = tf.placeholder(dtype = tf.float32)

            one_hot = tf.eye(len(dic), dtype = tf.float32)
            self._one_hot = tf.get_variable(name = 'one_hot_embedding',
                                            initializer = one_hot,
                                            trainable = False) # fixed one-hot embedding, not trained
            self._X_batch = tf.nn.embedding_lookup(params = self._one_hot, ids = self._X_indices)

        # Stacked-GRU: one dropout-wrapped GRUCell per entry of hidden_dims
        with tf.variable_scope('stacked_gru'):
            cells = []
            for hidden_dim in hidden_dims:
                cell = tf.contrib.rnn.GRUCell(num_units = hidden_dim,
                                              kernel_initializer = tf.contrib.layers.xavier_initializer(),
                                              activation = tf.nn.tanh)
                cell = tf.contrib.rnn.DropoutWrapper(cell = cell, output_keep_prob = self._keep_prob)
                cells.append(cell)
            cells = tf.contrib.rnn.MultiRNNCell(cells = cells)

            _, state = tf.nn.dynamic_rnn(cell = cells, inputs = self._X_batch,
                                         sequence_length = self._X_length, dtype = tf.float32)

        with tf.variable_scope('output_layer'):
            # state is a tuple of per-layer final states; state[-1] is the top layer's state
            self._score = slim.fully_connected(inputs = state[-1], num_outputs = n_of_classes,
                                               activation_fn = None)

        with tf.variable_scope('loss'):
            self.ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = self._y, logits = self._score)

        with tf.variable_scope('prediction'):
            self._prediction = tf.argmax(input = self._score, axis = -1, output_type = tf.int32)

    def predict(self, sess, X_length, X_indices, keep_prob = 1.):
        # In TF1, feed_dict can override any feedable tensor, so the iterator
        # outputs wired into the graph can be replaced with new data here.
        feed_prediction = {self._X_length : X_length, self._X_indices : X_indices,
                           self._keep_prob : keep_prob}
        return sess.run(self._prediction, feed_dict = feed_prediction)


# ### Create a model of CharStackedGRU

# In[9]:

# hyper-parameters
lr = .003
epochs = 10
batch_size = 2
total_step = int(np.shape(X_indices)[0] / batch_size)
print(total_step)


# In[10]:

## create data pipeline with tf.data
tr_dataset = tf.data.Dataset.from_tensor_slices((X_length, X_indices, y))
tr_dataset = tr_dataset.shuffle(buffer_size = 20)
tr_dataset = tr_dataset.batch(batch_size = batch_size)
tr_iterator = tr_dataset.make_initializable_iterator()
print(tr_dataset)


# In[11]:

X_length_mb, X_indices_mb, y_mb = tr_iterator.get_next()


# In[12]:

char_stacked_gru = CharStackedGRU(X_length = X_length_mb, X_indices = X_indices_mb, y = y_mb,
                                  n_of_classes = 2, dic = char_dic, hidden_dims = [32, 16])


# ### Create training op and train model

# In[13]:

## create training op
opt = tf.train.AdamOptimizer(learning_rate = lr)
training_op = opt.minimize(loss = char_stacked_gru.ce_loss)


# In[14]:

sess = tf.Session()
sess.run(tf.global_variables_initializer())

tr_loss_hist = []
for epoch in range(epochs):
    avg_tr_loss = 0
    tr_step = 0

    sess.run(tr_iterator.initializer)
    try:
        while True:
            _, tr_loss = sess.run(fetches = [training_op, char_stacked_gru.ce_loss],
                                  feed_dict = {char_stacked_gru._keep_prob : .5})
            avg_tr_loss += tr_loss
            tr_step += 1
    except tf.errors.OutOfRangeError:
        pass

    avg_tr_loss /= tr_step
    tr_loss_hist.append(avg_tr_loss)
    print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))


# In[15]:

plt.plot(tr_loss_hist, label = 'train')
plt.legend()


# In[16]:

yhat = char_stacked_gru.predict(sess = sess, X_length = X_length, X_indices = X_indices)


# In[17]:

print('training acc: {:.2%}'.format(np.mean(yhat == np.argmax(y, axis = -1))))
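# ### Predict on unseen data

# A minimal sketch, not in the original notebook: classifying an unseen word with
# the trained model. 'terrible' is a hypothetical example; any string over
# char_space with length <= max_length works. pad_seq and predict are reused as-is,
# relying on the same feed_dict override of the iterator tensors shown above.

# In[ ]:

new_words = ['terrible']
new_length, new_indices = pad_seq(sequences = new_words, max_len = max_length, dic = char_dic)
print(char_stacked_gru.predict(sess = sess, X_length = new_length, X_indices = new_indices))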