#!/usr/bin/env python # coding: utf-8 # ### Basic RNN - Simple # - 참고 # - https://gist.github.com/j-min/481749dcb853b4477c4f441bf7452195 # - http://pythonkim.tistory.com/58 # - http://pythonkim.tistory.com/61 # - http://karpathy.github.io/2015/05/21/rnn-effectiveness/ # - 목표 # - Character-level Language Modeling # - 입력 데이터 # In[68]: import tensorflow as tf import numpy as np # In[69]: tf.__version__ # - 사전 내포방식으로 Character를 키로, 인덱스를 값으로 지니는 Dictionary 생성 # In[70]: char_rdic = ['h', 'e', 'l', 'o'] # id -> char char_dic = {w : i for i, w in enumerate(char_rdic)} # char -> id print char_dic # - ground_true --> 'hello' --> [0, 1, 2, 2, 3] 생성 # In[71]: ground_truth = [char_dic[c] for c in 'hello'] print ground_truth # - 입력데이터로 활용할 x_data 마련 # - 입력데이터에는 hell 까지만 존재 # In[72]: x_data = np.array([[1,0,0,0], # h [0,1,0,0], # e [0,0,1,0], # l [0,0,1,0]], # l dtype = 'float32') print x_data.shape, x_data.dtype # - 참고: https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#one_hot # # **tf.one_hot(indices, depth, on_value=None, off_value=None, axis=None, dtype=None, name=None)** # In[73]: session = tf.InteractiveSession() session.run(tf.initialize_all_variables()) # In[74]: print ground_truth[:], ground_truth[:-1] # In[75]: x_data = tf.one_hot(ground_truth[:-1], depth = len(char_dic), on_value = 1.0, off_value = 0.0) print x_data.eval() # In[76]: # Configuration rnn_size = len(char_dic) # 4 batch_size = 1 output_size = 4 # - 참고: https://www.tensorflow.org/api_docs/python/rnn_cell/rnn_cells_for_use_with_tensorflow_s_core_rnn_methods#BasicRNNCell # In[77]: # RNN Model rnn_cell = tf.nn.rnn_cell.BasicRNNCell(num_units = rnn_size) #initial_state = rnn_cell.zero_state(batch_size, tf.float32) initial_state = tf.zeros([batch_size, rnn_cell.state_size]) # 위 코드와 같은 결과 print(initial_state) print initial_state.eval() # - 참고: https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#split # # **tf.split(split_dim, num_split, value, name='split')** # In[78]: print x_data.eval() print x_split = tf.split(split_dim = 0, num_split = len(char_dic), value = x_data) # dimension=0 을 기준으로 4개로 split print type(x_split) for t in x_split: print t.eval() # - 참고: https://www.tensorflow.org/api_docs/python/nn/recurrent_neural_networks#rnn # In[80]: with tf.variable_scope('forward'): outputs, state = tf.nn.rnn(cell = rnn_cell, inputs = x_split, initial_state = initial_state) print type(outputs) print for t in outputs: print t.get_shape() print print state.get_shape() # In[81]: result_outputs = tf.reshape(tf.concat(1, outputs), # shape = 1 x 16 [-1, rnn_size]) # shape = 4 x 4 print result_outputs.get_shape() # In[82]: print ground_truth[1:] targets = tf.constant(ground_truth[1:], tf.int32) # a shape of [-1] flattens into 1-D print targets.eval() # In[83]: weights = tf.ones([len(char_dic) * batch_size]) # tf.ones([4]) print weights.eval() # - 참고: https://www.tensorflow.org/tutorials/recurrent/ # - tf.nn.seq2seq.sequence_loss_by_example - return Weighted cross-entropy loss for a sequence of logits (per example) # In[84]: loss = tf.nn.seq2seq.sequence_loss_by_example([result_outputs], [targets], [weights]) cost = tf.reduce_sum(loss) / batch_size train_op = tf.train.RMSPropOptimizer(0.01, 0.9).minimize(cost) # In[85]: # Launch the graph in a session with tf.Session() as sess: tf.initialize_all_variables().run() for i in range(100): sess.run(train_op) result = sess.run(tf.argmax(result_outputs, 1)) print(result, [char_rdic[t] for t in result]) # In[ ]: