#!/usr/bin/env python
# coding: utf-8

# # **Natural Language and Deep Learning**
# ## **LSTM Word Completion Model (Last-Letter Prediction)**

# ## **1 Data Definition**

# In[1]:


import tensorflow as tf
import numpy as np

# Alphabet vocabulary and a lookup table from character to index
char_arr = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
num_dic = {n: i for i, n in enumerate(char_arr)}
dic_len = len(num_dic)


# In[2]:


def make_batch(seq_data):
    """Encode each word: the first three letters become a one-hot input
    sequence, and the last letter becomes the integer target."""
    input_batch, target_batch = [], []
    for seq in seq_data:
        input_num = [num_dic[n] for n in seq[:-1]]
        target = num_dic[seq[-1]]
        input_batch.append(np.eye(dic_len)[input_num])
        target_batch.append(target)
    return input_batch, target_batch
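# A quick sanity check of `make_batch` (not part of the original notebook): for a single
# four-letter word it returns one one-hot input sequence of shape (3, 26) and one integer target.

# In[ ]:


# Minimal sketch, assuming the definitions in the cells above have been run.
sample_inputs, sample_targets = make_batch(['word'])
print(np.array(sample_inputs).shape)   # (1, 3, 26): 1 word, 3 letters, 26-dim one-hot vectors
print(sample_targets)                  # [3]: index of 'd' in char_arr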

# ## **2 Model Definition**

# In[3]:


tf.reset_default_graph()
learning_rate = 0.01
n_step = 3                        # three input letters per word
n_hidden, total_epoch = 64, 30
n_input = n_class = dic_len       # one-hot width equals the number of classes (26)


# In[4]:


X = tf.placeholder(tf.float32, [None, n_step, n_input])
Y = tf.placeholder(tf.int32, [None])
W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))


# In[5]:


# Two stacked LSTM cells; dropout is applied to the first cell's outputs
cell1 = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
cell1 = tf.nn.rnn_cell.DropoutWrapper(cell1, output_keep_prob=0.5)
cell2 = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
multi_cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2])


# In[6]:


# outputs has shape [batch, step, hidden]; keep only the last time step's output
outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)
outputs = tf.transpose(outputs, [1, 0, 2])
outputs = outputs[-1]
model = tf.matmul(outputs, W) + b


# In[7]:


cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
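# The transpose-then-index step in In[6] simply selects the last time step's output for every
# sequence in the batch. The small NumPy check below (not in the original notebook) illustrates
# the equivalence with plain slicing.

# In[ ]:


# Minimal NumPy sketch: transposing [batch, step, hidden] to [step, batch, hidden]
# and indexing with -1 gives the same result as outputs[:, -1, :].
demo = np.arange(2 * 3 * 4).reshape(2, 3, 4)      # [batch=2, step=3, hidden=4]
last_step = np.transpose(demo, [1, 0, 2])[-1]     # [batch=2, hidden=4]
print(np.array_equal(last_step, demo[:, -1, :]))  # True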

# ## **3 Model Training**

# In[8]:


get_ipython().run_cell_magic('time', '', """
seq_data = ['word', 'wood', 'deep', 'dive', 'cold', 'cool', 'load', 'love', 'kiss', 'kind']

sess = tf.Session()
sess.run(tf.global_variables_initializer())

input_batch, target_batch = make_batch(seq_data)
for epoch in range(total_epoch):
    _, loss = sess.run([optimizer, cost],
                       feed_dict={X: input_batch, Y: target_batch})
    if epoch % 4 == 0:
        print('Epoch: {:4d} cost = {:.6f}'.format(epoch + 1, loss))
print('Optimization finished!')
""")
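# While the session is still open, the trained graph can also be queried for a single
# three-letter prefix. The sketch below is not part of the original notebook; 'lov' is a
# hypothetical example prefix, which the model is expected to complete to 'love' since that
# word appears in the training data.

# In[ ]:


# Minimal sketch, assuming the training cell above has run and `sess` is still open.
prefix = 'lov'                                                   # hypothetical example prefix
prefix_input = [np.eye(dic_len)[[num_dic[c] for c in prefix]]]   # shape (1, 3, 26)
pred_idx = sess.run(tf.argmax(model, 1), feed_dict={X: prefix_input})
print(prefix + char_arr[pred_idx[0]])                            # expected: 'love'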

# ## **4 Evaluating the Trained Model**

# In[9]:


get_ipython().run_cell_magic('time', '', """
# Predicted class index per word and overall accuracy on the training words
prediction = tf.cast(tf.argmax(model, 1), tf.int32)
prediction_check = tf.equal(prediction, Y)
accuracy = tf.reduce_mean(tf.cast(prediction_check, tf.float32))

input_batch, target_batch = make_batch(seq_data)
predict, accuracy_val = sess.run([prediction, accuracy],
                                 feed_dict={X: input_batch, Y: target_batch})
""")


# In[10]:


# Rebuild each word from its three-letter prefix plus the predicted last letter
predict_words = []
for idx, val in enumerate(seq_data):
    last_char = char_arr[predict[idx]]
    predict_words.append(val[:3] + last_char)

print('\n=== Prediction results ===')
print('Inputs:   ', [w[:-1] + ' ' for w in seq_data])
print('Predicted:', predict_words)
print('Accuracy: ', accuracy_val)

sess.close()
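# The accuracy above is just the mean of element-wise equality between predicted and true
# indices. The short NumPy sketch below (not in the original notebook, with hypothetical
# index arrays) shows the same computation outside the graph.

# In[ ]:


# Minimal NumPy sketch of the accuracy computation used in In[9].
demo_pred = np.array([3, 3, 15, 4])     # hypothetical predicted last-letter indices
demo_target = np.array([3, 3, 15, 3])   # hypothetical true last-letter indices
print(np.mean((demo_pred == demo_target).astype(np.float32)))  # 0.75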