import tensorflow as tf
import numpy as np

# 'S': decoder start symbol, 'E': decoder end symbol, 'P': padding.
char_arr = [c for c in 'SPabcdefghijklmnopqrstuvwxyz나놀녀단랑무사소스어이키E']
num_dic = {n: i for i, n in enumerate(char_arr)}
dic_len = len(num_dic)

seq_data = [['word', '단어'], ['wood', '나무'], ['game', '놀이'],
            ['girl', '소녀'], ['kiss', '키스'], ['love', '사랑']]
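As a quick sanity check on the vocabulary (illustrative only; the indices simply follow the order of char_arr above):

print(num_dic['S'], num_dic['P'], num_dic['E'])   # 0 1 40
print(dic_len)                                    # 41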
def make_batch(seq_data):
    input_batch, output_batch, target_batch = [], [], []
    for seq in seq_data:
        # Encoder input: character indices of the English word.
        input_ids = [num_dic[n] for n in seq[0]]
        # Decoder input: the Korean word prefixed with the start symbol 'S'.
        output_ids = [num_dic[n] for n in ('S' + seq[1])]
        # Decoder target: the Korean word followed by the end symbol 'E'.
        target_ids = [num_dic[n] for n in (seq[1] + 'E')]
        # Inputs are one-hot encoded; targets stay as integer indices
        # for sparse_softmax_cross_entropy_with_logits.
        input_batch.append(np.eye(dic_len)[input_ids])
        output_batch.append(np.eye(dic_len)[output_ids])
        target_batch.append(target_ids)
    return input_batch, output_batch, target_batch
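To see what make_batch produces, here is a minimal shape check (illustrative, not part of the original script): the encoder input for 'word' is four one-hot rows, the decoder input for 'S' + '단어' is three, and the target is a plain list of integer indices.

inp, out, tgt = make_batch([['word', '단어']])
print(np.array(inp[0]).shape)   # (4, 41): one row per source character
print(np.array(out[0]).shape)   # (3, 41): 'S' plus two target characters
print(tgt)                      # [[...]]: integer indices for '단어' + 'E'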
tf.reset_default_graph()
learning_rate = 0.01
n_hidden, total_epoch = 128, 100
n_class = n_input = dic_len

# Encoder/decoder inputs: [batch size, time steps, one-hot input dim].
enc_input = tf.placeholder(tf.float32, [None, None, n_input])
dec_input = tf.placeholder(tf.float32, [None, None, n_input])
# Targets are integer character indices: [batch size, time steps].
targets = tf.placeholder(tf.int64, [None, None])
with tf.variable_scope('encode'):
    enc_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob=0.5)
    outputs, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input,
                                            dtype=tf.float32)

with tf.variable_scope('decode'):
    dec_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5)
    # The encoder's final state seeds the decoder.
    outputs, dec_states = tf.nn.dynamic_rnn(dec_cell, dec_input,
                                            initial_state=enc_states,
                                            dtype=tf.float32)
# Project each decoder time step onto the vocabulary.
model = tf.layers.dense(outputs, n_class, activation=None)

cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=model, labels=targets))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
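One caveat: with output_keep_prob fixed at 0.5, dropout also fires at prediction time. A common fix (a sketch using a feedable keep_prob placeholder, not part of the original graph) looks like this:

# Sketch: feed {keep_prob: 0.5} while training, leave the default 1.0
# when predicting, so dropout is disabled at inference.
keep_prob = tf.placeholder_with_default(1.0, shape=[])
cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)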
%%time
sess = tf.Session()
sess.run(tf.global_variables_initializer())

input_batch, output_batch, target_batch = make_batch(seq_data)

for epoch in range(total_epoch):
    _, loss = sess.run([optimizer, cost],
                       feed_dict={enc_input: input_batch,
                                  dec_input: output_batch,
                                  targets: target_batch})
    if epoch % 9 == 0:
        print('Epoch: {:4d} cost = {:.6f}'.format(epoch + 1, loss))

print('Optimization complete!')
Epoch:    1 cost = 3.721701
Epoch:   10 cost = 0.165835
Epoch:   19 cost = 0.016843
Epoch:   28 cost = 0.005062
Epoch:   37 cost = 0.004484
Epoch:   46 cost = 0.001066
Epoch:   55 cost = 0.000756
Epoch:   64 cost = 0.000527
Epoch:   73 cost = 0.000315
Epoch:   82 cost = 0.000267
Epoch:   91 cost = 0.000388
Epoch:  100 cost = 0.000355
Optimization complete!
CPU times: user 1.61 s, sys: 412 ms, total: 2.03 s
Wall time: 1.67 s
%%time
# Define the argmax op once rather than rebuilding it on every call.
prediction = tf.argmax(model, 2)  # [batch size, time steps]

def translate(word):
    # At prediction time the Korean side is unknown, so the decoder input
    # is just 'P' padding of the same length as the source word.
    seq_data = [word, 'P' * len(word)]
    input_batch, output_batch, target_batch = make_batch([seq_data])
    result = sess.run(prediction,
                      feed_dict={enc_input: input_batch,
                                 dec_input: output_batch,
                                 targets: target_batch})
    decoded = [char_arr[i] for i in result[0]]
    # Keep everything up to the first end symbol 'E'.
    end = decoded.index('E')
    return ''.join(decoded[:end])
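translate decodes in a single pass, feeding 'P' padding as the decoder input; it works here because the model leans heavily on the encoder state. A more conventional alternative is greedy step-by-step decoding, where each predicted character is fed back in. A sketch (translate_greedy is a hypothetical helper, reusing the session and prediction op above):

def translate_greedy(word, max_len=10):
    enc = [np.eye(dic_len)[[num_dic[c] for c in word]]]
    dec_ids = [num_dic['S']]                 # start with the 'S' symbol
    for _ in range(max_len):
        dec = [np.eye(dic_len)[dec_ids]]
        out = sess.run(prediction, feed_dict={enc_input: enc, dec_input: dec})
        next_id = out[0][-1]                 # prediction for the latest step
        if char_arr[next_id] == 'E':         # stop at the end symbol
            break
        dec_ids.append(next_id)
    return ''.join(char_arr[i] for i in dec_ids[1:])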
print('\n=== Translation Test ===')
for test_text in ['wodr', 'love', 'loev', 'abcd']:
    print('{} -> Translated: {}'.format(test_text, translate(test_text)))
sess.close()
=== Translation Test ===
wodr -> Translated: 단어
love -> Translated: 사랑
loev -> Translated: 사랑
abcd -> Translated: 단어
CPU times: user 297 ms, sys: 2.26 ms, total: 299 ms
Wall time: 280 ms