The data used for training is prepared as follows.
import pandas as pd

df = pd.read_table('lezhin_dataset_v2_training.tsv', header=None)
df_1 = df.drop(df.columns[[6, 7]], axis=1)  # drop columns 6 and 7
df_1 = df_1.dropna(axis=1)                  # drop any columns that contain NaN

from sklearn.preprocessing import MinMaxScaler

# Very important: the model does not train well without feature scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
df_1 = scaler.fit_transform(df_1)
df_1 = pd.DataFrame(df_1)
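As a quick sanity check (an illustrative snippet, not part of the original pipeline), every column should now lie in [0, 1]:
# Illustrative check: MinMaxScaler should map every column into [0, 1].
print(df_1.min().min(), df_1.max().max())  # expected: 0.0 1.0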
The entire dataset is used for training. To measure accuracy, the last 30% of the rows is used as a test set (note that this test set therefore overlaps the training data).
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
import numpy as np
# Data preprocessing
rows = len(df_1)
cut_row = rows * 7 // 10  # index where the last 30% begins
#cut_row = rows
print(cut_row)
x_train = df_1.iloc[:, 1:].values.tolist()        # all rows: features (columns 1 onward)
y_train = df_1.iloc[:, 0].values.tolist()         # all rows: label (column 0)
x_test = df_1.iloc[cut_row:, 1:].values.tolist()  # last 30% of rows: features
y_test = df_1.iloc[cut_row:, 0].values.tolist()   # last 30% of rows: label
# Save with pickle
import pickle
import os

def save_data(data, fname='data2.pic'):
    # Write the split data to disk once; skip if the file already exists.
    if not os.path.isfile(fname):
        with open(fname, 'wb') as f:
            pickle.dump(data, f)

def load_data(fname='data2.pic'):
    with open(fname, 'rb') as f:
        x_train, y_train, x_test, y_test = pickle.load(f)
    return x_train, y_train, x_test, y_test

save_data([x_train, y_train, x_test, y_test])
x_train, y_train, x_test, y_test = load_data()
Using TensorFlow backend.
455675
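The printed 455675 is 70% of the 650,965 rows (the row count visible in the training logs below), so the last 195,290 rows form the test set. A quick shape check (illustrative):
import numpy as np
print(np.shape(x_train))  # (650965, 147): every row, 147 features
print(np.shape(x_test))   # (195290, 147): the last 30% of the rows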
We train using binary cross-entropy. Since the model is a single sigmoid unit over the 147 input features, this first model is equivalent to logistic regression.
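As a reminder of what this loss computes, here is a minimal NumPy sketch of binary cross-entropy for one prediction p against a label y (the eps clipping is only there to avoid log(0)):
import numpy as np

def bce(y, p, eps=1e-7):
    # loss = -(y*log(p) + (1-y)*log(1-p)); large when a confident prediction is wrong
    p = np.clip(p, eps, 1 - eps)
    return -(y * np.log(p) + (1 - y) * np.log(1 - p))

print(bce(1, 0.9))  # ~0.105: confident and correct
print(bce(1, 0.1))  # ~2.303: confident and wrong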
model = Sequential()
model.add(Dense(1, input_dim=147, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.1), metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, epochs=10)
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_1 (Dense)              (None, 1)                 148
=================================================================
Total params: 148
Trainable params: 148
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10 - 54s - loss: 0.5262 - acc: 0.7354
Epoch 2/10 - 51s - loss: 0.5214 - acc: 0.7396
Epoch 3/10 - 60s - loss: 0.5209 - acc: 0.7404
Epoch 4/10 - 62s - loss: 0.5207 - acc: 0.7402
Epoch 5/10 - 51s - loss: 0.5205 - acc: 0.7405
Epoch 6/10 - 52s - loss: 0.5204 - acc: 0.7408
Epoch 7/10 - 50s - loss: 0.5204 - acc: 0.7405
Epoch 8/10 - 51s - loss: 0.5204 - acc: 0.7404
Epoch 9/10 - 52s - loss: 0.5203 - acc: 0.7407
Epoch 10/10 - 53s - loss: 0.5203 - acc: 0.7406
<keras.callbacks.History at 0x28ab1f668>
We measure the accuracy of this model on the test set; it comes out to about 67.64%.
loss_and_metrics = model.evaluate(x_test, y_test)
print('')
print('loss_and_metrics : ' + str(loss_and_metrics))
194496/195290 [============================>.] - ETA: 0s
loss_and_metrics : [0.56578984513517916, 0.67638896000838367]
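Individual predictions can be inspected the same way (illustrative; the sigmoid output is thresholded at 0.5):
import numpy as np
probs = model.predict(np.array(x_test[:5]))  # purchase probabilities in [0, 1]
print(probs.ravel())
print((probs.ravel() > 0.5).astype(int))     # predicted labels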
Next, we train a deeper model with four relu layers followed by a single sigmoid output layer.
from keras.optimizers import Adam

model = Sequential()
model.add(Dense(100, input_dim=147, kernel_initializer='uniform', activation='relu'))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dense(50, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
#sgd = SGD()
#model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.1), metrics=['accuracy'])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, epochs=100)
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_2 (Dense)              (None, 100)               14800
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100
_________________________________________________________________
dense_5 (Dense)              (None, 50)                5050
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 51
=================================================================
Total params: 40,101
Trainable params: 40,101
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100 - 119s - loss: 0.4386 - acc: 0.7938
Epoch 2/100 - 116s - loss: 0.4090 - acc: 0.8132
Epoch 3/100 - 115s - loss: 0.3985 - acc: 0.8190
Epoch 4/100 - 114s - loss: 0.3913 - acc: 0.8236
Epoch 5/100 - 114s - loss: 0.3860 - acc: 0.8269
Epoch 6/100 - 115s - loss: 0.3816 - acc: 0.8292
Epoch 7/100 - 117s - loss: 0.3779 - acc: 0.8316
Epoch 8/100 - 114s - loss: 0.3746 - acc: 0.8338
Epoch 9/100 - 115s - loss: 0.3718 - acc: 0.8351
Epoch 10/100 - 114s - loss: 0.3692 - acc: 0.8363
Epoch 11/100 - 121s - loss: 0.3668 - acc: 0.8380
Epoch 12/100 - 121s - loss: 0.3647 - acc: 0.8393
Epoch 13/100 - 119s - loss: 0.3626 - acc: 0.8403
Epoch 14/100 - 120s - loss: 0.3609 - acc: 0.8411
Epoch 15/100 - 121s - loss: 0.3589 - acc: 0.8422
Epoch 16/100 - 124s - loss: 0.3572 - acc: 0.8432
Epoch 17/100 - 122s - loss: 0.3552 - acc: 0.8445
Epoch 18/100 - 121s - loss: 0.3540 - acc: 0.8447
Epoch 19/100 - 121s - loss: 0.3522 - acc: 0.8460
Epoch 20/100 - 122s - loss: 0.3508 - acc: 0.8469
Epoch 21/100 - 131s - loss: 0.3494 - acc: 0.8474
Epoch 22/100 - 144s - loss: 0.3483 - acc: 0.8486
Epoch 23/100 - 134s - loss: 0.3471 - acc: 0.8489
Epoch 24/100 - 134s - loss: 0.3456 - acc: 0.8498
Epoch 25/100 - 134s - loss: 0.3445 - acc: 0.8501
Epoch 26/100 - 134s - loss: 0.3434 - acc: 0.8508
Epoch 27/100 - 134s - loss: 0.3424 - acc: 0.8515
Epoch 28/100 - 134s - loss: 0.3411 - acc: 0.8521
Epoch 29/100 - 134s - loss: 0.3405 - acc: 0.8523
Epoch 30/100 - 141s - loss: 0.3393 - acc: 0.8528
Epoch 31/100 - 149s - loss: 0.3390 - acc: 0.8531
Epoch 32/100 - 149s - loss: 0.3378 - acc: 0.8539
Epoch 33/100 - 151s - loss: 0.3373 - acc: 0.8541
Epoch 34/100 - 150s - loss: 0.3363 - acc: 0.8551
Epoch 35/100 - 150s - loss: 0.3356 - acc: 0.8550
Epoch 36/100 - 150s - loss: 0.3347 - acc: 0.8559
Epoch 37/100 - 150s - loss: 0.3339 - acc: 0.8560
Epoch 38/100 - 150s - loss: 0.3333 - acc: 0.8565
Epoch 39/100 - 150s - loss: 0.3327 - acc: 0.8570
Epoch 40/100 - 151s - loss: 0.3322 - acc: 0.8574
Epoch 41/100 - 150s - loss: 0.3313 - acc: 0.8577
Epoch 42/100 - 150s - loss: 0.3302 - acc: 0.8582
Epoch 43/100 - 150s - loss: 0.3298 - acc: 0.8580
Epoch 44/100 - 150s - loss: 0.3295 - acc: 0.8586
Epoch 45/100 - 151s - loss: 0.3287 - acc: 0.8588
Epoch 46/100 - 150s - loss: 0.3285 - acc: 0.8589
Epoch 47/100 - 149s - loss: 0.3279 - acc: 0.8592
Epoch 48/100 - 149s - loss: 0.3271 - acc: 0.8597
Epoch 49/100 - 149s - loss: 0.3264 - acc: 0.8601
Epoch 50/100 - 150s - loss: 0.3264 - acc: 0.8601
Epoch 51/100 - 150s - loss: 0.3260 - acc: 0.8604
Epoch 52/100 - 150s - loss: 0.3250 - acc: 0.8608
Epoch 53/100 - 150s - loss: 0.3248 - acc: 0.8608
Epoch 54/100 - 150s - loss: 0.3243 - acc: 0.8611
Epoch 55/100 - 153s - loss: 0.3237 - acc: 0.8614
Epoch 56/100 - 150s - loss: 0.3232 - acc: 0.8620
Epoch 57/100 - 150s - loss: 0.3228 - acc: 0.8621
Epoch 58/100 - 150s - loss: 0.3225 - acc: 0.8623
Epoch 59/100 - 150s - loss: 0.3223 - acc: 0.8622
Epoch 60/100 - 150s - loss: 0.3220 - acc: 0.8624
Epoch 61/100 - 150s - loss: 0.3213 - acc: 0.8624
Epoch 62/100 - 150s - loss: 0.3210 - acc: 0.8626
Epoch 63/100 - 150s - loss: 0.3209 - acc: 0.8629
Epoch 64/100 - 152s - loss: 0.3204 - acc: 0.8632
Epoch 65/100 - 151s - loss: 0.3204 - acc: 0.8630
Epoch 66/100 - 150s - loss: 0.3200 - acc: 0.8633
Epoch 67/100 - 151s - loss: 0.3194 - acc: 0.8638
Epoch 68/100 - 149s - loss: 0.3190 - acc: 0.8637
Epoch 69/100 - 151s - loss: 0.3190 - acc: 0.8638
Epoch 70/100 - 153s - loss: 0.3185 - acc: 0.8638
Epoch 71/100 - 151s - loss: 0.3182 - acc: 0.8641
Epoch 72/100 - 150s - loss: 0.3181 - acc: 0.8643
Epoch 73/100 - 151s - loss: 0.3177 - acc: 0.8643
Epoch 74/100 - 150s - loss: 0.3173 - acc: 0.8646
Epoch 75/100 - 151s - loss: 0.3174 - acc: 0.8644
Epoch 76/100 - 151s - loss: 0.3171 - acc: 0.8650
Epoch 77/100 - 152s - loss: 0.3169 - acc: 0.8652
Epoch 78/100 - 151s - loss: 0.3166 - acc: 0.8652
Epoch 79/100 - 150s - loss: 0.3161 - acc: 0.8655
Epoch 80/100 - 150s - loss: 0.3162 - acc: 0.8653
Epoch 81/100 - 152s - loss: 0.3158 - acc: 0.8655
Epoch 82/100 - 152s - loss: 0.3156 - acc: 0.8654
Epoch 83/100 - 151s - loss: 0.3159 - acc: 0.8657
Epoch 84/100 - 151s - loss: 0.3155 - acc: 0.8656
Epoch 85/100 - 151s - loss: 0.3154 - acc: 0.8657
Epoch 86/100 - 152s - loss: 0.3150 - acc: 0.8658
Epoch 87/100 - 151s - loss: 0.3147 - acc: 0.8659
Epoch 88/100 - 152s - loss: 0.3149 - acc: 0.8656
Epoch 89/100 - 151s - loss: 0.3142 - acc: 0.8664
Epoch 90/100 - 153s - loss: 0.3141 - acc: 0.8665
Epoch 91/100 - 151s - loss: 0.3141 - acc: 0.8662
Epoch 92/100 - 152s - loss: 0.3138 - acc: 0.8664
Epoch 93/100 - 151s - loss: 0.3136 - acc: 0.8668
Epoch 94/100 - 153s - loss: 0.3132 - acc: 0.8669
Epoch 95/100 - 160s - loss: 0.3132 - acc: 0.8668
Epoch 96/100 - 157s - loss: 0.3135 - acc: 0.8667
Epoch 97/100 - 152s - loss: 0.3131 - acc: 0.8664
Epoch 98/100 - 152s - loss: 0.3125 - acc: 0.8668
Epoch 99/100 - 152s - loss: 0.3127 - acc: 0.8669
Epoch 100/100 - 152s - loss: 0.3125 - acc: 0.8669
<keras.callbacks.History at 0x120241978>
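The 100 epochs above are judged only by training accuracy. To watch generalization during training as well, Keras's fit supports a built-in validation split; a sketch under the same model and data (not part of the original run):
# Hold out the last 30% of x_train during fitting and report val_loss/val_acc
# after every epoch (sketch; the original run did not use this).
model.fit(x_train, y_train, epochs=100, validation_split=0.3)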
We measure the accuracy of this model on the test set; it comes out to about 87.29%.
loss_and_metrics = model.evaluate(x_test, y_test)
print('')
print('loss_and_metrics : ' + str(loss_and_metrics))
194560/195290 [============================>.] - ETA: 0s
loss_and_metrics : [0.3222619390542914, 0.87285575298030194]
We save this model.
# Save the trained model to an HDF5 file
from keras.models import load_model
model.save('lezhin.h5')
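The load_model import above is what restores the saved file later; a minimal sketch of reloading and re-checking the model:
# Illustrative: restore the saved model and confirm it evaluates the same.
model2 = load_model('lezhin.h5')
print(model2.evaluate(x_test, y_test))  # expected: the loss/accuracy shown above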