케라스와 OpenAI 짐을 사용한 심층 Q-러닝 네트워크. Keon Kim의 코드를 기반으로 합니다.
import random
import gym
import numpy as np
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import os # 디렉토리 생성을 위해
env = gym.make('CartPole-v0') # create the CartPole environment the agent will interact with
/usr/local/lib/python3.8/dist-packages/gym/envs/registration.py:593: UserWarning: WARN: The environment CartPole-v0 is out of date. You should consider upgrading to version `v1`. logger.warn( /usr/local/lib/python3.8/dist-packages/gym/core.py:317: DeprecationWarning: WARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future. deprecation( /usr/local/lib/python3.8/dist-packages/gym/wrappers/step_api_compatibility.py:39: DeprecationWarning: WARN: Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future. deprecation(
# Length of the first axis of the observation space, i.e. how many values one observation holds.
state_size = env.observation_space.shape[0]
state_size
4  # NOTE(review): appears to be the notebook's echoed value of state_size, not program logic
# Number of discrete actions exposed by the environment's action space.
action_size = env.action_space.n
action_size
2  # NOTE(review): appears to be the notebook's echoed value of action_size, not program logic
batch_size = 32  # number of stored transitions sampled for one training pass
n_episodes = 10  # number of games the agent plays; lowered to 10 for a quick test (1000 for a full run)
output_dir = 'model_output/cartpole/'  # directory where weight files are written

# exist_ok=True makes this a no-op when the directory already exists and avoids
# the check-then-create race of testing os.path.exists() first.
os.makedirs(output_dir, exist_ok=True)
class DQNAgent:
    """Deep Q-learning agent that approximates Q-values with a dense network.

    The agent stores transitions in a bounded replay memory, picks actions
    with an epsilon-greedy policy, and fits its network on randomly sampled
    past transitions.
    """

    def __init__(self, state_size, action_size):
        """Set hyperparameters and build the Q-network.

        Args:
            state_size: Number of input features of the network.
            action_size: Number of actions, i.e. number of network outputs.
        """
        self.state_size = state_size
        self.action_size = action_size
        # Bounded replay memory: once 2000 transitions are stored, the oldest
        # ones are dropped automatically as new ones are appended.
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount factor applied to the estimated future reward
        self.epsilon = 1.0  # exploration rate: probability of taking a random action
        self.epsilon_decay = 0.995  # multiplicative decay applied to epsilon after each train() call
        self.epsilon_min = 0.01  # lower bound for the exploration rate
        self.learning_rate = 0.001  # step size handed to the Adam optimizer
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the network used to approximate the Q-function.

        Returns:
            A compiled Sequential model with two 32-unit ReLU hidden layers
            and one linear output per action, trained with MSE loss.
        """
        model = Sequential()
        # First hidden layer; it receives the state as input.
        model.add(Dense(32, activation='relu',
                        input_dim=self.state_size))
        model.add(Dense(32, activation='relu'))  # second hidden layer
        # One linear output per action (the predicted Q-value of that action).
        model.add(Dense(self.action_size, activation='linear'))
        # `learning_rate` replaces the deprecated `lr` keyword of Adam.
        model.compile(loss='mse',
                      optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory for later training."""
        self.memory.append((state, action, reward, next_state, done))

    def train(self, batch_size):
        """Fit the network on a random minibatch drawn from the replay memory.

        Args:
            batch_size: Number of transitions to sample.

        Raises:
            ValueError: Raised by random.sample when batch_size is larger
                than the number of stored transitions.
        """
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # For a terminal transition the target is just the reward.
            target = reward
            if not done:
                # Otherwise add the discounted maximum predicted Q-value of
                # the next state. verbose=0 suppresses the per-call progress
                # bar that predict() may print.
                future_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(future_q)
            # Start from the current predictions so only the entry of the
            # action actually taken contributes to the loss.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        self._decay_epsilon()

    def _decay_epsilon(self):
        """Decay the exploration rate once, never going below epsilon_min."""
        if self.epsilon > self.epsilon_min:
            # Clamp so the last decay step cannot undershoot the minimum.
            self.epsilon = max(self.epsilon_min,
                               self.epsilon * self.epsilon_decay)

    def act(self, state):
        """Choose an action with an epsilon-greedy policy.

        Args:
            state: Input passed to the network's predict(); only used when
                the greedy branch is taken.

        Returns:
            A random action index with probability epsilon, otherwise the
            index of the highest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def save(self, name):
        """Write the network weights to the file path `name`."""
        self.model.save_weights(name)

    def load(self, name):
        """Load network weights from the file path `name`."""
        self.model.load_weights(name)
agent = DQNAgent(state_size, action_size) # create the agent
/usr/local/lib/python3.8/dist-packages/keras/optimizers/optimizer_v2/adam.py:110: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead. super(Adam, self).__init__(name, **kwargs)
# Main loop: play n_episodes games, storing every transition, then train the
# agent from its replay memory and periodically save its weights.
for e in range(n_episodes):
    # Each episode starts from a freshly reset environment; the observation
    # is reshaped into a single-row batch.
    state = np.reshape(env.reset(), [1, state_size])
    time = 0  # timestep counter of the current episode
    done = False
    while not done:
        # env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        if done:
            # The step that ends the episode gets -10 instead of the
            # environment's own reward.
            reward = -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state  # the next state becomes the current one
        if done:
            # Report the episode index, its score and the current epsilon.
            print("에피소드: {}/{}, 점수: {}, e: {:.2}".format(
                e, n_episodes-1, time, agent.epsilon))
        time += 1
    # Train only once the memory holds more transitions than one minibatch.
    if len(agent.memory) > batch_size:
        agent.train(batch_size)
    # Save the weights every 50th episode (including episode 0).
    if e % 50 == 0:
        agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")
에피소드: 0/9, 점수: 28, e: 1.0 에피소드: 1/9, 점수: 45, e: 1.0 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 26ms/step 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 28ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 28ms/step 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 25ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 24ms/step 1/1 [==============================] - 0s 22ms/step 1/1 
[==============================] - 0s 20ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 35ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 30ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 28ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 25ms/step 1/1 [==============================] - 0s 23ms/step 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 25ms/step 1/1 [==============================] - 0s 29ms/step 1/1 [==============================] - 0s 23ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 24ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 20ms/step 에피소드: 2/9, 점수: 13, e: 0.99 1/1 [==============================] - 0s 25ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 
1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 
[==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 에피소드: 3/9, 점수: 16, e: 0.99 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 13ms/step 
1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 
[==============================] - 0s 13ms/step 에피소드: 4/9, 점수: 24, e: 0.99 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 
1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 15ms/step 에피소드: 5/9, 점수: 14, e: 0.98 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 12ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 
15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 
[==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 에피소드: 6/9, 점수: 12, e: 0.98 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 
1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 에피소드: 7/9, 점수: 25, e: 0.97 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 
15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 24ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 18ms/step 1/1 
[==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 14ms/step 에피소드: 8/9, 점수: 34, e: 0.97 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 
1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 
[==============================] - 0s 15ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 18ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 에피소드: 9/9, 점수: 18, e: 0.96 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 
1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 14ms/step
# saved agents can be loaded with agent.load("./path/filename.hdf5")