sudo apt install musescore timidity lilypond
pip install music21 matplotlib scipy tensorflow
# Configure music21's user settings so scores/MIDI can be rendered locally.
from music21 import *
us = environment.UserSettings()
# Notebook echo of the settings-file location returned by the call below.
us.getSettingsPath()
'/home/tsu-nera/.music21rc'
#us["musicxmlPath"] = "/usr/bin/gedit"
# Open MusicXML output in MuseScore, play MIDI through timidity.
us["musicxmlPath"] = "/usr/bin/musescore"
us["midiPath"] = "/usr/bin/timidity"
# Render/export notation via LilyPond by default.
us["showFormat"] = "lilypond"
us["writeFormat"] = "lilypond"
# MuseScore is also used for direct PNG rendering of scores.
us["musescoreDirectPNGPath"] = "/usr/bin/musescore"
!mkdir composer
import glob

# Measure-boundary marker: "@" is not part of the Humdrum syntax, so it can
# safely stand in for barlines ("=...") in the flattened training text.
REP = "@\n"

def trim_metadata(output_path, glob_path):
    """Concatenate all Humdrum **kern files matching *glob_path* into a
    single training-text file at *output_path*, stripping metadata.

    Per file: everything before the first barline line ("=...") is header or
    metadata and is dropped; each barline is replaced by REP ("@\\n");
    comment lines ("!...") are dropped; all other lines are kept verbatim.

    Arguments
    ---------
    output_path: path of the concatenated output text file (overwritten)
    glob_path: glob pattern matching the input .krn files
    """
    # Context managers close every handle promptly — the original version
    # leaked the per-song file handles and relied on GC to flush/close.
    with open(output_path, "w") as comp_txt:
        for song in glob.glob(glob_path):
            with open(song, "r") as src:
                lines = src.readlines()
            out = []
            found_first = False
            for l in lines:
                if l.startswith("="):
                    # New measure: replace the barline with the "@" marker.
                    out.append(REP)
                    found_first = True
                    continue
                if not found_first:
                    # Still inside the header/metadata; keep skipping.
                    continue
                if l.startswith("!"):
                    # Ignore Humdrum comment lines.
                    continue
                out.append(l)
            comp_txt.writelines(out)
%mkdir kernscore
%mkdir kernscore/bach
from urllib.request import urlopen

# Download the 15 Bach two-part inventions in Humdrum **kern format from
# the kern.humdrum.org CGI service into kernscore/bach/.
for number in range(1, 16):
    filename = "inven{0:02d}.krn".format(number)
    url = "http://kern.humdrum.org/cgi-bin/ksdata?l=osu/classical/bach/inventions&file=%s&f=kern" % filename
    response = urlopen(url)
    with open("kernscore/bach/" + filename, 'wb') as output:
        output.write(response.read())

# Strip the Humdrum metadata from every downloaded file and concatenate
# the result into a single training corpus.
output_path = "composer/bach.txt"
glob_path = "kernscore/bach/*.krn"
trim_metadata(output_path, glob_path)
import time
from collections import namedtuple
import numpy as np
import tensorflow as tf
# Load the flattened corpus and build the character-level encoding tables.
filename = 'composer/bach.txt'
with open(filename, 'r') as f:
    text = f.read()

# Each distinct character in the corpus is one vocabulary entry.  NOTE:
# iteration order of a set is arbitrary, so the int<->char assignment can
# differ between runs — both tables are built from the same enumeration so
# they always stay mutually consistent.
vocab = set(text)
int_to_vocab = dict(enumerate(vocab))
vocab_to_int = {ch: idx for idx, ch in int_to_vocab.items()}

# The whole corpus as a 1-D int32 array of character ids.
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
text[:50]
'@\n4.r\t16dL\n.\t16e\n.\t16f\n.\t16g\n.\t16a\n.\t16b-J\n@\n4.r\t1'
encoded[:100]
array([11, 4, 8, 31, 15, 35, 7, 3, 27, 6, 4, 31, 35, 7, 3, 12, 4, 31, 35, 7, 3, 39, 4, 31, 35, 7, 3, 24, 4, 31, 35, 7, 3, 2, 4, 31, 35, 7, 3, 9, 13, 1, 4, 11, 4, 8, 31, 15, 35, 7, 3, 22, 32, 6, 4, 31, 35, 7, 3, 9, 13, 4, 31, 35, 7, 3, 2, 4, 31, 35, 7, 3, 24, 4, 31, 35, 7, 3, 39, 4, 31, 35, 7, 3, 12, 1, 4, 11, 4, 7, 3, 41, 6, 35, 21, 39, 6, 4, 7, 3], dtype=int32)
vocab_size = len(vocab)
vocab_size
43
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
    n_seqs x n_steps from arr.

    Arguments
    ---------
    arr: Array you want to make batches from
    n_seqs: Batch size, the number of sequences per batch
    n_steps: Number of sequence steps per batch

    Yields
    ------
    x, y: arrays of shape (n_seqs, n_steps); y is x shifted left by one,
        i.e. y[i, t] is the character that follows x[i, t] in the data.
    '''
    # Characters consumed per batch, and how many full batches fit.
    batch_size = n_seqs * n_steps
    n_batches = len(arr) // batch_size
    # Keep only enough characters to make full batches.
    arr = arr[:n_batches * batch_size]
    # One row per sequence; each row is consumed left-to-right in
    # n_steps-wide windows.
    arr = arr.reshape((n_seqs, -1))
    for n in range(0, arr.shape[1], n_steps):
        # The features
        x = arr[:, n:n + n_steps]
        # The targets, shifted by one.  The original code set
        # y[:, -1] = x[:, 0] for EVERY window, which makes the last target
        # of each window the window's own first character instead of the
        # true next character; use the following column, wrapping around
        # only at the very end of the data where no next character exists.
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        if n + n_steps < arr.shape[1]:
            y[:, -1] = arr[:, n + n_steps]
        else:
            y[:, -1] = arr[:, 0]
        yield x, y
# Sanity check: pull one batch (10 sequences x 50 steps) and peek at the
# top-left corner of the features and their shifted-by-one targets.
batches = get_batches(encoded, 10, 50)
x, y = next(batches)
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])
x [[11 4 8 31 15 35 7 3 27 6] [35 7 3 24 24 42 5 4 7 3] [ 7 3 22 22 32 4 31 35 7 3] [ 9 9 4 7 3 38 35 31 4 11] [ 3 24 4 7 3 33 35 7 3 39] [ 2 2 4 7 3 27 35 31 4 7] [16 18 41 32 35 7 3 39 32 1] [35 31 4 7 3 38 35 8 9 13] [27 1 4 11 4 7 3 33 6 35] [ 7 3 24 4 11 4 7 3 0 35]] y [[ 4 8 31 15 35 7 3 27 6 4] [ 7 3 24 24 42 5 4 7 3 36] [ 3 22 22 32 4 31 35 7 3 2] [ 9 4 7 3 38 35 31 4 11 4] [24 4 7 3 33 35 7 3 39 32] [ 2 4 7 3 27 35 31 4 7 3] [18 41 32 35 7 3 39 32 1 4] [31 4 7 3 38 35 8 9 13 19] [ 1 4 11 4 7 3 33 6 35 21] [ 3 24 4 11 4 7 3 0 35 21]]
def build_inputs(batch_size, num_steps):
    ''' Define placeholders for inputs, targets, and dropout.

    Arguments
    ---------
    batch_size: Batch size, number of sequences per batch
    num_steps: Number of sequence steps in a batch
    '''
    shape = [batch_size, num_steps]
    # Integer-encoded characters in; integer-encoded next-characters out.
    inputs = tf.placeholder(tf.int32, shape, name='inputs')
    targets = tf.placeholder(tf.int32, shape, name='targets')
    # Scalar keep-probability so dropout can be turned off at sampling time.
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    return inputs, targets, keep_prob
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build a stacked LSTM cell with dropout.

    Arguments
    ---------
    keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability
    lstm_size: Size of the hidden layers in the LSTM cells
    num_layers: Number of LSTM layers
    batch_size: Batch size

    Returns
    -------
    cell, initial_state: the stacked RNN cell and its zero state.
    '''
    def lstm_cell():
        # One basic LSTM cell with dropout applied to its outputs.
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        return tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

    # Build a *separate* cell object per layer.  The original
    # `[drop] * num_layers` puts the same cell instance in every layer,
    # which makes TensorFlow (>= 1.1) share the per-layer weights or fail
    # with a variable-reuse error when the layer sizes differ.
    cell = tf.contrib.rnn.MultiRNNCell(
        [lstm_cell() for _ in range(num_layers)])
    initial_state = cell.zero_state(batch_size, tf.float32)
    return cell, initial_state
def build_output(lstm_output, in_size, out_size):
    ''' Build a softmax layer, return the softmax output and logits.

    Arguments
    ---------
    lstm_output: RNN output tensor(s)
    in_size: Size of the input tensor, for example, size of the LSTM cells
    out_size: Size of this softmax layer
    '''
    # Flatten to (batch_size * num_steps, lstm_size): one row per time
    # step of every sequence.
    flat = tf.reshape(tf.concat(lstm_output, axis=1), [-1, in_size])

    # Fully connected projection from LSTM state size to vocabulary size.
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))

    # One row of logits per step of every sequence.
    logits = tf.matmul(flat, softmax_w) + softmax_b
    # Per-character probability distribution.
    out = tf.nn.softmax(logits, name='predictions')
    return out, logits
def build_loss(logits, targets, lstm_size, num_classes):
    ''' Calculate the loss from the logits and the targets.

    Arguments
    ---------
    logits: Logits from final fully connected layer
    targets: Targets for supervised learning
    lstm_size: Number of LSTM hidden units
    num_classes: Number of classes in targets
    '''
    # One-hot encode the targets and flatten them so each row lines up
    # with the corresponding row of logits.
    y_reshaped = tf.reshape(tf.one_hot(targets, num_classes), logits.get_shape())
    # Mean softmax cross-entropy over every character position.
    per_step_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=y_reshaped)
    return tf.reduce_mean(per_step_loss)
def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build optimizer for training, using gradient clipping.

    Arguments:
    loss: Network loss
    learning_rate: Learning rate for optimizer
    grad_clip: Global-norm threshold used to clip the gradients
    '''
    # Clip gradients by global norm to keep exploding gradients in check,
    # then apply them with Adam.
    tvars = tf.trainable_variables()
    clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped, tvars))
class CharRNN:
    """Character-level RNN: stacked dropout-LSTM over one-hot characters,
    a softmax output layer, cross-entropy loss, and clipped Adam updates.

    Set sampling=True to build a 1x1 graph for generating one character
    at a time from a trained checkpoint.
    """

    def __init__(self, num_classes, batch_size=64, num_steps=50,
                 lstm_size=128, num_layers=2, learning_rate=0.001,
                 grad_clip=5, sampling=False):
        # When sampling, characters are fed one at a time, so the graph
        # is built with batch_size = num_steps = 1.  (The original also
        # had a no-op else branch reassigning the values to themselves.)
        if sampling:
            batch_size, num_steps = 1, 1

        tf.reset_default_graph()

        # Input placeholder tensors.
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Stacked LSTM cell with dropout.
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        # One-hot encode the input tokens and run them through the RNN.
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        # Softmax predictions and logits.
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)

        # Loss and optimizer (with gradient clipping).
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)
# Training hyperparameters.
batch_size = 10        # sequences per batch
num_steps = 10         # characters per sequence step
lstm_size = 512        # hidden units per LSTM layer
num_layers = 2         # stacked LSTM layers
learning_rate = 0.001  # Adam learning rate
keep_prob = 0.5        # dropout keep probability during training
epochs = 20
# Save every N iterations
save_every_n = 200
# Build the training graph; vocabulary size comes from the encoded corpus.
model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
lstm_size=lstm_size, num_layers=num_layers,
learning_rate=learning_rate)
# Keep up to 100 checkpoints around for later sampling/comparison.
saver = tf.train.Saver(max_to_keep=100)
# Train the network, reporting per-batch loss and checkpointing every
# save_every_n steps (plus once more at the very end).
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Reset the recurrent state at the start of every epoch.
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            # Carry the LSTM state across batches within the epoch.
            batch_loss, new_state, _ = sess.run([model.loss,
                                                 model.final_state,
                                                 model.optimizer],
                                                feed_dict=feed)
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
Epoch: 1/20... Training Step: 1... Training loss: 3.7620... 0.1642 sec/batch Epoch: 1/20... Training Step: 2... Training loss: 3.7256... 0.1233 sec/batch Epoch: 1/20... Training Step: 3... Training loss: 3.6567... 0.1201 sec/batch Epoch: 1/20... Training Step: 4... Training loss: 3.4860... 0.1188 sec/batch Epoch: 1/20... Training Step: 5... Training loss: 3.2803... 0.1250 sec/batch Epoch: 1/20... Training Step: 6... Training loss: 3.6022... 0.1220 sec/batch Epoch: 1/20... Training Step: 7... Training loss: 4.0865... 0.1183 sec/batch Epoch: 1/20... Training Step: 8... Training loss: 3.1127... 0.1227 sec/batch Epoch: 1/20... Training Step: 9... Training loss: 3.3907... 0.1215 sec/batch Epoch: 1/20... Training Step: 10... Training loss: 3.4856... 0.1189 sec/batch Epoch: 1/20... Training Step: 11... Training loss: 3.4883... 0.1214 sec/batch Epoch: 1/20... Training Step: 12... Training loss: 3.3462... 0.1254 sec/batch Epoch: 1/20... Training Step: 13... Training loss: 3.3203... 0.1224 sec/batch Epoch: 1/20... Training Step: 14... Training loss: 3.1857... 0.1243 sec/batch Epoch: 1/20... Training Step: 15... Training loss: 3.1654... 0.1260 sec/batch Epoch: 1/20... Training Step: 16... Training loss: 3.3089... 0.1259 sec/batch Epoch: 1/20... Training Step: 17... Training loss: 3.0935... 0.1218 sec/batch Epoch: 1/20... Training Step: 18... Training loss: 3.0318... 0.1229 sec/batch Epoch: 1/20... Training Step: 19... Training loss: 3.1196... 0.1216 sec/batch Epoch: 1/20... Training Step: 20... Training loss: 2.9132... 0.1198 sec/batch Epoch: 1/20... Training Step: 21... Training loss: 3.1271... 0.1283 sec/batch Epoch: 1/20... Training Step: 22... Training loss: 3.1013... 0.1219 sec/batch Epoch: 1/20... Training Step: 23... Training loss: 3.1496... 0.1247 sec/batch Epoch: 1/20... Training Step: 24... Training loss: 3.0565... 0.1232 sec/batch Epoch: 1/20... Training Step: 25... Training loss: 2.9929... 0.1239 sec/batch Epoch: 1/20... Training Step: 26... 
Training loss: 3.0869... 0.1269 sec/batch Epoch: 1/20... Training Step: 27... Training loss: 3.2220... 0.1225 sec/batch Epoch: 1/20... Training Step: 28... Training loss: 3.1725... 0.1248 sec/batch Epoch: 1/20... Training Step: 29... Training loss: 3.0635... 0.1241 sec/batch Epoch: 1/20... Training Step: 30... Training loss: 3.1275... 0.1208 sec/batch Epoch: 1/20... Training Step: 31... Training loss: 2.8966... 0.1237 sec/batch Epoch: 1/20... Training Step: 32... Training loss: 2.9878... 0.1236 sec/batch Epoch: 1/20... Training Step: 33... Training loss: 3.0631... 0.1196 sec/batch Epoch: 1/20... Training Step: 34... Training loss: 3.0332... 0.1258 sec/batch Epoch: 1/20... Training Step: 35... Training loss: 3.0228... 0.1243 sec/batch Epoch: 1/20... Training Step: 36... Training loss: 2.9531... 0.1221 sec/batch Epoch: 1/20... Training Step: 37... Training loss: 3.0487... 0.1235 sec/batch Epoch: 1/20... Training Step: 38... Training loss: 2.9803... 0.1217 sec/batch Epoch: 1/20... Training Step: 39... Training loss: 2.9113... 0.1199 sec/batch Epoch: 1/20... Training Step: 40... Training loss: 3.1494... 0.1242 sec/batch Epoch: 1/20... Training Step: 41... Training loss: 2.8697... 0.1210 sec/batch Epoch: 1/20... Training Step: 42... Training loss: 2.9242... 0.1231 sec/batch Epoch: 1/20... Training Step: 43... Training loss: 3.0483... 0.1218 sec/batch Epoch: 1/20... Training Step: 44... Training loss: 2.8306... 0.1232 sec/batch Epoch: 1/20... Training Step: 45... Training loss: 2.8007... 0.1192 sec/batch Epoch: 1/20... Training Step: 46... Training loss: 2.9705... 0.1237 sec/batch Epoch: 1/20... Training Step: 47... Training loss: 2.8790... 0.1243 sec/batch Epoch: 1/20... Training Step: 48... Training loss: 2.9677... 0.1245 sec/batch Epoch: 1/20... Training Step: 49... Training loss: 2.8278... 0.1245 sec/batch Epoch: 1/20... Training Step: 50... Training loss: 3.0830... 0.1210 sec/batch Epoch: 1/20... Training Step: 51... Training loss: 2.8571... 
0.1262 sec/batch Epoch: 1/20... Training Step: 52... Training loss: 2.9284... 0.1210 sec/batch Epoch: 1/20... Training Step: 53... Training loss: 3.1083... 0.1261 sec/batch Epoch: 1/20... Training Step: 54... Training loss: 3.0061... 0.1195 sec/batch Epoch: 1/20... Training Step: 55... Training loss: 2.9092... 0.1215 sec/batch Epoch: 1/20... Training Step: 56... Training loss: 2.9404... 0.1235 sec/batch Epoch: 1/20... Training Step: 57... Training loss: 2.9676... 0.1235 sec/batch Epoch: 1/20... Training Step: 58... Training loss: 2.9035... 0.1225 sec/batch Epoch: 1/20... Training Step: 59... Training loss: 2.8589... 0.1242 sec/batch Epoch: 1/20... Training Step: 60... Training loss: 2.8798... 0.1243 sec/batch Epoch: 1/20... Training Step: 61... Training loss: 2.9282... 0.1190 sec/batch Epoch: 1/20... Training Step: 62... Training loss: 2.9433... 0.1257 sec/batch Epoch: 1/20... Training Step: 63... Training loss: 2.8242... 0.1214 sec/batch Epoch: 1/20... Training Step: 64... Training loss: 2.8128... 0.1243 sec/batch Epoch: 1/20... Training Step: 65... Training loss: 2.8223... 0.1235 sec/batch Epoch: 1/20... Training Step: 66... Training loss: 2.9849... 0.1269 sec/batch Epoch: 1/20... Training Step: 67... Training loss: 2.9266... 0.1241 sec/batch Epoch: 1/20... Training Step: 68... Training loss: 2.9161... 0.1243 sec/batch Epoch: 1/20... Training Step: 69... Training loss: 2.8079... 0.1184 sec/batch Epoch: 1/20... Training Step: 70... Training loss: 2.8682... 0.1167 sec/batch Epoch: 1/20... Training Step: 71... Training loss: 2.8148... 0.1274 sec/batch Epoch: 1/20... Training Step: 72... Training loss: 2.9319... 0.1277 sec/batch Epoch: 1/20... Training Step: 73... Training loss: 2.8397... 0.1169 sec/batch Epoch: 1/20... Training Step: 74... Training loss: 2.8626... 0.1186 sec/batch Epoch: 1/20... Training Step: 75... Training loss: 3.0676... 0.1178 sec/batch Epoch: 1/20... Training Step: 76... Training loss: 2.9233... 0.1202 sec/batch Epoch: 1/20... 
Training Step: 77... Training loss: 2.7613... 0.1244 sec/batch Epoch: 1/20... Training Step: 78... Training loss: 2.9925... 0.1241 sec/batch Epoch: 1/20... Training Step: 79... Training loss: 2.7884... 0.1273 sec/batch Epoch: 1/20... Training Step: 80... Training loss: 2.7235... 0.1301 sec/batch Epoch: 1/20... Training Step: 81... Training loss: 2.9118... 0.1195 sec/batch Epoch: 1/20... Training Step: 82... Training loss: 2.9589... 0.1243 sec/batch Epoch: 1/20... Training Step: 83... Training loss: 2.7112... 0.1238 sec/batch Epoch: 1/20... Training Step: 84... Training loss: 2.8291... 0.1217 sec/batch Epoch: 1/20... Training Step: 85... Training loss: 2.7706... 0.1315 sec/batch Epoch: 1/20... Training Step: 86... Training loss: 2.6703... 0.1237 sec/batch Epoch: 1/20... Training Step: 87... Training loss: 2.7684... 0.1229 sec/batch Epoch: 1/20... Training Step: 88... Training loss: 2.8455... 0.1235 sec/batch Epoch: 1/20... Training Step: 89... Training loss: 2.9805... 0.1220 sec/batch Epoch: 1/20... Training Step: 90... Training loss: 2.6880... 0.1194 sec/batch Epoch: 1/20... Training Step: 91... Training loss: 2.8288... 0.1221 sec/batch Epoch: 1/20... Training Step: 92... Training loss: 2.8261... 0.1208 sec/batch Epoch: 1/20... Training Step: 93... Training loss: 2.6363... 0.1211 sec/batch Epoch: 1/20... Training Step: 94... Training loss: 2.8438... 0.1238 sec/batch Epoch: 1/20... Training Step: 95... Training loss: 2.5802... 0.1219 sec/batch Epoch: 1/20... Training Step: 96... Training loss: 2.7704... 0.1238 sec/batch Epoch: 1/20... Training Step: 97... Training loss: 2.9868... 0.1239 sec/batch Epoch: 1/20... Training Step: 98... Training loss: 2.8030... 0.1260 sec/batch Epoch: 1/20... Training Step: 99... Training loss: 2.6999... 0.1219 sec/batch Epoch: 1/20... Training Step: 100... Training loss: 2.7008... 0.1196 sec/batch Epoch: 1/20... Training Step: 101... Training loss: 2.7749... 0.1243 sec/batch Epoch: 1/20... Training Step: 102... Training loss: 2.8638... 
0.1252 sec/batch Epoch: 1/20... Training Step: 103... Training loss: 2.7618... 0.1239 sec/batch Epoch: 1/20... Training Step: 104... Training loss: 2.6063... 0.1229 sec/batch Epoch: 1/20... Training Step: 105... Training loss: 2.6149... 0.1242 sec/batch Epoch: 1/20... Training Step: 106... Training loss: 2.9139... 0.1234 sec/batch Epoch: 1/20... Training Step: 107... Training loss: 2.9257... 0.1232 sec/batch Epoch: 1/20... Training Step: 108... Training loss: 2.9118... 0.1231 sec/batch Epoch: 1/20... Training Step: 109... Training loss: 2.7397... 0.1236 sec/batch Epoch: 1/20... Training Step: 110... Training loss: 2.7949... 0.1237 sec/batch Epoch: 1/20... Training Step: 111... Training loss: 2.7819... 0.1240 sec/batch Epoch: 1/20... Training Step: 112... Training loss: 2.7028... 0.1237 sec/batch Epoch: 1/20... Training Step: 113... Training loss: 2.6550... 0.1264 sec/batch Epoch: 1/20... Training Step: 114... Training loss: 2.8067... 0.1234 sec/batch Epoch: 1/20... Training Step: 115... Training loss: 2.6431... 0.1203 sec/batch Epoch: 1/20... Training Step: 116... Training loss: 2.5301... 0.1255 sec/batch Epoch: 1/20... Training Step: 117... Training loss: 2.6822... 0.1195 sec/batch Epoch: 1/20... Training Step: 118... Training loss: 2.6422... 0.1225 sec/batch Epoch: 1/20... Training Step: 119... Training loss: 2.5233... 0.1210 sec/batch Epoch: 1/20... Training Step: 120... Training loss: 2.3074... 0.1192 sec/batch Epoch: 1/20... Training Step: 121... Training loss: 2.4839... 0.1240 sec/batch Epoch: 1/20... Training Step: 122... Training loss: 2.5947... 0.1242 sec/batch Epoch: 1/20... Training Step: 123... Training loss: 2.6883... 0.1250 sec/batch Epoch: 1/20... Training Step: 124... Training loss: 2.4409... 0.1222 sec/batch Epoch: 1/20... Training Step: 125... Training loss: 2.5898... 0.1205 sec/batch Epoch: 1/20... Training Step: 126... Training loss: 2.3660... 0.1183 sec/batch Epoch: 1/20... Training Step: 127... Training loss: 2.3348... 
0.1216 sec/batch Epoch: 1/20... Training Step: 128... Training loss: 2.5245... 0.1231 sec/batch Epoch: 1/20... Training Step: 129... Training loss: 2.5923... 0.1235 sec/batch Epoch: 1/20... Training Step: 130... Training loss: 2.4242... 0.1249 sec/batch Epoch: 1/20... Training Step: 131... Training loss: 2.8288... 0.1198 sec/batch Epoch: 1/20... Training Step: 132... Training loss: 2.3149... 0.1204 sec/batch Epoch: 1/20... Training Step: 133... Training loss: 2.3525... 0.1230 sec/batch Epoch: 1/20... Training Step: 134... Training loss: 2.3290... 0.1236 sec/batch Epoch: 1/20... Training Step: 135... Training loss: 2.1879... 0.1243 sec/batch Epoch: 1/20... Training Step: 136... Training loss: 2.1351... 0.1202 sec/batch Epoch: 1/20... Training Step: 137... Training loss: 2.4319... 0.1197 sec/batch Epoch: 1/20... Training Step: 138... Training loss: 2.4167... 0.1169 sec/batch Epoch: 1/20... Training Step: 139... Training loss: 2.2640... 0.1190 sec/batch Epoch: 1/20... Training Step: 140... Training loss: 2.3276... 0.1220 sec/batch Epoch: 1/20... Training Step: 141... Training loss: 2.4944... 0.1236 sec/batch Epoch: 1/20... Training Step: 142... Training loss: 2.3255... 0.1240 sec/batch Epoch: 1/20... Training Step: 143... Training loss: 2.3086... 0.1197 sec/batch Epoch: 1/20... Training Step: 144... Training loss: 2.2411... 0.1262 sec/batch Epoch: 1/20... Training Step: 145... Training loss: 2.3505... 0.1224 sec/batch Epoch: 1/20... Training Step: 146... Training loss: 2.2917... 0.1205 sec/batch Epoch: 1/20... Training Step: 147... Training loss: 2.4639... 0.1219 sec/batch Epoch: 1/20... Training Step: 148... Training loss: 2.0835... 0.1224 sec/batch Epoch: 1/20... Training Step: 149... Training loss: 2.2542... 0.1188 sec/batch Epoch: 1/20... Training Step: 150... Training loss: 2.3682... 0.1219 sec/batch Epoch: 1/20... Training Step: 151... Training loss: 2.2879... 0.1237 sec/batch Epoch: 1/20... Training Step: 152... Training loss: 2.2459... 
0.1166 sec/batch Epoch: 1/20... Training Step: 153... Training loss: 2.4501... 0.1250 sec/batch Epoch: 1/20... Training Step: 154... Training loss: 2.3604... 0.1238 sec/batch Epoch: 1/20... Training Step: 155... Training loss: 2.2573... 0.1251 sec/batch Epoch: 1/20... Training Step: 156... Training loss: 2.1554... 0.1222 sec/batch Epoch: 1/20... Training Step: 157... Training loss: 1.9763... 0.1264 sec/batch Epoch: 1/20... Training Step: 158... Training loss: 2.0656... 0.1255 sec/batch Epoch: 1/20... Training Step: 159... Training loss: 2.0456... 0.1246 sec/batch Epoch: 1/20... Training Step: 160... Training loss: 2.1463... 0.1206 sec/batch Epoch: 1/20... Training Step: 161... Training loss: 2.4674... 0.1221 sec/batch Epoch: 1/20... Training Step: 162... Training loss: 2.1990... 0.1213 sec/batch Epoch: 1/20... Training Step: 163... Training loss: 2.1825... 0.1241 sec/batch Epoch: 1/20... Training Step: 164... Training loss: 2.0409... 0.1204 sec/batch Epoch: 1/20... Training Step: 165... Training loss: 2.1511... 0.1212 sec/batch Epoch: 1/20... Training Step: 166... Training loss: 2.0999... 0.1215 sec/batch Epoch: 1/20... Training Step: 167... Training loss: 2.0156... 0.1248 sec/batch Epoch: 1/20... Training Step: 168... Training loss: 2.2786... 0.1198 sec/batch Epoch: 1/20... Training Step: 169... Training loss: 2.2583... 0.1214 sec/batch Epoch: 1/20... Training Step: 170... Training loss: 2.1629... 0.1261 sec/batch Epoch: 1/20... Training Step: 171... Training loss: 2.1873... 0.1271 sec/batch Epoch: 1/20... Training Step: 172... Training loss: 2.1109... 0.1211 sec/batch Epoch: 1/20... Training Step: 173... Training loss: 2.2487... 0.1229 sec/batch Epoch: 1/20... Training Step: 174... Training loss: 2.1224... 0.1235 sec/batch Epoch: 1/20... Training Step: 175... Training loss: 2.1910... 0.1261 sec/batch Epoch: 1/20... Training Step: 176... Training loss: 1.9140... 0.1263 sec/batch Epoch: 1/20... Training Step: 177... Training loss: 1.9269... 
0.1381 sec/batch Epoch: 1/20... Training Step: 178... Training loss: 2.2543... 0.1342 sec/batch Epoch: 1/20... Training Step: 179... Training loss: 2.0510... 0.1197 sec/batch Epoch: 1/20... Training Step: 180... Training loss: 2.2251... 0.1231 sec/batch Epoch: 1/20... Training Step: 181... Training loss: 2.0182... 0.1236 sec/batch Epoch: 1/20... Training Step: 182... Training loss: 2.1679... 0.1253 sec/batch Epoch: 1/20... Training Step: 183... Training loss: 2.1451... 0.1212 sec/batch Epoch: 1/20... Training Step: 184... Training loss: 2.1111... 0.1243 sec/batch Epoch: 1/20... Training Step: 185... Training loss: 2.2130... 0.1185 sec/batch Epoch: 1/20... Training Step: 186... Training loss: 1.8731... 0.1240 sec/batch Epoch: 1/20... Training Step: 187... Training loss: 2.0875... 0.1259 sec/batch Epoch: 1/20... Training Step: 188... Training loss: 1.7481... 0.1211 sec/batch Epoch: 1/20... Training Step: 189... Training loss: 2.1269... 0.1285 sec/batch Epoch: 1/20... Training Step: 190... Training loss: 1.8619... 0.1321 sec/batch Epoch: 1/20... Training Step: 191... Training loss: 1.9233... 0.1252 sec/batch Epoch: 1/20... Training Step: 192... Training loss: 2.0531... 0.1201 sec/batch Epoch: 1/20... Training Step: 193... Training loss: 1.9795... 0.1364 sec/batch Epoch: 1/20... Training Step: 194... Training loss: 1.8791... 0.1308 sec/batch Epoch: 1/20... Training Step: 195... Training loss: 2.2336... 0.1297 sec/batch Epoch: 1/20... Training Step: 196... Training loss: 2.0534... 0.1352 sec/batch Epoch: 1/20... Training Step: 197... Training loss: 1.8241... 0.1365 sec/batch Epoch: 1/20... Training Step: 198... Training loss: 2.0393... 0.1306 sec/batch Epoch: 1/20... Training Step: 199... Training loss: 1.7450... 0.1350 sec/batch Epoch: 1/20... Training Step: 200... Training loss: 1.7357... 0.1320 sec/batch Epoch: 1/20... Training Step: 201... Training loss: 1.9492... 0.1283 sec/batch Epoch: 1/20... Training Step: 202... Training loss: 1.9008... 
0.1301 sec/batch Epoch: 1/20... Training Step: 203... Training loss: 1.8058... 0.1433 sec/batch Epoch: 1/20... Training Step: 204... Training loss: 2.0252... 0.1273 sec/batch Epoch: 1/20... Training Step: 205... Training loss: 1.7999... 0.1380 sec/batch Epoch: 1/20... Training Step: 206... Training loss: 1.7063... 0.1327 sec/batch Epoch: 1/20... Training Step: 207... Training loss: 1.8999... 0.1324 sec/batch Epoch: 1/20... Training Step: 208... Training loss: 1.8881... 0.1335 sec/batch Epoch: 1/20... Training Step: 209... Training loss: 1.8977... 0.1328 sec/batch Epoch: 1/20... Training Step: 210... Training loss: 2.0043... 0.1271 sec/batch Epoch: 1/20... Training Step: 211... Training loss: 1.5164... 0.1328 sec/batch Epoch: 1/20... Training Step: 212... Training loss: 2.0064... 0.1357 sec/batch Epoch: 1/20... Training Step: 213... Training loss: 1.9792... 0.1264 sec/batch Epoch: 1/20... Training Step: 214... Training loss: 1.7379... 0.1256 sec/batch Epoch: 1/20... Training Step: 215... Training loss: 2.1222... 0.1225 sec/batch Epoch: 1/20... Training Step: 216... Training loss: 1.7652... 0.1208 sec/batch Epoch: 1/20... Training Step: 217... Training loss: 2.0598... 0.1341 sec/batch Epoch: 1/20... Training Step: 218... Training loss: 1.9019... 0.1372 sec/batch Epoch: 1/20... Training Step: 219... Training loss: 2.0017... 0.1505 sec/batch Epoch: 1/20... Training Step: 220... Training loss: 1.9209... 0.1393 sec/batch Epoch: 1/20... Training Step: 221... Training loss: 1.8711... 0.1373 sec/batch Epoch: 1/20... Training Step: 222... Training loss: 2.1657... 0.1415 sec/batch Epoch: 1/20... Training Step: 223... Training loss: 2.0872... 0.1364 sec/batch Epoch: 1/20... Training Step: 224... Training loss: 2.0484... 0.1354 sec/batch Epoch: 1/20... Training Step: 225... Training loss: 1.9232... 0.1396 sec/batch Epoch: 1/20... Training Step: 226... Training loss: 2.0963... 0.1369 sec/batch Epoch: 1/20... Training Step: 227... Training loss: 2.0884... 
0.1375 sec/batch Epoch: 1/20... Training Step: 228... Training loss: 1.9220... 0.1246 sec/batch Epoch: 1/20... Training Step: 229... Training loss: 1.9084... 0.1239 sec/batch Epoch: 1/20... Training Step: 230... Training loss: 2.1362... 0.1292 sec/batch Epoch: 1/20... Training Step: 231... Training loss: 1.8550... 0.1276 sec/batch Epoch: 1/20... Training Step: 232... Training loss: 1.8892... 0.1224 sec/batch Epoch: 1/20... Training Step: 233... Training loss: 2.2566... 0.1180 sec/batch Epoch: 1/20... Training Step: 234... Training loss: 1.9981... 0.1170 sec/batch Epoch: 1/20... Training Step: 235... Training loss: 2.0485... 0.1258 sec/batch Epoch: 1/20... Training Step: 236... Training loss: 1.8156... 0.1232 sec/batch Epoch: 1/20... Training Step: 237... Training loss: 2.1858... 0.1201 sec/batch Epoch: 1/20... Training Step: 238... Training loss: 1.8726... 0.1269 sec/batch Epoch: 1/20... Training Step: 239... Training loss: 1.9511... 0.1295 sec/batch Epoch: 1/20... Training Step: 240... Training loss: 2.1758... 0.1268 sec/batch Epoch: 1/20... Training Step: 241... Training loss: 1.9088... 0.1327 sec/batch Epoch: 1/20... Training Step: 242... Training loss: 2.0303... 0.1412 sec/batch Epoch: 1/20... Training Step: 243... Training loss: 2.1431... 0.1331 sec/batch Epoch: 1/20... Training Step: 244... Training loss: 1.8868... 0.1342 sec/batch Epoch: 1/20... Training Step: 245... Training loss: 1.8554... 0.1274 sec/batch Epoch: 1/20... Training Step: 246... Training loss: 1.6133... 0.1356 sec/batch Epoch: 1/20... Training Step: 247... Training loss: 1.8370... 0.1277 sec/batch Epoch: 1/20... Training Step: 248... Training loss: 1.9527... 0.1214 sec/batch Epoch: 1/20... Training Step: 249... Training loss: 1.8062... 0.1156 sec/batch Epoch: 1/20... Training Step: 250... Training loss: 1.7428... 0.1226 sec/batch Epoch: 1/20... Training Step: 251... Training loss: 1.9264... 0.1285 sec/batch Epoch: 1/20... Training Step: 252... Training loss: 1.8250... 
0.1279 sec/batch Epoch: 1/20... Training Step: 253... Training loss: 1.7278... 0.1298 sec/batch Epoch: 1/20... Training Step: 254... Training loss: 2.1920... 0.1351 sec/batch Epoch: 1/20... Training Step: 255... Training loss: 1.8406... 0.1345 sec/batch Epoch: 1/20... Training Step: 256... Training loss: 1.7293... 0.1367 sec/batch Epoch: 1/20... Training Step: 257... Training loss: 1.9573... 0.1311 sec/batch Epoch: 1/20... Training Step: 258... Training loss: 1.9288... 0.1309 sec/batch Epoch: 1/20... Training Step: 259... Training loss: 1.9979... 0.1295 sec/batch Epoch: 1/20... Training Step: 260... Training loss: 1.9598... 0.1355 sec/batch Epoch: 1/20... Training Step: 261... Training loss: 1.9618... 0.1301 sec/batch Epoch: 1/20... Training Step: 262... Training loss: 1.9427... 0.1299 sec/batch Epoch: 1/20... Training Step: 263... Training loss: 1.9407... 0.1284 sec/batch Epoch: 1/20... Training Step: 264... Training loss: 1.9523... 0.1208 sec/batch Epoch: 1/20... Training Step: 265... Training loss: 1.9571... 0.1206 sec/batch Epoch: 1/20... Training Step: 266... Training loss: 1.8152... 0.1291 sec/batch Epoch: 1/20... Training Step: 267... Training loss: 2.0836... 0.1350 sec/batch Epoch: 1/20... Training Step: 268... Training loss: 2.0145... 0.1366 sec/batch Epoch: 1/20... Training Step: 269... Training loss: 1.9578... 0.1317 sec/batch Epoch: 1/20... Training Step: 270... Training loss: 2.1346... 0.1203 sec/batch Epoch: 1/20... Training Step: 271... Training loss: 1.7738... 0.1267 sec/batch Epoch: 1/20... Training Step: 272... Training loss: 2.0176... 0.1273 sec/batch Epoch: 1/20... Training Step: 273... Training loss: 1.9272... 0.1310 sec/batch Epoch: 1/20... Training Step: 274... Training loss: 1.8668... 0.1284 sec/batch Epoch: 1/20... Training Step: 275... Training loss: 1.9420... 0.1279 sec/batch Epoch: 1/20... Training Step: 276... Training loss: 1.9878... 0.1259 sec/batch Epoch: 1/20... Training Step: 277... Training loss: 2.0064... 
0.1336 sec/batch Epoch: 1/20... Training Step: 278... Training loss: 2.0244... 0.1296 sec/batch Epoch: 1/20... Training Step: 279... Training loss: 1.7891... 0.1286 sec/batch Epoch: 1/20... Training Step: 280... Training loss: 1.7947... 0.1274 sec/batch Epoch: 1/20... Training Step: 281... Training loss: 1.6625... 0.1232 sec/batch Epoch: 1/20... Training Step: 282... Training loss: 1.7442... 0.1244 sec/batch Epoch: 1/20... Training Step: 283... Training loss: 1.6750... 0.1179 sec/batch Epoch: 1/20... Training Step: 284... Training loss: 1.8013... 0.1205 sec/batch Epoch: 1/20... Training Step: 285... Training loss: 1.6533... 0.1234 sec/batch Epoch: 1/20... Training Step: 286... Training loss: 1.8193... 0.1230 sec/batch Epoch: 1/20... Training Step: 287... Training loss: 1.6664... 0.1203 sec/batch Epoch: 1/20... Training Step: 288... Training loss: 1.8409... 0.1194 sec/batch Epoch: 1/20... Training Step: 289... Training loss: 2.0103... 0.1231 sec/batch Epoch: 1/20... Training Step: 290... Training loss: 1.8769... 0.1256 sec/batch Epoch: 1/20... Training Step: 291... Training loss: 1.9272... 0.1230 sec/batch Epoch: 1/20... Training Step: 292... Training loss: 1.7780... 0.1235 sec/batch Epoch: 1/20... Training Step: 293... Training loss: 1.7624... 0.1267 sec/batch Epoch: 1/20... Training Step: 294... Training loss: 1.9197... 0.1201 sec/batch Epoch: 1/20... Training Step: 295... Training loss: 1.8272... 0.1339 sec/batch Epoch: 1/20... Training Step: 296... Training loss: 2.0636... 0.1272 sec/batch Epoch: 1/20... Training Step: 297... Training loss: 1.8698... 0.1327 sec/batch Epoch: 1/20... Training Step: 298... Training loss: 1.8798... 0.1313 sec/batch Epoch: 1/20... Training Step: 299... Training loss: 1.8173... 0.1287 sec/batch Epoch: 1/20... Training Step: 300... Training loss: 1.8802... 0.1297 sec/batch Epoch: 1/20... Training Step: 301... Training loss: 1.8425... 0.1284 sec/batch Epoch: 1/20... Training Step: 302... Training loss: 1.7539... 
0.1234 sec/batch Epoch: 1/20... Training Step: 303... Training loss: 1.6652... 0.1222 sec/batch Epoch: 1/20... Training Step: 304... Training loss: 1.9984... 0.1201 sec/batch Epoch: 1/20... Training Step: 305... Training loss: 1.6832... 0.1223 sec/batch Epoch: 1/20... Training Step: 306... Training loss: 2.0259... 0.1216 sec/batch Epoch: 1/20... Training Step: 307... Training loss: 1.7023... 0.1201 sec/batch Epoch: 1/20... Training Step: 308... Training loss: 2.2779... 0.1259 sec/batch Epoch: 1/20... Training Step: 309... Training loss: 1.7804... 0.1248 sec/batch Epoch: 1/20... Training Step: 310... Training loss: 2.0357... 0.1211 sec/batch Epoch: 1/20... Training Step: 311... Training loss: 1.9977... 0.1221 sec/batch Epoch: 1/20... Training Step: 312... Training loss: 1.8353... 0.1237 sec/batch Epoch: 1/20... Training Step: 313... Training loss: 1.9002... 0.1264 sec/batch Epoch: 1/20... Training Step: 314... Training loss: 1.8166... 0.1167 sec/batch Epoch: 1/20... Training Step: 315... Training loss: 1.5815... 0.1188 sec/batch Epoch: 1/20... Training Step: 316... Training loss: 1.8202... 0.1207 sec/batch Epoch: 1/20... Training Step: 317... Training loss: 1.9972... 0.1240 sec/batch Epoch: 1/20... Training Step: 318... Training loss: 1.6601... 0.1254 sec/batch Epoch: 1/20... Training Step: 319... Training loss: 1.8317... 0.1218 sec/batch Epoch: 1/20... Training Step: 320... Training loss: 1.7362... 0.1200 sec/batch Epoch: 1/20... Training Step: 321... Training loss: 1.7212... 0.1241 sec/batch Epoch: 1/20... Training Step: 322... Training loss: 1.9143... 0.1230 sec/batch Epoch: 1/20... Training Step: 323... Training loss: 1.5693... 0.1203 sec/batch Epoch: 1/20... Training Step: 324... Training loss: 1.5907... 0.1221 sec/batch Epoch: 1/20... Training Step: 325... Training loss: 1.5188... 0.1176 sec/batch Epoch: 1/20... Training Step: 326... Training loss: 1.7385... 0.1246 sec/batch Epoch: 1/20... Training Step: 327... Training loss: 1.7668... 
0.1233 sec/batch Epoch: 1/20... Training Step: 328... Training loss: 1.7833... 0.1234 sec/batch Epoch: 1/20... Training Step: 329... Training loss: 1.8986... 0.1213 sec/batch Epoch: 1/20... Training Step: 330... Training loss: 1.7910... 0.1223 sec/batch Epoch: 1/20... Training Step: 331... Training loss: 1.7676... 0.1231 sec/batch Epoch: 1/20... Training Step: 332... Training loss: 1.9112... 0.1220 sec/batch Epoch: 1/20... Training Step: 333... Training loss: 1.8434... 0.1257 sec/batch Epoch: 1/20... Training Step: 334... Training loss: 1.7872... 0.1177 sec/batch Epoch: 1/20... Training Step: 335... Training loss: 1.9730... 0.1222 sec/batch Epoch: 1/20... Training Step: 336... Training loss: 1.9150... 0.1211 sec/batch Epoch: 1/20... Training Step: 337... Training loss: 1.8544... 0.1211 sec/batch Epoch: 1/20... Training Step: 338... Training loss: 1.9315... 0.1236 sec/batch Epoch: 1/20... Training Step: 339... Training loss: 1.8512... 0.1244 sec/batch Epoch: 1/20... Training Step: 340... Training loss: 1.7988... 0.1212 sec/batch Epoch: 1/20... Training Step: 341... Training loss: 1.6037... 0.1270 sec/batch Epoch: 1/20... Training Step: 342... Training loss: 1.8011... 0.1232 sec/batch Epoch: 1/20... Training Step: 343... Training loss: 1.5731... 0.1230 sec/batch Epoch: 1/20... Training Step: 344... Training loss: 1.9590... 0.1202 sec/batch Epoch: 1/20... Training Step: 345... Training loss: 1.8489... 0.1183 sec/batch Epoch: 1/20... Training Step: 346... Training loss: 1.6802... 0.1263 sec/batch Epoch: 1/20... Training Step: 347... Training loss: 1.5646... 0.1212 sec/batch Epoch: 1/20... Training Step: 348... Training loss: 1.9530... 0.1227 sec/batch Epoch: 1/20... Training Step: 349... Training loss: 1.7007... 0.1255 sec/batch Epoch: 1/20... Training Step: 350... Training loss: 1.8760... 0.1199 sec/batch Epoch: 1/20... Training Step: 351... Training loss: 1.7809... 0.1218 sec/batch Epoch: 1/20... Training Step: 352... Training loss: 1.8105... 
0.1218 sec/batch Epoch: 1/20... Training Step: 353... Training loss: 1.6809... 0.1221 sec/batch Epoch: 1/20... Training Step: 354... Training loss: 1.4776... 0.1270 sec/batch Epoch: 1/20... Training Step: 355... Training loss: 2.0394... 0.1206 sec/batch Epoch: 1/20... Training Step: 356... Training loss: 1.7149... 0.1252 sec/batch Epoch: 1/20... Training Step: 357... Training loss: 1.5545... 0.1213 sec/batch Epoch: 1/20... Training Step: 358... Training loss: 1.8659... 0.1208 sec/batch Epoch: 1/20... Training Step: 359... Training loss: 1.7484... 0.1211 sec/batch Epoch: 1/20... Training Step: 360... Training loss: 1.6083... 0.1229 sec/batch Epoch: 1/20... Training Step: 361... Training loss: 1.8397... 0.1193 sec/batch Epoch: 1/20... Training Step: 362... Training loss: 1.7802... 0.1207 sec/batch Epoch: 1/20... Training Step: 363... Training loss: 1.8073... 0.1211 sec/batch Epoch: 1/20... Training Step: 364... Training loss: 1.7364... 0.1175 sec/batch Epoch: 1/20... Training Step: 365... Training loss: 1.7202... 0.1268 sec/batch Epoch: 1/20... Training Step: 366... Training loss: 1.8243... 0.1281 sec/batch Epoch: 1/20... Training Step: 367... Training loss: 1.6731... 0.1288 sec/batch Epoch: 1/20... Training Step: 368... Training loss: 1.7976... 0.1260 sec/batch Epoch: 1/20... Training Step: 369... Training loss: 1.7247... 0.1251 sec/batch Epoch: 1/20... Training Step: 370... Training loss: 1.7413... 0.1244 sec/batch Epoch: 1/20... Training Step: 371... Training loss: 1.8750... 0.1243 sec/batch Epoch: 1/20... Training Step: 372... Training loss: 1.8337... 0.1234 sec/batch Epoch: 1/20... Training Step: 373... Training loss: 1.8835... 0.1235 sec/batch Epoch: 1/20... Training Step: 374... Training loss: 2.0701... 0.1277 sec/batch Epoch: 1/20... Training Step: 375... Training loss: 1.7518... 0.1290 sec/batch Epoch: 1/20... Training Step: 376... Training loss: 1.8389... 0.1459 sec/batch Epoch: 1/20... Training Step: 377... Training loss: 1.7932... 
0.1210 sec/batch Epoch: 1/20... Training Step: 378... Training loss: 1.8923... 0.1196 sec/batch Epoch: 1/20... Training Step: 379... Training loss: 1.6982... 0.1236 sec/batch Epoch: 1/20... Training Step: 380... Training loss: 1.8352... 0.1249 sec/batch Epoch: 1/20... Training Step: 381... Training loss: 1.7485... 0.1255 sec/batch Epoch: 1/20... Training Step: 382... Training loss: 1.7305... 0.1200 sec/batch Epoch: 1/20... Training Step: 383... Training loss: 1.7531... 0.1224 sec/batch Epoch: 1/20... Training Step: 384... Training loss: 1.7135... 0.1216 sec/batch Epoch: 1/20... Training Step: 385... Training loss: 1.5366... 0.1196 sec/batch Epoch: 1/20... Training Step: 386... Training loss: 1.8873... 0.1195 sec/batch Epoch: 1/20... Training Step: 387... Training loss: 1.8128... 0.1238 sec/batch Epoch: 1/20... Training Step: 388... Training loss: 1.7778... 0.1235 sec/batch Epoch: 1/20... Training Step: 389... Training loss: 1.7313... 0.1230 sec/batch Epoch: 1/20... Training Step: 390... Training loss: 1.7203... 0.1227 sec/batch Epoch: 1/20... Training Step: 391... Training loss: 1.6142... 0.1233 sec/batch Epoch: 1/20... Training Step: 392... Training loss: 1.6548... 0.1216 sec/batch Epoch: 1/20... Training Step: 393... Training loss: 1.6818... 0.1181 sec/batch Epoch: 1/20... Training Step: 394... Training loss: 1.8411... 0.1219 sec/batch Epoch: 1/20... Training Step: 395... Training loss: 1.5136... 0.1249 sec/batch Epoch: 1/20... Training Step: 396... Training loss: 1.8199... 0.1205 sec/batch Epoch: 1/20... Training Step: 397... Training loss: 1.5027... 0.1245 sec/batch Epoch: 1/20... Training Step: 398... Training loss: 1.4402... 0.1178 sec/batch Epoch: 1/20... Training Step: 399... Training loss: 1.6574... 0.1253 sec/batch Epoch: 1/20... Training Step: 400... Training loss: 1.5007... 0.1219 sec/batch Epoch: 1/20... Training Step: 401... Training loss: 1.8357... 0.1199 sec/batch Epoch: 1/20... Training Step: 402... Training loss: 1.7633... 
0.1200 sec/batch Epoch: 1/20... Training Step: 403... Training loss: 1.6854... 0.1191 sec/batch Epoch: 1/20... Training Step: 404... Training loss: 1.8063... 0.1216 sec/batch Epoch: 1/20... Training Step: 405... Training loss: 1.6896... 0.1205 sec/batch Epoch: 1/20... Training Step: 406... Training loss: 1.7406... 0.1259 sec/batch Epoch: 1/20... Training Step: 407... Training loss: 1.8450... 0.1270 sec/batch Epoch: 1/20... Training Step: 408... Training loss: 1.9894... 0.1220 sec/batch Epoch: 1/20... Training Step: 409... Training loss: 1.7480... 0.1221 sec/batch Epoch: 1/20... Training Step: 410... Training loss: 1.7030... 0.1210 sec/batch Epoch: 1/20... Training Step: 411... Training loss: 1.7146... 0.1219 sec/batch Epoch: 1/20... Training Step: 412... Training loss: 1.7491... 0.1209 sec/batch Epoch: 1/20... Training Step: 413... Training loss: 1.6690... 0.1186 sec/batch Epoch: 1/20... Training Step: 414... Training loss: 1.6483... 0.1254 sec/batch Epoch: 1/20... Training Step: 415... Training loss: 2.0953... 0.1253 sec/batch Epoch: 1/20... Training Step: 416... Training loss: 2.0327... 0.1216 sec/batch Epoch: 1/20... Training Step: 417... Training loss: 1.9521... 0.1294 sec/batch Epoch: 1/20... Training Step: 418... Training loss: 1.6823... 0.1218 sec/batch Epoch: 1/20... Training Step: 419... Training loss: 1.8449... 0.1245 sec/batch Epoch: 1/20... Training Step: 420... Training loss: 1.6649... 0.1266 sec/batch Epoch: 1/20... Training Step: 421... Training loss: 1.8106... 0.1248 sec/batch Epoch: 1/20... Training Step: 422... Training loss: 1.6933... 0.1229 sec/batch Epoch: 1/20... Training Step: 423... Training loss: 1.9252... 0.1264 sec/batch Epoch: 1/20... Training Step: 424... Training loss: 1.9893... 0.1250 sec/batch Epoch: 1/20... Training Step: 425... Training loss: 1.7732... 0.1247 sec/batch Epoch: 1/20... Training Step: 426... Training loss: 1.7674... 0.1239 sec/batch Epoch: 1/20... Training Step: 427... Training loss: 1.6535... 
0.1268 sec/batch Epoch: 1/20... Training Step: 428... Training loss: 2.0166... 0.1194 sec/batch Epoch: 1/20... Training Step: 429... Training loss: 1.6403... 0.1226 sec/batch Epoch: 1/20... Training Step: 430... Training loss: 1.7100... 0.1224 sec/batch Epoch: 1/20... Training Step: 431... Training loss: 1.9968... 0.1201 sec/batch Epoch: 1/20... Training Step: 432... Training loss: 1.7512... 0.1216 sec/batch Epoch: 1/20... Training Step: 433... Training loss: 1.9491... 0.1228 sec/batch Epoch: 1/20... Training Step: 434... Training loss: 1.9198... 0.1382 sec/batch Epoch: 1/20... Training Step: 435... Training loss: 1.6259... 0.1230 sec/batch Epoch: 1/20... Training Step: 436... Training loss: 1.8238... 0.1213 sec/batch Epoch: 1/20... Training Step: 437... Training loss: 1.7599... 0.1218 sec/batch Epoch: 1/20... Training Step: 438... Training loss: 1.8576... 0.1255 sec/batch Epoch: 1/20... Training Step: 439... Training loss: 1.9367... 0.1207 sec/batch Epoch: 1/20... Training Step: 440... Training loss: 2.1496... 0.1303 sec/batch Epoch: 1/20... Training Step: 441... Training loss: 1.9533... 0.1361 sec/batch Epoch: 1/20... Training Step: 442... Training loss: 1.7783... 0.1323 sec/batch Epoch: 1/20... Training Step: 443... Training loss: 1.8428... 0.1197 sec/batch Epoch: 1/20... Training Step: 444... Training loss: 1.7003... 0.1290 sec/batch Epoch: 1/20... Training Step: 445... Training loss: 1.7882... 0.1272 sec/batch Epoch: 1/20... Training Step: 446... Training loss: 1.7898... 0.1245 sec/batch Epoch: 1/20... Training Step: 447... Training loss: 1.7436... 0.1284 sec/batch Epoch: 1/20... Training Step: 448... Training loss: 1.6811... 0.1222 sec/batch Epoch: 1/20... Training Step: 449... Training loss: 1.7512... 0.1249 sec/batch Epoch: 1/20... Training Step: 450... Training loss: 1.7195... 0.1251 sec/batch Epoch: 1/20... Training Step: 451... Training loss: 1.8251... 0.1198 sec/batch Epoch: 1/20... Training Step: 452... Training loss: 1.7388... 
0.1209 sec/batch Epoch: 1/20... Training Step: 453... Training loss: 1.6635... 0.1208 sec/batch Epoch: 1/20... Training Step: 454... Training loss: 2.0937... 0.1227 sec/batch Epoch: 1/20... Training Step: 455... Training loss: 1.8394... 0.1225 sec/batch Epoch: 1/20... Training Step: 456... Training loss: 1.6080... 0.1203 sec/batch Epoch: 1/20... Training Step: 457... Training loss: 1.7142... 0.1253 sec/batch Epoch: 1/20... Training Step: 458... Training loss: 1.7019... 0.1227 sec/batch Epoch: 1/20... Training Step: 459... Training loss: 1.5507... 0.1222 sec/batch Epoch: 1/20... Training Step: 460... Training loss: 1.7918... 0.1226 sec/batch Epoch: 1/20... Training Step: 461... Training loss: 1.9374... 0.1258 sec/batch Epoch: 1/20... Training Step: 462... Training loss: 1.7728... 0.1187 sec/batch Epoch: 1/20... Training Step: 463... Training loss: 1.7676... 0.1206 sec/batch Epoch: 1/20... Training Step: 464... Training loss: 1.5364... 0.1235 sec/batch Epoch: 2/20... Training Step: 465... Training loss: 2.1169... 0.1199 sec/batch Epoch: 2/20... Training Step: 466... Training loss: 1.8043... 0.1209 sec/batch Epoch: 2/20... Training Step: 467... Training loss: 1.7437... 0.1196 sec/batch Epoch: 2/20... Training Step: 468... Training loss: 1.6801... 0.1214 sec/batch Epoch: 2/20... Training Step: 469... Training loss: 2.0278... 0.1229 sec/batch Epoch: 2/20... Training Step: 470... Training loss: 1.5746... 0.1217 sec/batch Epoch: 2/20... Training Step: 471... Training loss: 1.8345... 0.1220 sec/batch Epoch: 2/20... Training Step: 472... Training loss: 1.6416... 0.1232 sec/batch Epoch: 2/20... Training Step: 473... Training loss: 1.7075... 0.1241 sec/batch Epoch: 2/20... Training Step: 474... Training loss: 1.9914... 0.1202 sec/batch Epoch: 2/20... Training Step: 475... Training loss: 1.5417... 0.1212 sec/batch Epoch: 2/20... Training Step: 476... Training loss: 1.5214... 0.1212 sec/batch Epoch: 2/20... Training Step: 477... Training loss: 1.8553... 
0.1235 sec/batch Epoch: 2/20... Training Step: 478... Training loss: 1.5335... 0.1228 sec/batch Epoch: 2/20... Training Step: 479... Training loss: 1.8862... 0.1184 sec/batch Epoch: 2/20... Training Step: 480... Training loss: 1.7181... 0.1239 sec/batch Epoch: 2/20... Training Step: 481... Training loss: 1.4988... 0.1216 sec/batch Epoch: 2/20... Training Step: 482... Training loss: 1.5411... 0.1235 sec/batch Epoch: 2/20... Training Step: 483... Training loss: 1.6608... 0.1201 sec/batch Epoch: 2/20... Training Step: 484... Training loss: 1.4726... 0.1181 sec/batch Epoch: 2/20... Training Step: 485... Training loss: 1.6968... 0.1271 sec/batch Epoch: 2/20... Training Step: 486... Training loss: 1.5778... 0.1228 sec/batch Epoch: 2/20... Training Step: 487... Training loss: 1.9038... 0.1221 sec/batch Epoch: 2/20... Training Step: 488... Training loss: 1.5238... 0.1251 sec/batch Epoch: 2/20... Training Step: 489... Training loss: 1.6305... 0.1235 sec/batch Epoch: 2/20... Training Step: 490... Training loss: 1.6736... 0.1207 sec/batch Epoch: 2/20... Training Step: 491... Training loss: 1.7360... 0.1220 sec/batch Epoch: 2/20... Training Step: 492... Training loss: 1.5628... 0.1215 sec/batch Epoch: 2/20... Training Step: 493... Training loss: 1.6163... 0.1211 sec/batch Epoch: 2/20... Training Step: 494... Training loss: 1.6658... 0.1230 sec/batch Epoch: 2/20... Training Step: 495... Training loss: 1.4685... 0.1184 sec/batch Epoch: 2/20... Training Step: 496... Training loss: 1.6188... 0.1200 sec/batch Epoch: 2/20... Training Step: 497... Training loss: 1.4422... 0.1180 sec/batch Epoch: 2/20... Training Step: 498... Training loss: 1.5989... 0.1241 sec/batch Epoch: 2/20... Training Step: 499... Training loss: 1.6538... 0.1219 sec/batch Epoch: 2/20... Training Step: 500... Training loss: 1.5559... 0.1235 sec/batch Epoch: 2/20... Training Step: 501... Training loss: 1.6995... 0.1220 sec/batch Epoch: 2/20... Training Step: 502... Training loss: 1.5303... 
0.1238 sec/batch Epoch: 2/20... Training Step: 503... Training loss: 1.5401... 0.1172 sec/batch Epoch: 2/20... Training Step: 504... Training loss: 1.8869... 0.1249 sec/batch Epoch: 2/20... Training Step: 505... Training loss: 1.5657... 0.1235 sec/batch Epoch: 2/20... Training Step: 506... Training loss: 1.5236... 0.1210 sec/batch Epoch: 2/20... Training Step: 507... Training loss: 1.8203... 0.1213 sec/batch Epoch: 2/20... Training Step: 508... Training loss: 1.4910... 0.1218 sec/batch Epoch: 2/20... Training Step: 509... Training loss: 1.6385... 0.1188 sec/batch Epoch: 2/20... Training Step: 510... Training loss: 1.5416... 0.1244 sec/batch Epoch: 2/20... Training Step: 511... Training loss: 1.4825... 0.1223 sec/batch Epoch: 2/20... Training Step: 512... Training loss: 1.8408... 0.1237 sec/batch Epoch: 2/20... Training Step: 513... Training loss: 1.5825... 0.1248 sec/batch Epoch: 2/20... Training Step: 514... Training loss: 1.8381... 0.1233 sec/batch Epoch: 2/20... Training Step: 515... Training loss: 1.6678... 0.1225 sec/batch Epoch: 2/20... Training Step: 516... Training loss: 1.8468... 0.1225 sec/batch Epoch: 2/20... Training Step: 517... Training loss: 1.7648... 0.1205 sec/batch Epoch: 2/20... Training Step: 518... Training loss: 1.6612... 0.1214 sec/batch Epoch: 2/20... Training Step: 519... Training loss: 1.5334... 0.1220 sec/batch Epoch: 2/20... Training Step: 520... Training loss: 1.6670... 0.1264 sec/batch Epoch: 2/20... Training Step: 521... Training loss: 1.8123... 0.1248 sec/batch Epoch: 2/20... Training Step: 522... Training loss: 1.8160... 0.1246 sec/batch Epoch: 2/20... Training Step: 523... Training loss: 1.6110... 0.1191 sec/batch Epoch: 2/20... Training Step: 524... Training loss: 1.6792... 0.1206 sec/batch Epoch: 2/20... Training Step: 525... Training loss: 1.6498... 0.1249 sec/batch Epoch: 2/20... Training Step: 526... Training loss: 1.8336... 0.1202 sec/batch Epoch: 2/20... Training Step: 527... Training loss: 1.5916... 
0.1234 sec/batch Epoch: 2/20... Training Step: 528... Training loss: 1.7026... 0.1227 sec/batch Epoch: 2/20... Training Step: 529... Training loss: 1.4911... 0.1241 sec/batch Epoch: 2/20... Training Step: 530... Training loss: 1.6888... 0.1217 sec/batch Epoch: 2/20... Training Step: 531... Training loss: 1.6631... 0.1197 sec/batch Epoch: 2/20... Training Step: 532... Training loss: 1.7223... 0.1233 sec/batch Epoch: 2/20... Training Step: 533... Training loss: 1.6294... 0.1231 sec/batch Epoch: 2/20... Training Step: 534... Training loss: 1.6289... 0.1279 sec/batch Epoch: 2/20... Training Step: 535... Training loss: 1.7408... 0.1231 sec/batch Epoch: 2/20... Training Step: 536... Training loss: 1.8541... 0.1194 sec/batch Epoch: 2/20... Training Step: 537... Training loss: 1.6901... 0.1215 sec/batch Epoch: 2/20... Training Step: 538... Training loss: 1.5052... 0.1240 sec/batch Epoch: 2/20... Training Step: 539... Training loss: 1.9474... 0.1161 sec/batch Epoch: 2/20... Training Step: 540... Training loss: 1.5172... 0.1202 sec/batch Epoch: 2/20... Training Step: 541... Training loss: 1.4782... 0.1203 sec/batch Epoch: 2/20... Training Step: 542... Training loss: 1.7421... 0.1222 sec/batch Epoch: 2/20... Training Step: 543... Training loss: 1.7581... 0.1230 sec/batch Epoch: 2/20... Training Step: 544... Training loss: 1.5988... 0.1178 sec/batch Epoch: 2/20... Training Step: 545... Training loss: 1.6857... 0.1171 sec/batch Epoch: 2/20... Training Step: 546... Training loss: 1.7556... 0.1237 sec/batch Epoch: 2/20... Training Step: 547... Training loss: 1.5255... 0.1232 sec/batch Epoch: 2/20... Training Step: 548... Training loss: 1.7376... 0.1200 sec/batch Epoch: 2/20... Training Step: 549... Training loss: 1.6665... 0.1200 sec/batch Epoch: 2/20... Training Step: 550... Training loss: 1.5863... 0.1202 sec/batch Epoch: 2/20... Training Step: 551... Training loss: 1.5182... 0.1184 sec/batch Epoch: 2/20... Training Step: 552... Training loss: 1.7099... 
0.1178 sec/batch Epoch: 2/20... Training Step: 553... Training loss: 1.9698... 0.1189 sec/batch Epoch: 2/20... Training Step: 554... Training loss: 1.5779... 0.1222 sec/batch Epoch: 2/20... Training Step: 555... Training loss: 1.7203... 0.1233 sec/batch Epoch: 2/20... Training Step: 556... Training loss: 1.8608... 0.1221 sec/batch Epoch: 2/20... Training Step: 557... Training loss: 1.4067... 0.1226 sec/batch Epoch: 2/20... Training Step: 558... Training loss: 1.8412... 0.1214 sec/batch Epoch: 2/20... Training Step: 559... Training loss: 1.4484... 0.1245 sec/batch Epoch: 2/20... Training Step: 560... Training loss: 1.7325... 0.1258 sec/batch Epoch: 2/20... Training Step: 561... Training loss: 2.0178... 0.1231 sec/batch Epoch: 2/20... Training Step: 562... Training loss: 1.7388... 0.1219 sec/batch Epoch: 2/20... Training Step: 563... Training loss: 1.7662... 0.1240 sec/batch Epoch: 2/20... Training Step: 564... Training loss: 1.5795... 0.1206 sec/batch Epoch: 2/20... Training Step: 565... Training loss: 1.6619... 0.1283 sec/batch Epoch: 2/20... Training Step: 566... Training loss: 1.8696... 0.1230 sec/batch Epoch: 2/20... Training Step: 567... Training loss: 1.9462... 0.1194 sec/batch Epoch: 2/20... Training Step: 568... Training loss: 1.6107... 0.1226 sec/batch Epoch: 2/20... Training Step: 569... Training loss: 1.8654... 0.1231 sec/batch Epoch: 2/20... Training Step: 570... Training loss: 2.0711... 0.1209 sec/batch Epoch: 2/20... Training Step: 571... Training loss: 1.9632... 0.1206 sec/batch Epoch: 2/20... Training Step: 572... Training loss: 2.0685... 0.1212 sec/batch Epoch: 2/20... Training Step: 573... Training loss: 1.9684... 0.1233 sec/batch Epoch: 2/20... Training Step: 574... Training loss: 1.8868... 0.1275 sec/batch Epoch: 2/20... Training Step: 575... Training loss: 1.8931... 0.1193 sec/batch Epoch: 2/20... Training Step: 576... Training loss: 1.8381... 0.1197 sec/batch Epoch: 2/20... Training Step: 577... Training loss: 1.8021... 
0.1183 sec/batch Epoch: 2/20... Training Step: 578... Training loss: 1.9651... 0.1182 sec/batch Epoch: 2/20... Training Step: 579... Training loss: 1.8136... 0.1184 sec/batch Epoch: 2/20... Training Step: 580... Training loss: 1.6910... 0.1174 sec/batch Epoch: 2/20... Training Step: 581... Training loss: 1.9931... 0.1183 sec/batch Epoch: 2/20... Training Step: 582... Training loss: 2.0034... 0.1180 sec/batch Epoch: 2/20... Training Step: 583... Training loss: 1.7529... 0.1211 sec/batch Epoch: 2/20... Training Step: 584... Training loss: 1.6017... 0.1224 sec/batch Epoch: 2/20... Training Step: 585... Training loss: 1.7148... 0.1195 sec/batch Epoch: 2/20... Training Step: 586... Training loss: 1.7608... 0.1220 sec/batch Epoch: 2/20... Training Step: 587... Training loss: 1.9276... 0.1203 sec/batch Epoch: 2/20... Training Step: 588... Training loss: 1.9657... 0.1240 sec/batch Epoch: 2/20... Training Step: 589... Training loss: 1.9765... 0.1337 sec/batch Epoch: 2/20... Training Step: 590... Training loss: 1.6106... 0.1296 sec/batch Epoch: 2/20... Training Step: 591... Training loss: 1.6972... 0.1229 sec/batch Epoch: 2/20... Training Step: 592... Training loss: 1.6956... 0.1212 sec/batch Epoch: 2/20... Training Step: 593... Training loss: 1.8264... 0.1205 sec/batch Epoch: 2/20... Training Step: 594... Training loss: 1.7785... 0.1188 sec/batch Epoch: 2/20... Training Step: 595... Training loss: 2.0840... 0.1218 sec/batch Epoch: 2/20... Training Step: 596... Training loss: 1.8626... 0.1222 sec/batch Epoch: 2/20... Training Step: 597... Training loss: 1.6291... 0.1234 sec/batch Epoch: 2/20... Training Step: 598... Training loss: 1.7861... 0.1194 sec/batch Epoch: 2/20... Training Step: 599... Training loss: 1.6117... 0.1248 sec/batch Epoch: 2/20... Training Step: 600... Training loss: 1.5718... 0.1232 sec/batch Epoch: 2/20... Training Step: 601... Training loss: 1.6199... 0.1235 sec/batch Epoch: 2/20... Training Step: 602... Training loss: 1.7399... 
0.1192 sec/batch Epoch: 2/20... Training Step: 603... Training loss: 1.5251... 0.1201 sec/batch Epoch: 2/20... Training Step: 604... Training loss: 1.6950... 0.1233 sec/batch Epoch: 2/20... Training Step: 605... Training loss: 1.7904... 0.1260 sec/batch Epoch: 2/20... Training Step: 606... Training loss: 1.7237... 0.1219 sec/batch Epoch: 2/20... Training Step: 607... Training loss: 1.5295... 0.1212 sec/batch Epoch: 2/20... Training Step: 608... Training loss: 1.6737... 0.1216 sec/batch Epoch: 2/20... Training Step: 609... Training loss: 1.7418... 0.1224 sec/batch Epoch: 2/20... Training Step: 610... Training loss: 1.6588... 0.1210 sec/batch Epoch: 2/20... Training Step: 611... Training loss: 1.9643... 0.1240 sec/batch Epoch: 2/20... Training Step: 612... Training loss: 1.6860... 0.1183 sec/batch Epoch: 2/20... Training Step: 613... Training loss: 1.7044... 0.1217 sec/batch Epoch: 2/20... Training Step: 614... Training loss: 1.7717... 0.1236 sec/batch Epoch: 2/20... Training Step: 615... Training loss: 1.9744... 0.1204 sec/batch Epoch: 2/20... Training Step: 616... Training loss: 1.8483... 0.1229 sec/batch Epoch: 2/20... Training Step: 617... Training loss: 1.8924... 0.1230 sec/batch Epoch: 2/20... Training Step: 618... Training loss: 1.8024... 0.1223 sec/batch Epoch: 2/20... Training Step: 619... Training loss: 1.8520... 0.1198 sec/batch Epoch: 2/20... Training Step: 620... Training loss: 1.5388... 0.1195 sec/batch Epoch: 2/20... Training Step: 621... Training loss: 1.5904... 0.1208 sec/batch Epoch: 2/20... Training Step: 622... Training loss: 1.7615... 0.1214 sec/batch Epoch: 2/20... Training Step: 623... Training loss: 1.6085... 0.1234 sec/batch Epoch: 2/20... Training Step: 624... Training loss: 1.6241... 0.1237 sec/batch Epoch: 2/20... Training Step: 625... Training loss: 1.8298... 0.1250 sec/batch Epoch: 2/20... Training Step: 626... Training loss: 1.6570... 0.1229 sec/batch Epoch: 2/20... Training Step: 627... Training loss: 1.7788... 
0.1267 sec/batch Epoch: 2/20... Training Step: 628... Training loss: 1.4951... 0.1235 sec/batch Epoch: 2/20... Training Step: 629... Training loss: 1.6211... 0.1262 sec/batch Epoch: 2/20... Training Step: 630... Training loss: 1.5436... 0.1209 sec/batch Epoch: 2/20... Training Step: 631... Training loss: 1.7008... 0.1217 sec/batch Epoch: 2/20... Training Step: 632... Training loss: 1.8135... 0.1212 sec/batch Epoch: 2/20... Training Step: 633... Training loss: 1.8089... 0.1251 sec/batch Epoch: 2/20... Training Step: 634... Training loss: 1.8158... 0.1202 sec/batch Epoch: 2/20... Training Step: 635... Training loss: 1.7606... 0.1213 sec/batch Epoch: 2/20... Training Step: 636... Training loss: 1.6653... 0.1216 sec/batch Epoch: 2/20... Training Step: 637... Training loss: 1.8007... 0.1215 sec/batch Epoch: 2/20... Training Step: 638... Training loss: 1.7540... 0.1243 sec/batch Epoch: 2/20... Training Step: 639... Training loss: 1.7894... 0.1247 sec/batch Epoch: 2/20... Training Step: 640... Training loss: 1.4989... 0.1209 sec/batch Epoch: 2/20... Training Step: 641... Training loss: 1.5423... 0.1193 sec/batch Epoch: 2/20... Training Step: 642... Training loss: 1.8979... 0.1229 sec/batch Epoch: 2/20... Training Step: 643... Training loss: 1.6102... 0.1244 sec/batch Epoch: 2/20... Training Step: 644... Training loss: 1.8565... 0.1245 sec/batch Epoch: 2/20... Training Step: 645... Training loss: 1.5797... 0.1293 sec/batch Epoch: 2/20... Training Step: 646... Training loss: 1.8037... 0.1279 sec/batch Epoch: 2/20... Training Step: 647... Training loss: 1.7730... 0.1312 sec/batch Epoch: 2/20... Training Step: 648... Training loss: 1.7959... 0.1239 sec/batch Epoch: 2/20... Training Step: 649... Training loss: 1.8847... 0.1234 sec/batch Epoch: 2/20... Training Step: 650... Training loss: 1.6873... 0.1232 sec/batch Epoch: 2/20... Training Step: 651... Training loss: 1.8867... 0.1251 sec/batch Epoch: 2/20... Training Step: 652... Training loss: 1.5452... 
0.1236 sec/batch Epoch: 2/20... Training Step: 653... Training loss: 1.7488... 0.1221 sec/batch Epoch: 2/20... Training Step: 654... Training loss: 1.6784... 0.1245 sec/batch Epoch: 2/20... Training Step: 655... Training loss: 1.4720... 0.1264 sec/batch Epoch: 2/20... Training Step: 656... Training loss: 1.8478... 0.1243 sec/batch Epoch: 2/20... Training Step: 657... Training loss: 1.7053... 0.1222 sec/batch Epoch: 2/20... Training Step: 658... Training loss: 1.7101... 0.1206 sec/batch Epoch: 2/20... Training Step: 659... Training loss: 1.8143... 0.1210 sec/batch Epoch: 2/20... Training Step: 660... Training loss: 1.7087... 0.1220 sec/batch Epoch: 2/20... Training Step: 661... Training loss: 1.4732... 0.1224 sec/batch Epoch: 2/20... Training Step: 662... Training loss: 1.7075... 0.1230 sec/batch Epoch: 2/20... Training Step: 663... Training loss: 1.5147... 0.1248 sec/batch Epoch: 2/20... Training Step: 664... Training loss: 1.5947... 0.1236 sec/batch Epoch: 2/20... Training Step: 665... Training loss: 1.7045... 0.1227 sec/batch Epoch: 2/20... Training Step: 666... Training loss: 1.6102... 0.1226 sec/batch Epoch: 2/20... Training Step: 667... Training loss: 1.5347... 0.1256 sec/batch Epoch: 2/20... Training Step: 668... Training loss: 1.7791... 0.1275 sec/batch Epoch: 2/20... Training Step: 669... Training loss: 1.6174... 0.1349 sec/batch Epoch: 2/20... Training Step: 670... Training loss: 1.5101... 0.1356 sec/batch Epoch: 2/20... Training Step: 671... Training loss: 1.5001... 0.1285 sec/batch Epoch: 2/20... Training Step: 672... Training loss: 1.7598... 0.1323 sec/batch Epoch: 2/20... Training Step: 673... Training loss: 1.5729... 0.1286 sec/batch Epoch: 2/20... Training Step: 674... Training loss: 1.5274... 0.1186 sec/batch Epoch: 2/20... Training Step: 675... Training loss: 1.3030... 0.1212 sec/batch Epoch: 2/20... Training Step: 676... Training loss: 1.7295... 0.1234 sec/batch Epoch: 2/20... Training Step: 677... Training loss: 1.7557... 
0.1276 sec/batch Epoch: 2/20... Training Step: 678... Training loss: 1.5915... 0.1235 sec/batch Epoch: 2/20... Training Step: 679... Training loss: 1.7897... 0.1793 sec/batch Epoch: 2/20... Training Step: 680... Training loss: 1.4899... 0.1643 sec/batch Epoch: 2/20... Training Step: 681... Training loss: 1.6719... 0.1357 sec/batch Epoch: 2/20... Training Step: 682... Training loss: 1.6153... 0.1455 sec/batch Epoch: 2/20... Training Step: 683... Training loss: 1.7885... 0.1401 sec/batch Epoch: 2/20... Training Step: 684... Training loss: 1.6302... 0.1201 sec/batch Epoch: 2/20... Training Step: 685... Training loss: 1.5734... 0.1197 sec/batch Epoch: 2/20... Training Step: 686... Training loss: 1.8202... 0.1236 sec/batch Epoch: 2/20... Training Step: 687... Training loss: 1.8504... 0.1229 sec/batch Epoch: 2/20... Training Step: 688... Training loss: 1.9010... 0.1262 sec/batch Epoch: 2/20... Training Step: 689... Training loss: 1.6481... 0.1198 sec/batch Epoch: 2/20... Training Step: 690... Training loss: 1.8181... 0.1224 sec/batch Epoch: 2/20... Training Step: 691... Training loss: 1.8177... 0.1186 sec/batch Epoch: 2/20... Training Step: 692... Training loss: 1.5322... 0.1246 sec/batch Epoch: 2/20... Training Step: 693... Training loss: 1.5682... 0.1203 sec/batch Epoch: 2/20... Training Step: 694... Training loss: 1.6734... 0.1195 sec/batch Epoch: 2/20... Training Step: 695... Training loss: 1.5798... 0.1223 sec/batch Epoch: 2/20... Training Step: 696... Training loss: 1.5021... 0.1162 sec/batch Epoch: 2/20... Training Step: 697... Training loss: 1.9915... 0.1208 sec/batch Epoch: 2/20... Training Step: 698... Training loss: 1.6299... 0.1180 sec/batch Epoch: 2/20... Training Step: 699... Training loss: 1.8016... 0.1236 sec/batch Epoch: 2/20... Training Step: 700... Training loss: 1.5611... 0.1258 sec/batch Epoch: 2/20... Training Step: 701... Training loss: 2.0087... 0.1231 sec/batch Epoch: 2/20... Training Step: 702... Training loss: 1.5452... 
0.1183 sec/batch Epoch: 2/20... Training Step: 703... Training loss: 1.6698... 0.1229 sec/batch Epoch: 2/20... Training Step: 704... Training loss: 1.8368... 0.1209 sec/batch Epoch: 2/20... Training Step: 705... Training loss: 1.6080... 0.1225 sec/batch Epoch: 2/20... Training Step: 706... Training loss: 1.6187... 0.1137 sec/batch Epoch: 2/20... Training Step: 707... Training loss: 1.9185... 0.1197 sec/batch Epoch: 2/20... Training Step: 708... Training loss: 1.6918... 0.1221 sec/batch Epoch: 2/20... Training Step: 709... Training loss: 1.6527... 0.1235 sec/batch Epoch: 2/20... Training Step: 710... Training loss: 1.3810... 0.1221 sec/batch Epoch: 2/20... Training Step: 711... Training loss: 1.5839... 0.1228 sec/batch Epoch: 2/20... Training Step: 712... Training loss: 1.7290... 0.1217 sec/batch Epoch: 2/20... Training Step: 713... Training loss: 1.6177... 0.1224 sec/batch Epoch: 2/20... Training Step: 714... Training loss: 1.5815... 0.1236 sec/batch Epoch: 2/20... Training Step: 715... Training loss: 1.7203... 0.1201 sec/batch Epoch: 2/20... Training Step: 716... Training loss: 1.6022... 0.1257 sec/batch Epoch: 2/20... Training Step: 717... Training loss: 1.4405... 0.1269 sec/batch Epoch: 2/20... Training Step: 718... Training loss: 1.7779... 0.1215 sec/batch Epoch: 2/20... Training Step: 719... Training loss: 1.6135... 0.1228 sec/batch Epoch: 2/20... Training Step: 720... Training loss: 1.6352... 0.1187 sec/batch Epoch: 2/20... Training Step: 721... Training loss: 1.7968... 0.1210 sec/batch Epoch: 2/20... Training Step: 722... Training loss: 1.5118... 0.1202 sec/batch Epoch: 2/20... Training Step: 723... Training loss: 1.6732... 0.1296 sec/batch Epoch: 2/20... Training Step: 724... Training loss: 1.6489... 0.1237 sec/batch Epoch: 2/20... Training Step: 725... Training loss: 1.8273... 0.1233 sec/batch Epoch: 2/20... Training Step: 726... Training loss: 1.6901... 0.1188 sec/batch Epoch: 2/20... Training Step: 727... Training loss: 1.6478... 
0.1249 sec/batch Epoch: 2/20... Training Step: 728... Training loss: 1.8091... 0.1242 sec/batch Epoch: 2/20... Training Step: 729... Training loss: 1.7358... 0.1222 sec/batch Epoch: 2/20... Training Step: 730... Training loss: 1.6705... 0.1199 sec/batch Epoch: 2/20... Training Step: 731... Training loss: 1.7879... 0.1221 sec/batch Epoch: 2/20... Training Step: 732... Training loss: 1.8142... 0.1214 sec/batch Epoch: 2/20... Training Step: 733... Training loss: 1.7720... 0.1208 sec/batch Epoch: 2/20... Training Step: 734... Training loss: 1.8787... 0.1196 sec/batch Epoch: 2/20... Training Step: 735... Training loss: 1.6196... 0.1184 sec/batch Epoch: 2/20... Training Step: 736... Training loss: 1.7935... 0.1209 sec/batch Epoch: 2/20... Training Step: 737... Training loss: 1.7010... 0.1235 sec/batch Epoch: 2/20... Training Step: 738... Training loss: 1.5914... 0.1216 sec/batch Epoch: 2/20... Training Step: 739... Training loss: 1.7384... 0.1179 sec/batch Epoch: 2/20... Training Step: 740... Training loss: 1.6520... 0.1200 sec/batch Epoch: 2/20... Training Step: 741... Training loss: 1.6665... 0.1246 sec/batch Epoch: 2/20... Training Step: 742... Training loss: 1.9144... 0.1206 sec/batch Epoch: 2/20... Training Step: 743... Training loss: 1.4846... 0.1192 sec/batch Epoch: 2/20... Training Step: 744... Training loss: 1.5296... 0.1244 sec/batch Epoch: 2/20... Training Step: 745... Training loss: 1.5161... 0.1229 sec/batch Epoch: 2/20... Training Step: 746... Training loss: 1.5085... 0.1221 sec/batch Epoch: 2/20... Training Step: 747... Training loss: 1.5750... 0.1233 sec/batch Epoch: 2/20... Training Step: 748... Training loss: 1.5892... 0.1219 sec/batch Epoch: 2/20... Training Step: 749... Training loss: 1.4467... 0.1194 sec/batch Epoch: 2/20... Training Step: 750... Training loss: 1.5899... 0.1163 sec/batch Epoch: 2/20... Training Step: 751... Training loss: 1.6438... 0.1220 sec/batch Epoch: 2/20... Training Step: 752... Training loss: 1.7068... 
0.1321 sec/batch Epoch: 2/20... Training Step: 753... Training loss: 1.8248... 0.1280 sec/batch Epoch: 2/20... Training Step: 754... Training loss: 1.7047... 0.1251 sec/batch Epoch: 2/20... Training Step: 755... Training loss: 1.6354... 0.1208 sec/batch Epoch: 2/20... Training Step: 756... Training loss: 1.5990... 0.1218 sec/batch Epoch: 2/20... Training Step: 757... Training loss: 1.5000... 0.1219 sec/batch Epoch: 2/20... Training Step: 758... Training loss: 1.5278... 0.1247 sec/batch Epoch: 2/20... Training Step: 759... Training loss: 1.5278... 0.1251 sec/batch Epoch: 2/20... Training Step: 760... Training loss: 1.8009... 0.1206 sec/batch Epoch: 2/20... Training Step: 761... Training loss: 1.5023... 0.1196 sec/batch Epoch: 2/20... Training Step: 762... Training loss: 1.6568... 0.1220 sec/batch Epoch: 2/20... Training Step: 763... Training loss: 1.6426... 0.1202 sec/batch Epoch: 2/20... Training Step: 764... Training loss: 1.6477... 0.1237 sec/batch Epoch: 2/20... Training Step: 765... Training loss: 1.5885... 0.1231 sec/batch Epoch: 2/20... Training Step: 766... Training loss: 1.6078... 0.1240 sec/batch Epoch: 2/20... Training Step: 767... Training loss: 1.4139... 0.1251 sec/batch Epoch: 2/20... Training Step: 768... Training loss: 1.8775... 0.1215 sec/batch Epoch: 2/20... Training Step: 769... Training loss: 1.5152... 0.1250 sec/batch Epoch: 2/20... Training Step: 770... Training loss: 1.8197... 0.1175 sec/batch Epoch: 2/20... Training Step: 771... Training loss: 1.6091... 0.1251 sec/batch Epoch: 2/20... Training Step: 772... Training loss: 1.9504... 0.1265 sec/batch Epoch: 2/20... Training Step: 773... Training loss: 1.6483... 0.1250 sec/batch Epoch: 2/20... Training Step: 774... Training loss: 1.6440... 0.1169 sec/batch Epoch: 2/20... Training Step: 775... Training loss: 1.6952... 0.1235 sec/batch Epoch: 2/20... Training Step: 776... Training loss: 1.6463... 0.1295 sec/batch Epoch: 2/20... Training Step: 777... Training loss: 1.6828... 
0.1209 sec/batch Epoch: 2/20... Training Step: 778... Training loss: 1.6365... 0.1232 sec/batch Epoch: 2/20... Training Step: 779... Training loss: 1.4231... 0.1188 sec/batch Epoch: 2/20... Training Step: 780... Training loss: 1.5930... 0.1212 sec/batch Epoch: 2/20... Training Step: 781... Training loss: 1.6856... 0.1220 sec/batch Epoch: 2/20... Training Step: 782... Training loss: 1.4957... 0.1235 sec/batch Epoch: 2/20... Training Step: 783... Training loss: 1.6320... 0.1189 sec/batch Epoch: 2/20... Training Step: 784... Training loss: 1.5432... 0.1234 sec/batch Epoch: 2/20... Training Step: 785... Training loss: 1.4257... 0.1232 sec/batch Epoch: 2/20... Training Step: 786... Training loss: 1.7358... 0.1234 sec/batch Epoch: 2/20... Training Step: 787... Training loss: 1.4737... 0.1219 sec/batch Epoch: 2/20... Training Step: 788... Training loss: 1.4110... 0.1243 sec/batch Epoch: 2/20... Training Step: 789... Training loss: 1.4146... 0.1178 sec/batch Epoch: 2/20... Training Step: 790... Training loss: 1.5268... 0.1191 sec/batch Epoch: 2/20... Training Step: 791... Training loss: 1.6573... 0.1208 sec/batch Epoch: 2/20... Training Step: 792... Training loss: 1.4660... 0.1230 sec/batch Epoch: 2/20... Training Step: 793... Training loss: 1.6335... 0.1257 sec/batch Epoch: 2/20... Training Step: 794... Training loss: 1.4822... 0.1194 sec/batch Epoch: 2/20... Training Step: 795... Training loss: 1.5909... 0.1238 sec/batch Epoch: 2/20... Training Step: 796... Training loss: 1.5352... 0.1192 sec/batch Epoch: 2/20... Training Step: 797... Training loss: 1.4827... 0.1234 sec/batch Epoch: 2/20... Training Step: 798... Training loss: 1.5295... 0.1216 sec/batch Epoch: 2/20... Training Step: 799... Training loss: 1.8338... 0.1261 sec/batch Epoch: 2/20... Training Step: 800... Training loss: 1.6561... 0.1217 sec/batch Epoch: 2/20... Training Step: 801... Training loss: 1.6718... 0.1240 sec/batch Epoch: 2/20... Training Step: 802... Training loss: 1.6799... 
0.1221 sec/batch Epoch: 2/20... Training Step: 803... Training loss: 1.6305... 0.1195 sec/batch Epoch: 2/20... Training Step: 804... Training loss: 1.6059... 0.1215 sec/batch Epoch: 2/20... Training Step: 805... Training loss: 1.5072... 0.1228 sec/batch Epoch: 2/20... Training Step: 806... Training loss: 1.5934... 0.1245 sec/batch Epoch: 2/20... Training Step: 807... Training loss: 1.4723... 0.1217 sec/batch Epoch: 2/20... Training Step: 808... Training loss: 1.8214... 0.1237 sec/batch Epoch: 2/20... Training Step: 809... Training loss: 1.5360... 0.1225 sec/batch Epoch: 2/20... Training Step: 810... Training loss: 1.4941... 0.1201 sec/batch Epoch: 2/20... Training Step: 811... Training loss: 1.5319... 0.1236 sec/batch Epoch: 2/20... Training Step: 812... Training loss: 1.8912... 0.1190 sec/batch Epoch: 2/20... Training Step: 813... Training loss: 1.5442... 0.1204 sec/batch Epoch: 2/20... Training Step: 814... Training loss: 1.5836... 0.1218 sec/batch Epoch: 2/20... Training Step: 815... Training loss: 1.4504... 0.1245 sec/batch Epoch: 2/20... Training Step: 816... Training loss: 1.4963... 0.1211 sec/batch Epoch: 2/20... Training Step: 817... Training loss: 1.3978... 0.1253 sec/batch Epoch: 2/20... Training Step: 818... Training loss: 1.2765... 0.1211 sec/batch Epoch: 2/20... Training Step: 819... Training loss: 1.8025... 0.1184 sec/batch Epoch: 2/20... Training Step: 820... Training loss: 1.4530... 0.1242 sec/batch Epoch: 2/20... Training Step: 821... Training loss: 1.4771... 0.1208 sec/batch Epoch: 2/20... Training Step: 822... Training loss: 1.6415... 0.1236 sec/batch Epoch: 2/20... Training Step: 823... Training loss: 1.5389... 0.1283 sec/batch Epoch: 2/20... Training Step: 824... Training loss: 1.3361... 0.1234 sec/batch Epoch: 2/20... Training Step: 825... Training loss: 1.7247... 0.1187 sec/batch Epoch: 2/20... Training Step: 826... Training loss: 1.5560... 0.1225 sec/batch Epoch: 2/20... Training Step: 827... Training loss: 1.4898... 
0.1238 sec/batch Epoch: 2/20... Training Step: 828... Training loss: 1.5243... 0.1196 sec/batch Epoch: 2/20... Training Step: 829... Training loss: 1.4751... 0.1236 sec/batch Epoch: 2/20... Training Step: 830... Training loss: 1.5885... 0.1209 sec/batch Epoch: 2/20... Training Step: 831... Training loss: 1.3582... 0.1225 sec/batch Epoch: 2/20... Training Step: 832... Training loss: 1.6574... 0.1239 sec/batch Epoch: 2/20... Training Step: 833... Training loss: 1.6347... 0.1206 sec/batch Epoch: 2/20... Training Step: 834... Training loss: 1.6155... 0.1215 sec/batch Epoch: 2/20... Training Step: 835... Training loss: 1.5048... 0.1202 sec/batch Epoch: 2/20... Training Step: 836... Training loss: 1.5085... 0.1191 sec/batch Epoch: 2/20... Training Step: 837... Training loss: 1.6117... 0.1245 sec/batch Epoch: 2/20... Training Step: 838... Training loss: 1.7322... 0.1242 sec/batch Epoch: 2/20... Training Step: 839... Training loss: 1.5658... 0.1229 sec/batch Epoch: 2/20... Training Step: 840... Training loss: 1.5773... 0.1226 sec/batch Epoch: 2/20... Training Step: 841... Training loss: 1.6765... 0.1220 sec/batch Epoch: 2/20... Training Step: 842... Training loss: 1.6058... 0.1201 sec/batch Epoch: 2/20... Training Step: 843... Training loss: 1.4991... 0.1241 sec/batch Epoch: 2/20... Training Step: 844... Training loss: 1.8051... 0.1201 sec/batch Epoch: 2/20... Training Step: 845... Training loss: 1.5494... 0.1167 sec/batch Epoch: 2/20... Training Step: 846... Training loss: 1.4711... 0.1233 sec/batch Epoch: 2/20... Training Step: 847... Training loss: 1.4854... 0.1215 sec/batch Epoch: 2/20... Training Step: 848... Training loss: 1.7007... 0.1216 sec/batch Epoch: 2/20... Training Step: 849... Training loss: 1.4295... 0.1249 sec/batch Epoch: 2/20... Training Step: 850... Training loss: 1.6863... 0.1221 sec/batch Epoch: 2/20... Training Step: 851... Training loss: 1.6460... 0.1252 sec/batch Epoch: 2/20... Training Step: 852... Training loss: 1.4551... 
0.1169 sec/batch Epoch: 2/20... Training Step: 853... Training loss: 1.6202... 0.1222 sec/batch Epoch: 2/20... Training Step: 854... Training loss: 1.6456... 0.1245 sec/batch Epoch: 2/20... Training Step: 855... Training loss: 1.4674... 0.1238 sec/batch Epoch: 2/20... Training Step: 856... Training loss: 1.4540... 0.1181 sec/batch Epoch: 2/20... Training Step: 857... Training loss: 1.4891... 0.1229 sec/batch Epoch: 2/20... Training Step: 858... Training loss: 1.6085... 0.1213 sec/batch Epoch: 2/20... Training Step: 859... Training loss: 1.3830... 0.1222 sec/batch Epoch: 2/20... Training Step: 860... Training loss: 1.6827... 0.1206 sec/batch Epoch: 2/20... Training Step: 861... Training loss: 1.3171... 0.1231 sec/batch Epoch: 2/20... Training Step: 862... Training loss: 1.3113... 0.1263 sec/batch Epoch: 2/20... Training Step: 863... Training loss: 1.5239... 0.1317 sec/batch Epoch: 2/20... Training Step: 864... Training loss: 1.3605... 0.1394 sec/batch Epoch: 2/20... Training Step: 865... Training loss: 1.6535... 0.1397 sec/batch Epoch: 2/20... Training Step: 866... Training loss: 1.5430... 0.1356 sec/batch Epoch: 2/20... Training Step: 867... Training loss: 1.4408... 0.1318 sec/batch Epoch: 2/20... Training Step: 868... Training loss: 1.5662... 0.1235 sec/batch Epoch: 2/20... Training Step: 869... Training loss: 1.4922... 0.1241 sec/batch Epoch: 2/20... Training Step: 870... Training loss: 1.7384... 0.1287 sec/batch Epoch: 2/20... Training Step: 871... Training loss: 1.5672... 0.1232 sec/batch Epoch: 2/20... Training Step: 872... Training loss: 1.8103... 0.1210 sec/batch Epoch: 2/20... Training Step: 873... Training loss: 1.5248... 0.1240 sec/batch Epoch: 2/20... Training Step: 874... Training loss: 1.5134... 0.1195 sec/batch Epoch: 2/20... Training Step: 875... Training loss: 1.4664... 0.1216 sec/batch Epoch: 2/20... Training Step: 876... Training loss: 1.4864... 0.1236 sec/batch Epoch: 2/20... Training Step: 877... Training loss: 1.6520... 
0.1235 sec/batch Epoch: 2/20... Training Step: 878... Training loss: 1.5669... 0.1227 sec/batch Epoch: 2/20... Training Step: 879... Training loss: 1.8312... 0.1279 sec/batch Epoch: 2/20... Training Step: 880... Training loss: 1.7645... 0.1224 sec/batch Epoch: 2/20... Training Step: 881... Training loss: 1.7802... 0.1231 sec/batch Epoch: 2/20... Training Step: 882... Training loss: 1.4438... 0.1211 sec/batch Epoch: 2/20... Training Step: 883... Training loss: 1.5683... 0.1246 sec/batch Epoch: 2/20... Training Step: 884... Training loss: 1.3227... 0.1179 sec/batch Epoch: 2/20... Training Step: 885... Training loss: 1.6070... 0.1201 sec/batch Epoch: 2/20... Training Step: 886... Training loss: 1.5553... 0.1225 sec/batch Epoch: 2/20... Training Step: 887... Training loss: 1.6264... 0.1215 sec/batch Epoch: 2/20... Training Step: 888... Training loss: 1.7644... 0.1215 sec/batch Epoch: 2/20... Training Step: 889... Training loss: 1.4650... 0.1268 sec/batch Epoch: 2/20... Training Step: 890... Training loss: 1.6550... 0.1206 sec/batch Epoch: 2/20... Training Step: 891... Training loss: 1.5634... 0.1265 sec/batch Epoch: 2/20... Training Step: 892... Training loss: 1.8245... 0.1254 sec/batch Epoch: 2/20... Training Step: 893... Training loss: 1.4900... 0.1210 sec/batch Epoch: 2/20... Training Step: 894... Training loss: 1.5772... 0.1218 sec/batch Epoch: 2/20... Training Step: 895... Training loss: 1.8296... 0.1242 sec/batch Epoch: 2/20... Training Step: 896... Training loss: 1.5620... 0.1300 sec/batch Epoch: 2/20... Training Step: 897... Training loss: 1.8698... 0.1343 sec/batch Epoch: 2/20... Training Step: 898... Training loss: 1.6986... 0.1357 sec/batch Epoch: 2/20... Training Step: 899... Training loss: 1.4239... 0.1191 sec/batch Epoch: 2/20... Training Step: 900... Training loss: 1.6140... 0.1219 sec/batch Epoch: 2/20... Training Step: 901... Training loss: 1.5297... 0.1268 sec/batch Epoch: 2/20... Training Step: 902... Training loss: 1.7523... 
0.1198 sec/batch Epoch: 2/20... Training Step: 903... Training loss: 1.8285... 0.1243 sec/batch Epoch: 2/20... Training Step: 904... Training loss: 1.7777... 0.1244 sec/batch Epoch: 2/20... Training Step: 905... Training loss: 1.6053... 0.1240 sec/batch Epoch: 2/20... Training Step: 906... Training loss: 1.4899... 0.1242 sec/batch Epoch: 2/20... Training Step: 907... Training loss: 1.7333... 0.1223 sec/batch Epoch: 2/20... Training Step: 908... Training loss: 1.5857... 0.1244 sec/batch Epoch: 2/20... Training Step: 909... Training loss: 1.6375... 0.1235 sec/batch Epoch: 2/20... Training Step: 910... Training loss: 1.5512... 0.1222 sec/batch Epoch: 2/20... Training Step: 911... Training loss: 1.6102... 0.1236 sec/batch Epoch: 2/20... Training Step: 912... Training loss: 1.5020... 0.1223 sec/batch Epoch: 2/20... Training Step: 913... Training loss: 1.4942... 0.1268 sec/batch Epoch: 2/20... Training Step: 914... Training loss: 1.5327... 0.1223 sec/batch Epoch: 2/20... Training Step: 915... Training loss: 1.5921... 0.1206 sec/batch Epoch: 2/20... Training Step: 916... Training loss: 1.6016... 0.1236 sec/batch Epoch: 2/20... Training Step: 917... Training loss: 1.5069... 0.1227 sec/batch Epoch: 2/20... Training Step: 918... Training loss: 1.8236... 0.1219 sec/batch Epoch: 2/20... Training Step: 919... Training loss: 1.7293... 0.1249 sec/batch Epoch: 2/20... Training Step: 920... Training loss: 1.5183... 0.1228 sec/batch Epoch: 2/20... Training Step: 921... Training loss: 1.4369... 0.1184 sec/batch Epoch: 2/20... Training Step: 922... Training loss: 1.4567... 0.1240 sec/batch Epoch: 2/20... Training Step: 923... Training loss: 1.4554... 0.1246 sec/batch Epoch: 2/20... Training Step: 924... Training loss: 1.5544... 0.1297 sec/batch Epoch: 2/20... Training Step: 925... Training loss: 1.7085... 0.1255 sec/batch Epoch: 2/20... Training Step: 926... Training loss: 1.5437... 0.1240 sec/batch Epoch: 2/20... Training Step: 927... Training loss: 1.6608... 
0.1226 sec/batch Epoch: 2/20... Training Step: 928... Training loss: 1.3968... 0.1187 sec/batch Epoch: 3/20... Training Step: 929... Training loss: 1.9449... 0.1186 sec/batch Epoch: 3/20... Training Step: 930... Training loss: 1.5780... 0.1178 sec/batch Epoch: 3/20... Training Step: 931... Training loss: 1.4143... 0.1228 sec/batch Epoch: 3/20... Training Step: 932... Training loss: 1.4950... 0.1215 sec/batch Epoch: 3/20... Training Step: 933... Training loss: 1.6531... 0.1225 sec/batch Epoch: 3/20... Training Step: 934... Training loss: 1.3630... 0.1230 sec/batch Epoch: 3/20... Training Step: 935... Training loss: 1.7588... 0.1272 sec/batch Epoch: 3/20... Training Step: 936... Training loss: 1.4835... 0.1206 sec/batch Epoch: 3/20... Training Step: 937... Training loss: 1.4910... 0.1202 sec/batch Epoch: 3/20... Training Step: 938... Training loss: 1.7330... 0.1177 sec/batch Epoch: 3/20... Training Step: 939... Training loss: 1.4915... 0.1176 sec/batch Epoch: 3/20... Training Step: 940... Training loss: 1.2877... 0.1227 sec/batch Epoch: 3/20... Training Step: 941... Training loss: 1.7049... 0.1183 sec/batch Epoch: 3/20... Training Step: 942... Training loss: 1.3454... 0.1214 sec/batch Epoch: 3/20... Training Step: 943... Training loss: 1.5023... 0.1248 sec/batch Epoch: 3/20... Training Step: 944... Training loss: 1.5896... 0.1223 sec/batch Epoch: 3/20... Training Step: 945... Training loss: 1.3567... 0.1321 sec/batch Epoch: 3/20... Training Step: 946... Training loss: 1.3792... 0.1288 sec/batch Epoch: 3/20... Training Step: 947... Training loss: 1.4189... 0.1255 sec/batch Epoch: 3/20... Training Step: 948... Training loss: 1.3969... 0.1185 sec/batch Epoch: 3/20... Training Step: 949... Training loss: 1.6710... 0.1233 sec/batch Epoch: 3/20... Training Step: 950... Training loss: 1.4185... 0.1224 sec/batch Epoch: 3/20... Training Step: 951... Training loss: 1.6852... 0.1223 sec/batch Epoch: 3/20... Training Step: 952... Training loss: 1.3690... 
0.1263 sec/batch Epoch: 3/20... Training Step: 953... Training loss: 1.4494... 0.1225 sec/batch Epoch: 3/20... Training Step: 954... Training loss: 1.5974... 0.1224 sec/batch Epoch: 3/20... Training Step: 955... Training loss: 1.5788... 0.1268 sec/batch Epoch: 3/20... Training Step: 956... Training loss: 1.3920... 0.1221 sec/batch Epoch: 3/20... Training Step: 957... Training loss: 1.5513... 0.1200 sec/batch Epoch: 3/20... Training Step: 958... Training loss: 1.4653... 0.1218 sec/batch Epoch: 3/20... Training Step: 959... Training loss: 1.3200... 0.1240 sec/batch Epoch: 3/20... Training Step: 960... Training loss: 1.4287... 0.1249 sec/batch Epoch: 3/20... Training Step: 961... Training loss: 1.3820... 0.1249 sec/batch Epoch: 3/20... Training Step: 962... Training loss: 1.3546... 0.1176 sec/batch Epoch: 3/20... Training Step: 963... Training loss: 1.3718... 0.1220 sec/batch Epoch: 3/20... Training Step: 964... Training loss: 1.4093... 0.1228 sec/batch Epoch: 3/20... Training Step: 965... Training loss: 1.5570... 0.1228 sec/batch Epoch: 3/20... Training Step: 966... Training loss: 1.3754... 0.1236 sec/batch Epoch: 3/20... Training Step: 967... Training loss: 1.3773... 0.1185 sec/batch Epoch: 3/20... Training Step: 968... Training loss: 1.6443... 0.1251 sec/batch Epoch: 3/20... Training Step: 969... Training loss: 1.3945... 0.1199 sec/batch Epoch: 3/20... Training Step: 970... Training loss: 1.3672... 0.1200 sec/batch Epoch: 3/20... Training Step: 971... Training loss: 1.5994... 0.1226 sec/batch Epoch: 3/20... Training Step: 972... Training loss: 1.3363... 0.1240 sec/batch Epoch: 3/20... Training Step: 973... Training loss: 1.4538... 0.1204 sec/batch Epoch: 3/20... Training Step: 974... Training loss: 1.3994... 0.1225 sec/batch Epoch: 3/20... Training Step: 975... Training loss: 1.3655... 0.1199 sec/batch Epoch: 3/20... Training Step: 976... Training loss: 1.5634... 0.1207 sec/batch Epoch: 3/20... Training Step: 977... Training loss: 1.4231... 
0.1213 sec/batch Epoch: 3/20... Training Step: 978... Training loss: 1.7701... 0.1199 sec/batch Epoch: 3/20... Training Step: 979... Training loss: 1.4485... 0.1221 sec/batch Epoch: 3/20... Training Step: 980... Training loss: 1.5609... 0.1254 sec/batch Epoch: 3/20... Training Step: 981... Training loss: 1.7103... 0.1217 sec/batch Epoch: 3/20... Training Step: 982... Training loss: 1.5043... 0.1146 sec/batch Epoch: 3/20... Training Step: 983... Training loss: 1.2968... 0.1188 sec/batch Epoch: 3/20... Training Step: 984... Training loss: 1.4487... 0.1233 sec/batch Epoch: 3/20... Training Step: 985... Training loss: 1.7074... 0.1232 sec/batch Epoch: 3/20... Training Step: 986... Training loss: 1.6142... 0.1224 sec/batch Epoch: 3/20... Training Step: 987... Training loss: 1.3614... 0.1232 sec/batch Epoch: 3/20... Training Step: 988... Training loss: 1.4105... 0.1208 sec/batch Epoch: 3/20... Training Step: 989... Training loss: 1.5193... 0.1241 sec/batch Epoch: 3/20... Training Step: 990... Training loss: 1.5993... 0.1178 sec/batch Epoch: 3/20... Training Step: 991... Training loss: 1.3827... 0.1199 sec/batch Epoch: 3/20... Training Step: 992... Training loss: 1.4665... 0.1265 sec/batch Epoch: 3/20... Training Step: 993... Training loss: 1.3230... 0.1242 sec/batch Epoch: 3/20... Training Step: 994... Training loss: 1.6459... 0.1214 sec/batch Epoch: 3/20... Training Step: 995... Training loss: 1.4463... 0.1243 sec/batch Epoch: 3/20... Training Step: 996... Training loss: 1.6212... 0.1204 sec/batch Epoch: 3/20... Training Step: 997... Training loss: 1.3989... 0.1204 sec/batch Epoch: 3/20... Training Step: 998... Training loss: 1.4242... 0.1238 sec/batch Epoch: 3/20... Training Step: 999... Training loss: 1.5636... 0.1213 sec/batch Epoch: 3/20... Training Step: 1000... Training loss: 1.5529... 0.1234 sec/batch Epoch: 3/20... Training Step: 1001... Training loss: 1.5257... 0.1177 sec/batch Epoch: 3/20... Training Step: 1002... Training loss: 1.4005... 
0.1252 sec/batch Epoch: 3/20... Training Step: 1003... Training loss: 1.7731... 0.1245 sec/batch Epoch: 3/20... Training Step: 1004... Training loss: 1.4202... 0.1186 sec/batch Epoch: 3/20... Training Step: 1005... Training loss: 1.3353... 0.1217 sec/batch Epoch: 3/20... Training Step: 1006... Training loss: 1.4132... 0.1213 sec/batch Epoch: 3/20... Training Step: 1007... Training loss: 1.4927... 0.1216 sec/batch Epoch: 3/20... Training Step: 1008... Training loss: 1.3253... 0.1216 sec/batch Epoch: 3/20... Training Step: 1009... Training loss: 1.5526... 0.1250 sec/batch Epoch: 3/20... Training Step: 1010... Training loss: 1.4232... 0.1196 sec/batch Epoch: 3/20... Training Step: 1011... Training loss: 1.3292... 0.1196 sec/batch Epoch: 3/20... Training Step: 1012... Training loss: 1.5740... 0.1256 sec/batch Epoch: 3/20... Training Step: 1013... Training loss: 1.4884... 0.1178 sec/batch Epoch: 3/20... Training Step: 1014... Training loss: 1.5495... 0.1208 sec/batch Epoch: 3/20... Training Step: 1015... Training loss: 1.3761... 0.1253 sec/batch Epoch: 3/20... Training Step: 1016... Training loss: 1.5496... 0.1255 sec/batch Epoch: 3/20... Training Step: 1017... Training loss: 1.7168... 0.1249 sec/batch Epoch: 3/20... Training Step: 1018... Training loss: 1.4293... 0.1175 sec/batch Epoch: 3/20... Training Step: 1019... Training loss: 1.5856... 0.1164 sec/batch Epoch: 3/20... Training Step: 1020... Training loss: 1.8311... 0.1222 sec/batch Epoch: 3/20... Training Step: 1021... Training loss: 1.3409... 0.1226 sec/batch Epoch: 3/20... Training Step: 1022... Training loss: 1.8408... 0.1253 sec/batch Epoch: 3/20... Training Step: 1023... Training loss: 1.4086... 0.1221 sec/batch Epoch: 3/20... Training Step: 1024... Training loss: 1.5127... 0.1212 sec/batch Epoch: 3/20... Training Step: 1025... Training loss: 1.9485... 0.1273 sec/batch Epoch: 3/20... Training Step: 1026... Training loss: 1.6082... 0.1271 sec/batch Epoch: 3/20... Training Step: 1027... Training loss: 1.5680... 
0.1217 sec/batch Epoch: 3/20... Training Step: 1028... Training loss: 1.4651... 0.1236 sec/batch Epoch: 3/20... Training Step: 1029... Training loss: 1.4863... 0.1217 sec/batch Epoch: 3/20... Training Step: 1030... Training loss: 1.8156... 0.1183 sec/batch Epoch: 3/20... Training Step: 1031... Training loss: 1.8167... 0.1229 sec/batch Epoch: 3/20... Training Step: 1032... Training loss: 1.5805... 0.1231 sec/batch Epoch: 3/20... Training Step: 1033... Training loss: 1.6752... 0.1198 sec/batch Epoch: 3/20... Training Step: 1034... Training loss: 1.8935... 0.1246 sec/batch Epoch: 3/20... Training Step: 1035... Training loss: 1.7469... 0.1196 sec/batch Epoch: 3/20... Training Step: 1036... Training loss: 1.8368... 0.1240 sec/batch Epoch: 3/20... Training Step: 1037... Training loss: 1.8881... 0.1188 sec/batch Epoch: 3/20... Training Step: 1038... Training loss: 1.5683... 0.1230 sec/batch Epoch: 3/20... Training Step: 1039... Training loss: 1.6711... 0.1254 sec/batch Epoch: 3/20... Training Step: 1040... Training loss: 1.6341... 0.1196 sec/batch Epoch: 3/20... Training Step: 1041... Training loss: 1.5969... 0.1217 sec/batch Epoch: 3/20... Training Step: 1042... Training loss: 1.8003... 0.1228 sec/batch Epoch: 3/20... Training Step: 1043... Training loss: 1.6066... 0.1214 sec/batch Epoch: 3/20... Training Step: 1044... Training loss: 1.4393... 0.1198 sec/batch Epoch: 3/20... Training Step: 1045... Training loss: 1.7296... 0.1231 sec/batch Epoch: 3/20... Training Step: 1046... Training loss: 1.7490... 0.1211 sec/batch Epoch: 3/20... Training Step: 1047... Training loss: 1.6345... 0.1210 sec/batch Epoch: 3/20... Training Step: 1048... Training loss: 1.4011... 0.1202 sec/batch Epoch: 3/20... Training Step: 1049... Training loss: 1.5176... 0.1219 sec/batch Epoch: 3/20... Training Step: 1050... Training loss: 1.6017... 0.1204 sec/batch Epoch: 3/20... Training Step: 1051... Training loss: 1.7129... 0.1179 sec/batch Epoch: 3/20... Training Step: 1052... Training loss: 1.6136... 
0.1223 sec/batch Epoch: 3/20... Training Step: 1053... Training loss: 1.6677... 0.1204 sec/batch Epoch: 3/20... Training Step: 1054... Training loss: 1.3784... 0.1184 sec/batch Epoch: 3/20... Training Step: 1055... Training loss: 1.4385... 0.1215 sec/batch Epoch: 3/20... Training Step: 1056... Training loss: 1.5424... 0.1260 sec/batch Epoch: 3/20... Training Step: 1057... Training loss: 1.6811... 0.1172 sec/batch Epoch: 3/20... Training Step: 1058... Training loss: 1.4956... 0.1194 sec/batch Epoch: 3/20... Training Step: 1059... Training loss: 1.8544... 0.1348 sec/batch Epoch: 3/20... Training Step: 1060... Training loss: 1.6321... 0.1359 sec/batch Epoch: 3/20... Training Step: 1061... Training loss: 1.5454... 0.1209 sec/batch Epoch: 3/20... Training Step: 1062... Training loss: 1.6454... 0.1221 sec/batch Epoch: 3/20... Training Step: 1063... Training loss: 1.4355... 0.1192 sec/batch Epoch: 3/20... Training Step: 1064... Training loss: 1.4099... 0.1232 sec/batch Epoch: 3/20... Training Step: 1065... Training loss: 1.3926... 0.1248 sec/batch Epoch: 3/20... Training Step: 1066... Training loss: 1.4718... 0.1207 sec/batch Epoch: 3/20... Training Step: 1067... Training loss: 1.4711... 0.1203 sec/batch Epoch: 3/20... Training Step: 1068... Training loss: 1.5006... 0.1210 sec/batch Epoch: 3/20... Training Step: 1069... Training loss: 1.4499... 0.1232 sec/batch Epoch: 3/20... Training Step: 1070... Training loss: 1.4404... 0.1260 sec/batch Epoch: 3/20... Training Step: 1071... Training loss: 1.3352... 0.1286 sec/batch Epoch: 3/20... Training Step: 1072... Training loss: 1.4516... 0.1287 sec/batch Epoch: 3/20... Training Step: 1073... Training loss: 1.5586... 0.1205 sec/batch Epoch: 3/20... Training Step: 1074... Training loss: 1.4360... 0.1222 sec/batch Epoch: 3/20... Training Step: 1075... Training loss: 1.6583... 0.1214 sec/batch Epoch: 3/20... Training Step: 1076... Training loss: 1.4968... 0.1215 sec/batch Epoch: 3/20... Training Step: 1077... Training loss: 1.5031... 
0.1245 sec/batch Epoch: 3/20... Training Step: 1078... Training loss: 1.6054... 0.1214 sec/batch Epoch: 3/20... Training Step: 1079... Training loss: 1.6738... 0.1265 sec/batch Epoch: 3/20... Training Step: 1080... Training loss: 1.6745... 0.1237 sec/batch Epoch: 3/20... Training Step: 1081... Training loss: 1.7202... 0.1313 sec/batch Epoch: 3/20... Training Step: 1082... Training loss: 1.6778... 0.1264 sec/batch Epoch: 3/20... Training Step: 1083... Training loss: 1.6715... 0.1232 sec/batch Epoch: 3/20... Training Step: 1084... Training loss: 1.4047... 0.1207 sec/batch Epoch: 3/20... Training Step: 1085... Training loss: 1.4174... 0.1226 sec/batch Epoch: 3/20... Training Step: 1086... Training loss: 1.4059... 0.1210 sec/batch Epoch: 3/20... Training Step: 1087... Training loss: 1.4300... 0.1238 sec/batch Epoch: 3/20... Training Step: 1088... Training loss: 1.4531... 0.1197 sec/batch Epoch: 3/20... Training Step: 1089... Training loss: 1.6735... 0.1237 sec/batch Epoch: 3/20... Training Step: 1090... Training loss: 1.5071... 0.1212 sec/batch Epoch: 3/20... Training Step: 1091... Training loss: 1.6246... 0.1225 sec/batch Epoch: 3/20... Training Step: 1092... Training loss: 1.2634... 0.1224 sec/batch Epoch: 3/20... Training Step: 1093... Training loss: 1.6045... 0.1222 sec/batch Epoch: 3/20... Training Step: 1094... Training loss: 1.5277... 0.1230 sec/batch Epoch: 3/20... Training Step: 1095... Training loss: 1.4848... 0.1215 sec/batch Epoch: 3/20... Training Step: 1096... Training loss: 1.6212... 0.1231 sec/batch Epoch: 3/20... Training Step: 1097... Training loss: 1.6334... 0.1212 sec/batch Epoch: 3/20... Training Step: 1098... Training loss: 1.6819... 0.1189 sec/batch Epoch: 3/20... Training Step: 1099... Training loss: 1.5043... 0.1238 sec/batch Epoch: 3/20... Training Step: 1100... Training loss: 1.5513... 0.1239 sec/batch Epoch: 3/20... Training Step: 1101... Training loss: 1.5325... 0.1245 sec/batch Epoch: 3/20... Training Step: 1102... Training loss: 1.5957... 
0.1265 sec/batch Epoch: 3/20... Training Step: 1103... Training loss: 1.6104... 0.1219 sec/batch Epoch: 3/20... Training Step: 1104... Training loss: 1.3920... 0.1203 sec/batch Epoch: 3/20... Training Step: 1105... Training loss: 1.2441... 0.1195 sec/batch Epoch: 3/20... Training Step: 1106... Training loss: 1.6908... 0.1232 sec/batch Epoch: 3/20... Training Step: 1107... Training loss: 1.3905... 0.1199 sec/batch Epoch: 3/20... Training Step: 1108... Training loss: 1.7121... 0.1222 sec/batch Epoch: 3/20... Training Step: 1109... Training loss: 1.4404... 0.1228 sec/batch Epoch: 3/20... Training Step: 1110... Training loss: 1.7088... 0.1201 sec/batch Epoch: 3/20... Training Step: 1111... Training loss: 1.6019... 0.1215 sec/batch Epoch: 3/20... Training Step: 1112... Training loss: 1.5274... 0.1190 sec/batch Epoch: 3/20... Training Step: 1113... Training loss: 1.8034... 0.1230 sec/batch Epoch: 3/20... Training Step: 1114... Training loss: 1.5510... 0.1219 sec/batch Epoch: 3/20... Training Step: 1115... Training loss: 1.7064... 0.1228 sec/batch Epoch: 3/20... Training Step: 1116... Training loss: 1.4531... 0.1190 sec/batch Epoch: 3/20... Training Step: 1117... Training loss: 1.5547... 0.1258 sec/batch Epoch: 3/20... Training Step: 1118... Training loss: 1.6288... 0.1215 sec/batch Epoch: 3/20... Training Step: 1119... Training loss: 1.3383... 0.1216 sec/batch Epoch: 3/20... Training Step: 1120... Training loss: 1.6835... 0.1210 sec/batch Epoch: 3/20... Training Step: 1121... Training loss: 1.5582... 0.1240 sec/batch Epoch: 3/20... Training Step: 1122... Training loss: 1.4711... 0.1240 sec/batch Epoch: 3/20... Training Step: 1123... Training loss: 1.5970... 0.1265 sec/batch Epoch: 3/20... Training Step: 1124... Training loss: 1.5486... 0.1213 sec/batch Epoch: 3/20... Training Step: 1125... Training loss: 1.3892... 0.1204 sec/batch Epoch: 3/20... Training Step: 1126... Training loss: 1.5995... 0.1194 sec/batch Epoch: 3/20... Training Step: 1127... Training loss: 1.3688... 
0.1312 sec/batch Epoch: 3/20... Training Step: 1128... Training loss: 1.4986... 0.1248 sec/batch Epoch: 3/20... Training Step: 1129... Training loss: 1.3921... 0.1203 sec/batch Epoch: 3/20... Training Step: 1130... Training loss: 1.4580... 0.1205 sec/batch Epoch: 3/20... Training Step: 1131... Training loss: 1.5017... 0.1312 sec/batch Epoch: 3/20... Training Step: 1132... Training loss: 1.6963... 0.1323 sec/batch Epoch: 3/20... Training Step: 1133... Training loss: 1.3546... 0.1297 sec/batch Epoch: 3/20... Training Step: 1134... Training loss: 1.3314... 0.1327 sec/batch Epoch: 3/20... Training Step: 1135... Training loss: 1.5422... 0.1353 sec/batch Epoch: 3/20... Training Step: 1136... Training loss: 1.5293... 0.1274 sec/batch Epoch: 3/20... Training Step: 1137... Training loss: 1.4793... 0.1262 sec/batch Epoch: 3/20... Training Step: 1138... Training loss: 1.4585... 0.1305 sec/batch Epoch: 3/20... Training Step: 1139... Training loss: 1.1157... 0.1414 sec/batch Epoch: 3/20... Training Step: 1140... Training loss: 1.5859... 0.1278 sec/batch Epoch: 3/20... Training Step: 1141... Training loss: 1.5781... 0.1232 sec/batch Epoch: 3/20... Training Step: 1142... Training loss: 1.4823... 0.1214 sec/batch Epoch: 3/20... Training Step: 1143... Training loss: 1.6744... 0.1209 sec/batch Epoch: 3/20... Training Step: 1144... Training loss: 1.4642... 0.1265 sec/batch Epoch: 3/20... Training Step: 1145... Training loss: 1.5313... 0.1217 sec/batch Epoch: 3/20... Training Step: 1146... Training loss: 1.3649... 0.1450 sec/batch Epoch: 3/20... Training Step: 1147... Training loss: 1.6506... 0.1408 sec/batch Epoch: 3/20... Training Step: 1148... Training loss: 1.4020... 0.1336 sec/batch Epoch: 3/20... Training Step: 1149... Training loss: 1.4229... 0.1321 sec/batch Epoch: 3/20... Training Step: 1150... Training loss: 1.6742... 0.1291 sec/batch Epoch: 3/20... Training Step: 1151... Training loss: 1.6882... 0.1212 sec/batch Epoch: 3/20... Training Step: 1152... Training loss: 1.6954... 
0.1226 sec/batch Epoch: 3/20... Training Step: 1153... Training loss: 1.5207... 0.1207 sec/batch Epoch: 3/20... Training Step: 1154... Training loss: 1.6923... 0.1200 sec/batch Epoch: 3/20... Training Step: 1155... Training loss: 1.6228... 0.1204 sec/batch Epoch: 3/20... Training Step: 1156... Training loss: 1.3515... 0.1204 sec/batch Epoch: 3/20... Training Step: 1157... Training loss: 1.4203... 0.1255 sec/batch Epoch: 3/20... Training Step: 1158... Training loss: 1.4987... 0.1249 sec/batch Epoch: 3/20... Training Step: 1159... Training loss: 1.4366... 0.1234 sec/batch Epoch: 3/20... Training Step: 1160... Training loss: 1.3728... 0.1248 sec/batch Epoch: 3/20... Training Step: 1161... Training loss: 1.7861... 0.1224 sec/batch Epoch: 3/20... Training Step: 1162... Training loss: 1.3730... 0.1230 sec/batch Epoch: 3/20... Training Step: 1163... Training loss: 1.6585... 0.1241 sec/batch Epoch: 3/20... Training Step: 1164... Training loss: 1.3996... 0.1230 sec/batch Epoch: 3/20... Training Step: 1165... Training loss: 1.8117... 0.1224 sec/batch Epoch: 3/20... Training Step: 1166... Training loss: 1.3453... 0.1259 sec/batch Epoch: 3/20... Training Step: 1167... Training loss: 1.5191... 0.1248 sec/batch Epoch: 3/20... Training Step: 1168... Training loss: 1.5896... 0.1200 sec/batch Epoch: 3/20... Training Step: 1169... Training loss: 1.4248... 0.1253 sec/batch Epoch: 3/20... Training Step: 1170... Training loss: 1.4025... 0.1236 sec/batch Epoch: 3/20... Training Step: 1171... Training loss: 1.6216... 0.1211 sec/batch Epoch: 3/20... Training Step: 1172... Training loss: 1.5249... 0.1234 sec/batch Epoch: 3/20... Training Step: 1173... Training loss: 1.5229... 0.1256 sec/batch Epoch: 3/20... Training Step: 1174... Training loss: 1.3503... 0.1235 sec/batch Epoch: 3/20... Training Step: 1175... Training loss: 1.4559... 0.1283 sec/batch Epoch: 3/20... Training Step: 1176... Training loss: 1.5606... 0.1195 sec/batch Epoch: 3/20... Training Step: 1177... Training loss: 1.3951... 
0.1220 sec/batch Epoch: 3/20... Training Step: 1178... Training loss: 1.4849... 0.1230 sec/batch Epoch: 3/20... Training Step: 1179... Training loss: 1.5060... 0.1229 sec/batch Epoch: 3/20... Training Step: 1180... Training loss: 1.4280... 0.1242 sec/batch Epoch: 3/20... Training Step: 1181... Training loss: 1.3011... 0.1213 sec/batch Epoch: 3/20... Training Step: 1182... Training loss: 1.5110... 0.1192 sec/batch Epoch: 3/20... Training Step: 1183... Training loss: 1.4393... 0.1215 sec/batch Epoch: 3/20... Training Step: 1184... Training loss: 1.4552... 0.1194 sec/batch Epoch: 3/20... Training Step: 1185... Training loss: 1.7136... 0.1237 sec/batch Epoch: 3/20... Training Step: 1186... Training loss: 1.4040... 0.1202 sec/batch Epoch: 3/20... Training Step: 1187... Training loss: 1.4646... 0.1237 sec/batch Epoch: 3/20... Training Step: 1188... Training loss: 1.4631... 0.1198 sec/batch Epoch: 3/20... Training Step: 1189... Training loss: 1.4965... 0.1248 sec/batch Epoch: 3/20... Training Step: 1190... Training loss: 1.5032... 0.1190 sec/batch Epoch: 3/20... Training Step: 1191... Training loss: 1.5201... 0.1232 sec/batch Epoch: 3/20... Training Step: 1192... Training loss: 1.6394... 0.1240 sec/batch Epoch: 3/20... Training Step: 1193... Training loss: 1.5528... 0.1200 sec/batch Epoch: 3/20... Training Step: 1194... Training loss: 1.5468... 0.1232 sec/batch Epoch: 3/20... Training Step: 1195... Training loss: 1.8005... 0.1265 sec/batch Epoch: 3/20... Training Step: 1196... Training loss: 1.5517... 0.1237 sec/batch Epoch: 3/20... Training Step: 1197... Training loss: 1.5621... 0.1256 sec/batch Epoch: 3/20... Training Step: 1198... Training loss: 1.7631... 0.1204 sec/batch Epoch: 3/20... Training Step: 1199... Training loss: 1.5714... 0.1230 sec/batch Epoch: 3/20... Training Step: 1200... Training loss: 1.5347... 0.1218 sec/batch Epoch: 3/20... Training Step: 1201... Training loss: 1.6003... 0.1256 sec/batch Epoch: 3/20... Training Step: 1202... Training loss: 1.4169... 
0.1171 sec/batch Epoch: 3/20... Training Step: 1203... Training loss: 1.6148... 0.1161 sec/batch Epoch: 3/20... Training Step: 1204... Training loss: 1.5746... 0.1197 sec/batch Epoch: 3/20... Training Step: 1205... Training loss: 1.6521... 0.1223 sec/batch Epoch: 3/20... Training Step: 1206... Training loss: 1.7894... 0.1206 sec/batch Epoch: 3/20... Training Step: 1207... Training loss: 1.5322... 0.1197 sec/batch Epoch: 3/20... Training Step: 1208... Training loss: 1.4790... 0.1191 sec/batch Epoch: 3/20... Training Step: 1209... Training loss: 1.3678... 0.1205 sec/batch Epoch: 3/20... Training Step: 1210... Training loss: 1.3871... 0.1266 sec/batch Epoch: 3/20... Training Step: 1211... Training loss: 1.4056... 0.1211 sec/batch Epoch: 3/20... Training Step: 1212... Training loss: 1.5430... 0.1221 sec/batch Epoch: 3/20... Training Step: 1213... Training loss: 1.4124... 0.1227 sec/batch Epoch: 3/20... Training Step: 1214... Training loss: 1.5950... 0.1227 sec/batch Epoch: 3/20... Training Step: 1215... Training loss: 1.5371... 0.1209 sec/batch Epoch: 3/20... Training Step: 1216... Training loss: 1.5451... 0.1199 sec/batch Epoch: 3/20... Training Step: 1217... Training loss: 1.6628... 0.1254 sec/batch Epoch: 3/20... Training Step: 1218... Training loss: 1.4933... 0.1308 sec/batch Epoch: 3/20... Training Step: 1219... Training loss: 1.4938... 0.1243 sec/batch Epoch: 3/20... Training Step: 1220... Training loss: 1.4262... 0.1183 sec/batch Epoch: 3/20... Training Step: 1221... Training loss: 1.3884... 0.1186 sec/batch Epoch: 3/20... Training Step: 1222... Training loss: 1.4537... 0.1233 sec/batch Epoch: 3/20... Training Step: 1223... Training loss: 1.5068... 0.1255 sec/batch Epoch: 3/20... Training Step: 1224... Training loss: 1.6773... 0.1205 sec/batch Epoch: 3/20... Training Step: 1225... Training loss: 1.3959... 0.1217 sec/batch Epoch: 3/20... Training Step: 1226... Training loss: 1.4378... 0.1213 sec/batch Epoch: 3/20... Training Step: 1227... Training loss: 1.4300... 
0.1211 sec/batch Epoch: 3/20... Training Step: 1228... Training loss: 1.5322... 0.1208 sec/batch Epoch: 3/20... Training Step: 1229... Training loss: 1.4586... 0.1243 sec/batch Epoch: 3/20... Training Step: 1230... Training loss: 1.5052... 0.1236 sec/batch Epoch: 3/20... Training Step: 1231... Training loss: 1.2914... 0.1227 sec/batch Epoch: 3/20... Training Step: 1232... Training loss: 1.7427... 0.1197 sec/batch Epoch: 3/20... Training Step: 1233... Training loss: 1.3229... 0.1235 sec/batch Epoch: 3/20... Training Step: 1234... Training loss: 1.5045... 0.1225 sec/batch Epoch: 3/20... Training Step: 1235... Training loss: 1.4541... 0.1210 sec/batch Epoch: 3/20... Training Step: 1236... Training loss: 1.8124... 0.1204 sec/batch Epoch: 3/20... Training Step: 1237... Training loss: 1.5099... 0.1206 sec/batch Epoch: 3/20... Training Step: 1238... Training loss: 1.6020... 0.1218 sec/batch Epoch: 3/20... Training Step: 1239... Training loss: 1.5292... 0.1217 sec/batch Epoch: 3/20... Training Step: 1240... Training loss: 1.5018... 0.1231 sec/batch Epoch: 3/20... Training Step: 1241... Training loss: 1.6346... 0.1230 sec/batch Epoch: 3/20... Training Step: 1242... Training loss: 1.4792... 0.1233 sec/batch Epoch: 3/20... Training Step: 1243... Training loss: 1.2944... 0.1229 sec/batch Epoch: 3/20... Training Step: 1244... Training loss: 1.4494... 0.1199 sec/batch Epoch: 3/20... Training Step: 1245... Training loss: 1.4722... 0.1237 sec/batch Epoch: 3/20... Training Step: 1246... Training loss: 1.4190... 0.1211 sec/batch Epoch: 3/20... Training Step: 1247... Training loss: 1.4602... 0.1234 sec/batch Epoch: 3/20... Training Step: 1248... Training loss: 1.3172... 0.1233 sec/batch Epoch: 3/20... Training Step: 1249... Training loss: 1.2813... 0.1188 sec/batch Epoch: 3/20... Training Step: 1250... Training loss: 1.4847... 0.1252 sec/batch Epoch: 3/20... Training Step: 1251... Training loss: 1.3949... 0.1244 sec/batch Epoch: 3/20... Training Step: 1252... Training loss: 1.3037... 
0.1237 sec/batch Epoch: 3/20... Training Step: 1253... Training loss: 1.2033... 0.1338 sec/batch Epoch: 3/20... Training Step: 1254... Training loss: 1.3316... 0.1363 sec/batch Epoch: 3/20... Training Step: 1255... Training loss: 1.5470... 0.1228 sec/batch Epoch: 3/20... Training Step: 1256... Training loss: 1.4848... 0.1190 sec/batch Epoch: 3/20... Training Step: 1257... Training loss: 1.4451... 0.1195 sec/batch Epoch: 3/20... Training Step: 1258... Training loss: 1.5039... 0.1204 sec/batch Epoch: 3/20... Training Step: 1259... Training loss: 1.5019... 0.1223 sec/batch Epoch: 3/20... Training Step: 1260... Training loss: 1.4659... 0.1216 sec/batch Epoch: 3/20... Training Step: 1261... Training loss: 1.2788... 0.1192 sec/batch Epoch: 3/20... Training Step: 1262... Training loss: 1.3967... 0.1267 sec/batch Epoch: 3/20... Training Step: 1263... Training loss: 1.6080... 0.1298 sec/batch Epoch: 3/20... Training Step: 1264... Training loss: 1.4442... 0.1268 sec/batch Epoch: 3/20... Training Step: 1265... Training loss: 1.4486... 0.1297 sec/batch Epoch: 3/20... Training Step: 1266... Training loss: 1.4580... 0.1258 sec/batch Epoch: 3/20... Training Step: 1267... Training loss: 1.5623... 0.1245 sec/batch Epoch: 3/20... Training Step: 1268... Training loss: 1.3603... 0.1206 sec/batch Epoch: 3/20... Training Step: 1269... Training loss: 1.3881... 0.1228 sec/batch Epoch: 3/20... Training Step: 1270... Training loss: 1.5881... 0.1202 sec/batch Epoch: 3/20... Training Step: 1271... Training loss: 1.1880... 0.1177 sec/batch Epoch: 3/20... Training Step: 1272... Training loss: 1.6003... 0.1224 sec/batch Epoch: 3/20... Training Step: 1273... Training loss: 1.3740... 0.1229 sec/batch Epoch: 3/20... Training Step: 1274... Training loss: 1.3288... 0.1190 sec/batch Epoch: 3/20... Training Step: 1275... Training loss: 1.2824... 0.1275 sec/batch Epoch: 3/20... Training Step: 1276... Training loss: 1.7658... 0.1248 sec/batch Epoch: 3/20... Training Step: 1277... Training loss: 1.2926... 
0.1230 sec/batch Epoch: 3/20... Training Step: 1278... Training loss: 1.4131... 0.1203 sec/batch Epoch: 3/20... Training Step: 1279... Training loss: 1.4284... 0.1236 sec/batch Epoch: 3/20... Training Step: 1280... Training loss: 1.4369... 0.1240 sec/batch Epoch: 3/20... Training Step: 1281... Training loss: 1.2484... 0.1205 sec/batch Epoch: 3/20... Training Step: 1282... Training loss: 1.1394... 0.1212 sec/batch Epoch: 3/20... Training Step: 1283... Training loss: 1.5956... 0.1231 sec/batch Epoch: 3/20... Training Step: 1284... Training loss: 1.3623... 0.1231 sec/batch Epoch: 3/20... Training Step: 1285... Training loss: 1.3414... 0.1168 sec/batch Epoch: 3/20... Training Step: 1286... Training loss: 1.4809... 0.1236 sec/batch Epoch: 3/20... Training Step: 1287... Training loss: 1.6673... 0.1233 sec/batch Epoch: 3/20... Training Step: 1288... Training loss: 1.1649... 0.1233 sec/batch Epoch: 3/20... Training Step: 1289... Training loss: 1.4957... 0.1265 sec/batch Epoch: 3/20... Training Step: 1290... Training loss: 1.4759... 0.1211 sec/batch Epoch: 3/20... Training Step: 1291... Training loss: 1.4942... 0.1233 sec/batch Epoch: 3/20... Training Step: 1292... Training loss: 1.5007... 0.1234 sec/batch Epoch: 3/20... Training Step: 1293... Training loss: 1.3933... 0.1204 sec/batch Epoch: 3/20... Training Step: 1294... Training loss: 1.4123... 0.1174 sec/batch Epoch: 3/20... Training Step: 1295... Training loss: 1.2318... 0.1189 sec/batch Epoch: 3/20... Training Step: 1296... Training loss: 1.5183... 0.1207 sec/batch Epoch: 3/20... Training Step: 1297... Training loss: 1.4462... 0.1256 sec/batch Epoch: 3/20... Training Step: 1298... Training loss: 1.3265... 0.1228 sec/batch Epoch: 3/20... Training Step: 1299... Training loss: 1.2633... 0.1228 sec/batch Epoch: 3/20... Training Step: 1300... Training loss: 1.5061... 0.1218 sec/batch Epoch: 3/20... Training Step: 1301... Training loss: 1.4581... 0.1223 sec/batch Epoch: 3/20... Training Step: 1302... Training loss: 1.5485... 
0.1216 sec/batch Epoch: 3/20... Training Step: 1303... Training loss: 1.4608... 0.1212 sec/batch Epoch: 3/20... Training Step: 1304... Training loss: 1.3954... 0.1225 sec/batch Epoch: 3/20... Training Step: 1305... Training loss: 1.5705... 0.1261 sec/batch Epoch: 3/20... Training Step: 1306... Training loss: 1.4479... 0.1320 sec/batch Epoch: 3/20... Training Step: 1307... Training loss: 1.4015... 0.1288 sec/batch Epoch: 3/20... Training Step: 1308... Training loss: 1.6474... 0.1200 sec/batch Epoch: 3/20... Training Step: 1309... Training loss: 1.2750... 0.1179 sec/batch Epoch: 3/20... Training Step: 1310... Training loss: 1.3717... 0.1206 sec/batch Epoch: 3/20... Training Step: 1311... Training loss: 1.3306... 0.1225 sec/batch Epoch: 3/20... Training Step: 1312... Training loss: 1.4124... 0.1149 sec/batch Epoch: 3/20... Training Step: 1313... Training loss: 1.3034... 0.1221 sec/batch Epoch: 3/20... Training Step: 1314... Training loss: 1.4703... 0.1244 sec/batch Epoch: 3/20... Training Step: 1315... Training loss: 1.4667... 0.1232 sec/batch Epoch: 3/20... Training Step: 1316... Training loss: 1.2582... 0.1288 sec/batch Epoch: 3/20... Training Step: 1317... Training loss: 1.3367... 0.1225 sec/batch Epoch: 3/20... Training Step: 1318... Training loss: 1.3607... 0.1178 sec/batch Epoch: 3/20... Training Step: 1319... Training loss: 1.2614... 0.1231 sec/batch Epoch: 3/20... Training Step: 1320... Training loss: 1.2499... 0.1217 sec/batch Epoch: 3/20... Training Step: 1321... Training loss: 1.3843... 0.1233 sec/batch Epoch: 3/20... Training Step: 1322... Training loss: 1.4731... 0.1237 sec/batch Epoch: 3/20... Training Step: 1323... Training loss: 1.3160... 0.1215 sec/batch Epoch: 3/20... Training Step: 1324... Training loss: 1.5521... 0.1229 sec/batch Epoch: 3/20... Training Step: 1325... Training loss: 1.2794... 0.1243 sec/batch Epoch: 3/20... Training Step: 1326... Training loss: 1.2844... 0.1232 sec/batch Epoch: 3/20... Training Step: 1327... Training loss: 1.3642... 
0.1236 sec/batch Epoch: 3/20... Training Step: 1328... Training loss: 1.3242... 0.1219 sec/batch Epoch: 3/20... Training Step: 1329... Training loss: 1.4141... 0.1227 sec/batch Epoch: 3/20... Training Step: 1330... Training loss: 1.3989... 0.1258 sec/batch Epoch: 3/20... Training Step: 1331... Training loss: 1.3728... 0.1221 sec/batch Epoch: 3/20... Training Step: 1332... Training loss: 1.4453... 0.1225 sec/batch Epoch: 3/20... Training Step: 1333... Training loss: 1.4572... 0.1202 sec/batch Epoch: 3/20... Training Step: 1334... Training loss: 1.6580... 0.1209 sec/batch Epoch: 3/20... Training Step: 1335... Training loss: 1.4955... 0.1209 sec/batch Epoch: 3/20... Training Step: 1336... Training loss: 1.7907... 0.1191 sec/batch Epoch: 3/20... Training Step: 1337... Training loss: 1.4268... 0.1239 sec/batch Epoch: 3/20... Training Step: 1338... Training loss: 1.3740... 0.1220 sec/batch Epoch: 3/20... Training Step: 1339... Training loss: 1.2539... 0.1243 sec/batch Epoch: 3/20... Training Step: 1340... Training loss: 1.4921... 0.1291 sec/batch Epoch: 3/20... Training Step: 1341... Training loss: 1.4860... 0.1239 sec/batch Epoch: 3/20... Training Step: 1342... Training loss: 1.4720... 0.1208 sec/batch Epoch: 3/20... Training Step: 1343... Training loss: 1.6715... 0.1269 sec/batch Epoch: 3/20... Training Step: 1344... Training loss: 1.7732... 0.1307 sec/batch Epoch: 3/20... Training Step: 1345... Training loss: 1.5820... 0.1327 sec/batch Epoch: 3/20... Training Step: 1346... Training loss: 1.3372... 0.1170 sec/batch Epoch: 3/20... Training Step: 1347... Training loss: 1.6558... 0.1314 sec/batch Epoch: 3/20... Training Step: 1348... Training loss: 1.3329... 0.1218 sec/batch Epoch: 3/20... Training Step: 1349... Training loss: 1.5555... 0.1188 sec/batch Epoch: 3/20... Training Step: 1350... Training loss: 1.5288... 0.1314 sec/batch Epoch: 3/20... Training Step: 1351... Training loss: 1.5522... 0.1235 sec/batch Epoch: 3/20... Training Step: 1352... Training loss: 1.5946... 
0.1249 sec/batch Epoch: 3/20... Training Step: 1353... Training loss: 1.3923... 0.1265 sec/batch Epoch: 3/20... Training Step: 1354... Training loss: 1.5529... 0.1299 sec/batch Epoch: 3/20... Training Step: 1355... Training loss: 1.5271... 0.1359 sec/batch Epoch: 3/20... Training Step: 1356... Training loss: 1.5991... 0.1324 sec/batch Epoch: 3/20... Training Step: 1357... Training loss: 1.4104... 0.1308 sec/batch Epoch: 3/20... Training Step: 1358... Training loss: 1.4555... 0.1285 sec/batch Epoch: 3/20... Training Step: 1359... Training loss: 1.6443... 0.1335 sec/batch Epoch: 3/20... Training Step: 1360... Training loss: 1.3999... 0.1206 sec/batch Epoch: 3/20... Training Step: 1361... Training loss: 1.7414... 0.1229 sec/batch Epoch: 3/20... Training Step: 1362... Training loss: 1.4795... 0.1209 sec/batch Epoch: 3/20... Training Step: 1363... Training loss: 1.3346... 0.1194 sec/batch Epoch: 3/20... Training Step: 1364... Training loss: 1.5552... 0.1215 sec/batch Epoch: 3/20... Training Step: 1365... Training loss: 1.3470... 0.1204 sec/batch Epoch: 3/20... Training Step: 1366... Training loss: 1.6726... 0.1196 sec/batch Epoch: 3/20... Training Step: 1367... Training loss: 1.7311... 0.1206 sec/batch Epoch: 3/20... Training Step: 1368... Training loss: 1.7849... 0.1217 sec/batch Epoch: 3/20... Training Step: 1369... Training loss: 1.5782... 0.1246 sec/batch Epoch: 3/20... Training Step: 1370... Training loss: 1.4696... 0.1254 sec/batch Epoch: 3/20... Training Step: 1371... Training loss: 1.5873... 0.1217 sec/batch Epoch: 3/20... Training Step: 1372... Training loss: 1.5114... 0.1223 sec/batch Epoch: 3/20... Training Step: 1373... Training loss: 1.4033... 0.1171 sec/batch Epoch: 3/20... Training Step: 1374... Training loss: 1.3788... 0.1273 sec/batch Epoch: 3/20... Training Step: 1375... Training loss: 1.5542... 0.1250 sec/batch Epoch: 3/20... Training Step: 1376... Training loss: 1.2138... 0.1211 sec/batch Epoch: 3/20... Training Step: 1377... Training loss: 1.3873... 
0.1206 sec/batch Epoch: 3/20... Training Step: 1378... Training loss: 1.4237... 0.1236 sec/batch Epoch: 3/20... Training Step: 1379... Training loss: 1.4807... 0.1255 sec/batch Epoch: 3/20... Training Step: 1380... Training loss: 1.4350... 0.1270 sec/batch Epoch: 3/20... Training Step: 1381... Training loss: 1.3911... 0.1233 sec/batch Epoch: 3/20... Training Step: 1382... Training loss: 1.5712... 0.1264 sec/batch Epoch: 3/20... Training Step: 1383... Training loss: 1.5879... 0.1213 sec/batch Epoch: 3/20... Training Step: 1384... Training loss: 1.3152... 0.1237 sec/batch Epoch: 3/20... Training Step: 1385... Training loss: 1.3861... 0.1255 sec/batch Epoch: 3/20... Training Step: 1386... Training loss: 1.2745... 0.1213 sec/batch Epoch: 3/20... Training Step: 1387... Training loss: 1.1810... 0.1248 sec/batch Epoch: 3/20... Training Step: 1388... Training loss: 1.3882... 0.1251 sec/batch Epoch: 3/20... Training Step: 1389... Training loss: 1.5194... 0.1234 sec/batch Epoch: 3/20... Training Step: 1390... Training loss: 1.4200... 0.1217 sec/batch Epoch: 3/20... Training Step: 1391... Training loss: 1.4400... 0.1234 sec/batch Epoch: 3/20... Training Step: 1392... Training loss: 1.3359... 0.1225 sec/batch Epoch: 4/20... Training Step: 1393... Training loss: 1.9313... 0.1260 sec/batch Epoch: 4/20... Training Step: 1394... Training loss: 1.4922... 0.1188 sec/batch Epoch: 4/20... Training Step: 1395... Training loss: 1.4301... 0.1268 sec/batch Epoch: 4/20... Training Step: 1396... Training loss: 1.3722... 0.1196 sec/batch Epoch: 4/20... Training Step: 1397... Training loss: 1.4674... 0.1252 sec/batch Epoch: 4/20... Training Step: 1398... Training loss: 1.2833... 0.1244 sec/batch Epoch: 4/20... Training Step: 1399... Training loss: 1.4789... 0.1240 sec/batch Epoch: 4/20... Training Step: 1400... Training loss: 1.3207... 0.1296 sec/batch Epoch: 4/20... Training Step: 1401... Training loss: 1.2971... 0.1233 sec/batch Epoch: 4/20... Training Step: 1402... Training loss: 1.6051... 
0.1176 sec/batch Epoch: 4/20... Training Step: 1403... Training loss: 1.3909... 0.1229 sec/batch Epoch: 4/20... Training Step: 1404... Training loss: 1.1811... 0.1225 sec/batch Epoch: 4/20... Training Step: 1405... Training loss: 1.6804... 0.1213 sec/batch Epoch: 4/20... Training Step: 1406... Training loss: 1.2042... 0.1191 sec/batch Epoch: 4/20... Training Step: 1407... Training loss: 1.3924... 0.1253 sec/batch Epoch: 4/20... Training Step: 1408... Training loss: 1.4801... 0.1210 sec/batch Epoch: 4/20... Training Step: 1409... Training loss: 1.3012... 0.1255 sec/batch Epoch: 4/20... Training Step: 1410... Training loss: 1.2288... 0.1194 sec/batch Epoch: 4/20... Training Step: 1411... Training loss: 1.3916... 0.1194 sec/batch Epoch: 4/20... Training Step: 1412... Training loss: 1.2628... 0.1217 sec/batch Epoch: 4/20... Training Step: 1413... Training loss: 1.3941... 0.1213 sec/batch Epoch: 4/20... Training Step: 1414... Training loss: 1.3971... 0.1188 sec/batch Epoch: 4/20... Training Step: 1415... Training loss: 1.6025... 0.1210 sec/batch Epoch: 4/20... Training Step: 1416... Training loss: 1.2473... 0.1230 sec/batch Epoch: 4/20... Training Step: 1417... Training loss: 1.3737... 0.1197 sec/batch Epoch: 4/20... Training Step: 1418... Training loss: 1.3562... 0.1182 sec/batch Epoch: 4/20... Training Step: 1419... Training loss: 1.4493... 0.1213 sec/batch Epoch: 4/20... Training Step: 1420... Training loss: 1.2747... 0.1222 sec/batch Epoch: 4/20... Training Step: 1421... Training loss: 1.3746... 0.1236 sec/batch Epoch: 4/20... Training Step: 1422... Training loss: 1.3678... 0.1167 sec/batch Epoch: 4/20... Training Step: 1423... Training loss: 1.2374... 0.1255 sec/batch Epoch: 4/20... Training Step: 1424... Training loss: 1.3995... 0.1181 sec/batch Epoch: 4/20... Training Step: 1425... Training loss: 1.2729... 0.1219 sec/batch Epoch: 4/20... Training Step: 1426... Training loss: 1.2481... 0.1241 sec/batch Epoch: 4/20... Training Step: 1427... Training loss: 1.3059... 
0.1253 sec/batch Epoch: 4/20... Training Step: 1428... Training loss: 1.2463... 0.1213 sec/batch Epoch: 4/20... Training Step: 1429... Training loss: 1.4052... 0.1252 sec/batch Epoch: 4/20... Training Step: 1430... Training loss: 1.1703... 0.1200 sec/batch Epoch: 4/20... Training Step: 1431... Training loss: 1.2630... 0.1275 sec/batch Epoch: 4/20... Training Step: 1432... Training loss: 1.5672... 0.1254 sec/batch Epoch: 4/20... Training Step: 1433... Training loss: 1.3659... 0.1217 sec/batch Epoch: 4/20... Training Step: 1434... Training loss: 1.2535... 0.1230 sec/batch Epoch: 4/20... Training Step: 1435... Training loss: 1.5014... 0.1286 sec/batch Epoch: 4/20... Training Step: 1436... Training loss: 1.0834... 0.1224 sec/batch Epoch: 4/20... Training Step: 1437... Training loss: 1.3498... 0.1297 sec/batch Epoch: 4/20... Training Step: 1438... Training loss: 1.1832... 0.1233 sec/batch Epoch: 4/20... Training Step: 1439... Training loss: 1.3530... 0.1220 sec/batch Epoch: 4/20... Training Step: 1440... Training loss: 1.3427... 0.1238 sec/batch Epoch: 4/20... Training Step: 1441... Training loss: 1.2966... 0.1211 sec/batch Epoch: 4/20... Training Step: 1442... Training loss: 1.4366... 0.1193 sec/batch Epoch: 4/20... Training Step: 1443... Training loss: 1.3389... 0.1235 sec/batch Epoch: 4/20... Training Step: 1444... Training loss: 1.4836... 0.1181 sec/batch Epoch: 4/20... Training Step: 1445... Training loss: 1.4737... 0.1209 sec/batch Epoch: 4/20... Training Step: 1446... Training loss: 1.3881... 0.1197 sec/batch Epoch: 4/20... Training Step: 1447... Training loss: 1.2272... 0.1262 sec/batch Epoch: 4/20... Training Step: 1448... Training loss: 1.4611... 0.1356 sec/batch Epoch: 4/20... Training Step: 1449... Training loss: 1.4331... 0.1266 sec/batch Epoch: 4/20... Training Step: 1450... Training loss: 1.3789... 0.1190 sec/batch Epoch: 4/20... Training Step: 1451... Training loss: 1.2747... 0.1268 sec/batch Epoch: 4/20... Training Step: 1452... Training loss: 1.2960... 
0.1232 sec/batch Epoch: 4/20... Training Step: 1453... Training loss: 1.4640... 0.1207 sec/batch Epoch: 4/20... Training Step: 1454... Training loss: 1.4305... 0.1203 sec/batch Epoch: 4/20... Training Step: 1455... Training loss: 1.3445... 0.1195 sec/batch Epoch: 4/20... Training Step: 1456... Training loss: 1.3732... 0.1236 sec/batch Epoch: 4/20... Training Step: 1457... Training loss: 1.1927... 0.1213 sec/batch Epoch: 4/20... Training Step: 1458... Training loss: 1.5063... 0.1196 sec/batch Epoch: 4/20... Training Step: 1459... Training loss: 1.4179... 0.1225 sec/batch Epoch: 4/20... Training Step: 1460... Training loss: 1.4725... 0.1216 sec/batch Epoch: 4/20... Training Step: 1461... Training loss: 1.2833... 0.1218 sec/batch Epoch: 4/20... Training Step: 1462... Training loss: 1.3418... 0.1200 sec/batch Epoch: 4/20... Training Step: 1463... Training loss: 1.3958... 0.1219 sec/batch Epoch: 4/20... Training Step: 1464... Training loss: 1.3670... 0.1246 sec/batch Epoch: 4/20... Training Step: 1465... Training loss: 1.4485... 0.1194 sec/batch Epoch: 4/20... Training Step: 1466... Training loss: 1.3058... 0.1264 sec/batch Epoch: 4/20... Training Step: 1467... Training loss: 1.6422... 0.1211 sec/batch Epoch: 4/20... Training Step: 1468... Training loss: 1.3307... 0.1223 sec/batch Epoch: 4/20... Training Step: 1469... Training loss: 1.2994... 0.1323 sec/batch Epoch: 4/20... Training Step: 1470... Training loss: 1.4384... 0.1281 sec/batch Epoch: 4/20... Training Step: 1471... Training loss: 1.3853... 0.1251 sec/batch Epoch: 4/20... Training Step: 1472... Training loss: 1.2819... 0.1274 sec/batch Epoch: 4/20... Training Step: 1473... Training loss: 1.5100... 0.1250 sec/batch Epoch: 4/20... Training Step: 1474... Training loss: 1.3332... 0.1283 sec/batch Epoch: 4/20... Training Step: 1475... Training loss: 1.2866... 0.1289 sec/batch Epoch: 4/20... Training Step: 1476... Training loss: 1.4128... 0.1186 sec/batch Epoch: 4/20... Training Step: 1477... Training loss: 1.4052... 
0.1275 sec/batch Epoch: 4/20... Training Step: 1478... Training loss: 1.4796... 0.1229 sec/batch Epoch: 4/20... Training Step: 1479... Training loss: 1.2473... 0.1240 sec/batch Epoch: 4/20... Training Step: 1480... Training loss: 1.5011... 0.1213 sec/batch Epoch: 4/20... Training Step: 1481... Training loss: 1.5194... 0.1224 sec/batch Epoch: 4/20... Training Step: 1482... Training loss: 1.3242... 0.1188 sec/batch Epoch: 4/20... Training Step: 1483... Training loss: 1.5245... 0.1207 sec/batch Epoch: 4/20... Training Step: 1484... Training loss: 1.6861... 0.1270 sec/batch Epoch: 4/20... Training Step: 1485... Training loss: 1.2433... 0.1218 sec/batch Epoch: 4/20... Training Step: 1486... Training loss: 1.5423... 0.1188 sec/batch Epoch: 4/20... Training Step: 1487... Training loss: 1.3283... 0.1232 sec/batch Epoch: 4/20... Training Step: 1488... Training loss: 1.3628... 0.1221 sec/batch Epoch: 4/20... Training Step: 1489... Training loss: 1.7787... 0.1226 sec/batch Epoch: 4/20... Training Step: 1490... Training loss: 1.5023... 0.1201 sec/batch Epoch: 4/20... Training Step: 1491... Training loss: 1.5314... 0.1214 sec/batch Epoch: 4/20... Training Step: 1492... Training loss: 1.3412... 0.1200 sec/batch Epoch: 4/20... Training Step: 1493... Training loss: 1.4585... 0.1210 sec/batch Epoch: 4/20... Training Step: 1494... Training loss: 1.6980... 0.1221 sec/batch Epoch: 4/20... Training Step: 1495... Training loss: 1.6606... 0.1256 sec/batch Epoch: 4/20... Training Step: 1496... Training loss: 1.5003... 0.1248 sec/batch Epoch: 4/20... Training Step: 1497... Training loss: 1.6203... 0.1218 sec/batch Epoch: 4/20... Training Step: 1498... Training loss: 1.7408... 0.1183 sec/batch Epoch: 4/20... Training Step: 1499... Training loss: 1.6151... 0.1221 sec/batch Epoch: 4/20... Training Step: 1500... Training loss: 1.9297... 0.1235 sec/batch Epoch: 4/20... Training Step: 1501... Training loss: 1.6583... 0.1245 sec/batch Epoch: 4/20... Training Step: 1502... Training loss: 1.3600... 
0.1203 sec/batch Epoch: 4/20... Training Step: 1503... Training loss: 1.5712... 0.1240 sec/batch Epoch: 4/20... Training Step: 1504... Training loss: 1.4967... 0.1227 sec/batch Epoch: 4/20... Training Step: 1505... Training loss: 1.6027... 0.1224 sec/batch Epoch: 4/20... Training Step: 1506... Training loss: 1.6185... 0.1237 sec/batch Epoch: 4/20... Training Step: 1507... Training loss: 1.5903... 0.1201 sec/batch Epoch: 4/20... Training Step: 1508... Training loss: 1.3800... 0.1210 sec/batch Epoch: 4/20... Training Step: 1509... Training loss: 1.5354... 0.1228 sec/batch Epoch: 4/20... Training Step: 1510... Training loss: 1.6983... 0.1213 sec/batch Epoch: 4/20... Training Step: 1511... Training loss: 1.4242... 0.1237 sec/batch Epoch: 4/20... Training Step: 1512... Training loss: 1.2859... 0.1217 sec/batch Epoch: 4/20... Training Step: 1513... Training loss: 1.5107... 0.1242 sec/batch Epoch: 4/20... Training Step: 1514... Training loss: 1.4637... 0.1193 sec/batch Epoch: 4/20... Training Step: 1515... Training loss: 1.6800... 0.1193 sec/batch Epoch: 4/20... Training Step: 1516... Training loss: 1.5276... 0.1198 sec/batch Epoch: 4/20... Training Step: 1517... Training loss: 1.5649... 0.1181 sec/batch Epoch: 4/20... Training Step: 1518... Training loss: 1.2967... 0.1233 sec/batch Epoch: 4/20... Training Step: 1519... Training loss: 1.3368... 0.1250 sec/batch Epoch: 4/20... Training Step: 1520... Training loss: 1.4736... 0.1232 sec/batch Epoch: 4/20... Training Step: 1521... Training loss: 1.6530... 0.1236 sec/batch Epoch: 4/20... Training Step: 1522... Training loss: 1.4966... 0.1230 sec/batch Epoch: 4/20... Training Step: 1523... Training loss: 1.7429... 0.1173 sec/batch Epoch: 4/20... Training Step: 1524... Training loss: 1.5686... 0.1211 sec/batch Epoch: 4/20... Training Step: 1525... Training loss: 1.3538... 0.1220 sec/batch Epoch: 4/20... Training Step: 1526... Training loss: 1.5236... 0.1212 sec/batch Epoch: 4/20... Training Step: 1527... Training loss: 1.2845... 
0.1264 sec/batch Epoch: 4/20... Training Step: 1528... Training loss: 1.2576... 0.1238 sec/batch Epoch: 4/20... Training Step: 1529... Training loss: 1.2262... 0.1259 sec/batch Epoch: 4/20... Training Step: 1530... Training loss: 1.3750... 0.1197 sec/batch Epoch: 4/20... Training Step: 1531... Training loss: 1.2956... 0.1232 sec/batch Epoch: 4/20... Training Step: 1532... Training loss: 1.3355... 0.1191 sec/batch Epoch: 4/20... Training Step: 1533... Training loss: 1.3361... 0.1202 sec/batch Epoch: 4/20... Training Step: 1534... Training loss: 1.3512... 0.1224 sec/batch Epoch: 4/20... Training Step: 1535... Training loss: 1.1822... 0.1271 sec/batch Epoch: 4/20... Training Step: 1536... Training loss: 1.4463... 0.1209 sec/batch Epoch: 4/20... Training Step: 1537... Training loss: 1.4729... 0.1215 sec/batch Epoch: 4/20... Training Step: 1538... Training loss: 1.3268... 0.1181 sec/batch Epoch: 4/20... Training Step: 1539... Training loss: 1.5076... 0.1230 sec/batch Epoch: 4/20... Training Step: 1540... Training loss: 1.2768... 0.1227 sec/batch Epoch: 4/20... Training Step: 1541... Training loss: 1.3413... 0.1303 sec/batch Epoch: 4/20... Training Step: 1542... Training loss: 1.6151... 0.1289 sec/batch Epoch: 4/20... Training Step: 1543... Training loss: 1.4276... 0.1224 sec/batch Epoch: 4/20... Training Step: 1544... Training loss: 1.5594... 0.1185 sec/batch Epoch: 4/20... Training Step: 1545... Training loss: 1.6299... 0.1234 sec/batch Epoch: 4/20... Training Step: 1546... Training loss: 1.4598... 0.1215 sec/batch Epoch: 4/20... Training Step: 1547... Training loss: 1.5796... 0.1202 sec/batch Epoch: 4/20... Training Step: 1548... Training loss: 1.2760... 0.1241 sec/batch Epoch: 4/20... Training Step: 1549... Training loss: 1.3051... 0.1261 sec/batch Epoch: 4/20... Training Step: 1550... Training loss: 1.3215... 0.1242 sec/batch Epoch: 4/20... Training Step: 1551... Training loss: 1.3592... 0.1184 sec/batch Epoch: 4/20... Training Step: 1552... Training loss: 1.3590... 
0.1255 sec/batch Epoch: 4/20... Training Step: 1553... Training loss: 1.4959... 0.1265 sec/batch Epoch: 4/20... Training Step: 1554... Training loss: 1.4831... 0.1190 sec/batch Epoch: 4/20... Training Step: 1555... Training loss: 1.5494... 0.1240 sec/batch Epoch: 4/20... Training Step: 1556... Training loss: 1.2590... 0.1238 sec/batch Epoch: 4/20... Training Step: 1557... Training loss: 1.4489... 0.1272 sec/batch Epoch: 4/20... Training Step: 1558... Training loss: 1.3497... 0.1266 sec/batch Epoch: 4/20... Training Step: 1559... Training loss: 1.3712... 0.1186 sec/batch Epoch: 4/20... Training Step: 1560... Training loss: 1.5263... 0.1200 sec/batch Epoch: 4/20... Training Step: 1561... Training loss: 1.4972... 0.1242 sec/batch Epoch: 4/20... Training Step: 1562... Training loss: 1.5029... 0.1242 sec/batch Epoch: 4/20... Training Step: 1563... Training loss: 1.3780... 0.1257 sec/batch Epoch: 4/20... Training Step: 1564... Training loss: 1.5499... 0.1213 sec/batch Epoch: 4/20... Training Step: 1565... Training loss: 1.3015... 0.1164 sec/batch Epoch: 4/20... Training Step: 1566... Training loss: 1.4424... 0.1207 sec/batch Epoch: 4/20... Training Step: 1567... Training loss: 1.4742... 0.1179 sec/batch Epoch: 4/20... Training Step: 1568... Training loss: 1.2731... 0.1262 sec/batch Epoch: 4/20... Training Step: 1569... Training loss: 1.2498... 0.1218 sec/batch Epoch: 4/20... Training Step: 1570... Training loss: 1.6188... 0.1240 sec/batch Epoch: 4/20... Training Step: 1571... Training loss: 1.3378... 0.1189 sec/batch Epoch: 4/20... Training Step: 1572... Training loss: 1.5607... 0.1239 sec/batch Epoch: 4/20... Training Step: 1573... Training loss: 1.3539... 0.1270 sec/batch Epoch: 4/20... Training Step: 1574... Training loss: 1.6304... 0.1174 sec/batch Epoch: 4/20... Training Step: 1575... Training loss: 1.4477... 0.1237 sec/batch Epoch: 4/20... Training Step: 1576... Training loss: 1.4303... 0.1214 sec/batch Epoch: 4/20... Training Step: 1577... Training loss: 1.6332... 
0.1256 sec/batch Epoch: 4/20... Training Step: 1578... Training loss: 1.4556... 0.1245 sec/batch Epoch: 4/20... Training Step: 1579... Training loss: 1.5490... 0.1233 sec/batch Epoch: 4/20... Training Step: 1580... Training loss: 1.2421... 0.1211 sec/batch Epoch: 4/20... Training Step: 1581... Training loss: 1.4559... 0.1238 sec/batch Epoch: 4/20... Training Step: 1582... Training loss: 1.2966... 0.1234 sec/batch Epoch: 4/20... Training Step: 1583... Training loss: 1.2735... 0.1240 sec/batch Epoch: 4/20... Training Step: 1584... Training loss: 1.5259... 0.1202 sec/batch Epoch: 4/20... Training Step: 1585... Training loss: 1.4117... 0.1178 sec/batch Epoch: 4/20... Training Step: 1586... Training loss: 1.4221... 0.1234 sec/batch Epoch: 4/20... Training Step: 1587... Training loss: 1.5599... 0.1219 sec/batch Epoch: 4/20... Training Step: 1588... Training loss: 1.3522... 0.1262 sec/batch Epoch: 4/20... Training Step: 1589... Training loss: 1.2736... 0.1309 sec/batch Epoch: 4/20... Training Step: 1590... Training loss: 1.4886... 0.1279 sec/batch Epoch: 4/20... Training Step: 1591... Training loss: 1.2435... 0.1349 sec/batch Epoch: 4/20... Training Step: 1592... Training loss: 1.2611... 0.1300 sec/batch Epoch: 4/20... Training Step: 1593... Training loss: 1.3498... 0.1319 sec/batch Epoch: 4/20... Training Step: 1594... Training loss: 1.3829... 0.1224 sec/batch Epoch: 4/20... Training Step: 1595... Training loss: 1.3758... 0.1255 sec/batch Epoch: 4/20... Training Step: 1596... Training loss: 1.5544... 0.1204 sec/batch Epoch: 4/20... Training Step: 1597... Training loss: 1.3693... 0.1221 sec/batch Epoch: 4/20... Training Step: 1598... Training loss: 1.2057... 0.1349 sec/batch Epoch: 4/20... Training Step: 1599... Training loss: 1.2627... 0.1315 sec/batch Epoch: 4/20... Training Step: 1600... Training loss: 1.4666... 0.1432 sec/batch Epoch: 4/20... Training Step: 1601... Training loss: 1.4690... 0.1338 sec/batch Epoch: 4/20... Training Step: 1602... Training loss: 1.3161... 
0.1553 sec/batch Epoch: 4/20... Training Step: 1603... Training loss: 1.0884... 0.1282 sec/batch Epoch: 4/20... Training Step: 1604... Training loss: 1.4143... 0.1227 sec/batch Epoch: 4/20... Training Step: 1605... Training loss: 1.5037... 0.1251 sec/batch Epoch: 4/20... Training Step: 1606... Training loss: 1.4227... 0.1244 sec/batch Epoch: 4/20... Training Step: 1607... Training loss: 1.5015... 0.1221 sec/batch Epoch: 4/20... Training Step: 1608... Training loss: 1.3333... 0.1235 sec/batch Epoch: 4/20... Training Step: 1609... Training loss: 1.5212... 0.1224 sec/batch Epoch: 4/20... Training Step: 1610... Training loss: 1.2993... 0.1238 sec/batch Epoch: 4/20... Training Step: 1611... Training loss: 1.5958... 0.1299 sec/batch Epoch: 4/20... Training Step: 1612... Training loss: 1.3819... 0.1300 sec/batch Epoch: 4/20... Training Step: 1613... Training loss: 1.2495... 0.1296 sec/batch Epoch: 4/20... Training Step: 1614... Training loss: 1.6065... 0.1306 sec/batch Epoch: 4/20... Training Step: 1615... Training loss: 1.6064... 0.1313 sec/batch Epoch: 4/20... Training Step: 1616... Training loss: 1.5914... 0.1289 sec/batch Epoch: 4/20... Training Step: 1617... Training loss: 1.3934... 0.1289 sec/batch Epoch: 4/20... Training Step: 1618... Training loss: 1.6287... 0.1289 sec/batch Epoch: 4/20... Training Step: 1619... Training loss: 1.5575... 0.1284 sec/batch Epoch: 4/20... Training Step: 1620... Training loss: 1.2240... 0.1257 sec/batch Epoch: 4/20... Training Step: 1621... Training loss: 1.4031... 0.1268 sec/batch Epoch: 4/20... Training Step: 1622... Training loss: 1.3496... 0.1323 sec/batch Epoch: 4/20... Training Step: 1623... Training loss: 1.3593... 0.1295 sec/batch Epoch: 4/20... Training Step: 1624... Training loss: 1.3055... 0.1310 sec/batch Epoch: 4/20... Training Step: 1625... Training loss: 1.6255... 0.1297 sec/batch Epoch: 4/20... Training Step: 1626... Training loss: 1.3199... 0.1245 sec/batch Epoch: 4/20... Training Step: 1627... Training loss: 1.6438... 
0.1193 sec/batch Epoch: 4/20... Training Step: 1628... Training loss: 1.3492... 0.1222 sec/batch Epoch: 4/20... Training Step: 1629... Training loss: 1.7020... 0.1184 sec/batch Epoch: 4/20... Training Step: 1630... Training loss: 1.3203... 0.1252 sec/batch Epoch: 4/20... Training Step: 1631... Training loss: 1.4881... 0.1253 sec/batch Epoch: 4/20... Training Step: 1632... Training loss: 1.5097... 0.1213 sec/batch Epoch: 4/20... Training Step: 1633... Training loss: 1.4173... 0.1257 sec/batch Epoch: 4/20... Training Step: 1634... Training loss: 1.3100... 0.1241 sec/batch Epoch: 4/20... Training Step: 1635... Training loss: 1.6259... 0.1226 sec/batch Epoch: 4/20... Training Step: 1636... Training loss: 1.5558... 0.1209 sec/batch Epoch: 4/20... Training Step: 1637... Training loss: 1.5569... 0.1200 sec/batch Epoch: 4/20... Training Step: 1638... Training loss: 1.3143... 0.1193 sec/batch Epoch: 4/20... Training Step: 1639... Training loss: 1.2947... 0.1234 sec/batch Epoch: 4/20... Training Step: 1640... Training loss: 1.4623... 0.1248 sec/batch Epoch: 4/20... Training Step: 1641... Training loss: 1.3586... 0.1394 sec/batch Epoch: 4/20... Training Step: 1642... Training loss: 1.3476... 0.1464 sec/batch Epoch: 4/20... Training Step: 1643... Training loss: 1.4967... 0.1290 sec/batch Epoch: 4/20... Training Step: 1644... Training loss: 1.3591... 0.1230 sec/batch Epoch: 4/20... Training Step: 1645... Training loss: 1.2847... 0.1208 sec/batch Epoch: 4/20... Training Step: 1646... Training loss: 1.5421... 0.1176 sec/batch Epoch: 4/20... Training Step: 1647... Training loss: 1.3966... 0.1211 sec/batch Epoch: 4/20... Training Step: 1648... Training loss: 1.3962... 0.1202 sec/batch Epoch: 4/20... Training Step: 1649... Training loss: 1.4429... 0.1268 sec/batch Epoch: 4/20... Training Step: 1650... Training loss: 1.3419... 0.1183 sec/batch Epoch: 4/20... Training Step: 1651... Training loss: 1.3109... 0.1250 sec/batch Epoch: 4/20... Training Step: 1652... Training loss: 1.4374... 
0.1219 sec/batch Epoch: 4/20... Training Step: 1653... Training loss: 1.3746... 0.1232 sec/batch Epoch: 4/20... Training Step: 1654... Training loss: 1.5243... 0.1224 sec/batch Epoch: 4/20... Training Step: 1655... Training loss: 1.4850... 0.1235 sec/batch Epoch: 4/20... Training Step: 1656... Training loss: 1.5935... 0.1210 sec/batch Epoch: 4/20... Training Step: 1657... Training loss: 1.5334... 0.1215 sec/batch Epoch: 4/20... Training Step: 1658... Training loss: 1.4143... 0.1209 sec/batch Epoch: 4/20... Training Step: 1659... Training loss: 1.5740... 0.1208 sec/batch Epoch: 4/20... Training Step: 1660... Training loss: 1.5156... 0.1212 sec/batch Epoch: 4/20... Training Step: 1661... Training loss: 1.4966... 0.1257 sec/batch Epoch: 4/20... Training Step: 1662... Training loss: 1.5426... 0.1237 sec/batch Epoch: 4/20... Training Step: 1663... Training loss: 1.4888... 0.1230 sec/batch Epoch: 4/20... Training Step: 1664... Training loss: 1.5367... 0.1220 sec/batch Epoch: 4/20... Training Step: 1665... Training loss: 1.5524... 0.1247 sec/batch Epoch: 4/20... Training Step: 1666... Training loss: 1.4987... 0.1335 sec/batch Epoch: 4/20... Training Step: 1667... Training loss: 1.6245... 0.1233 sec/batch Epoch: 4/20... Training Step: 1668... Training loss: 1.4191... 0.1230 sec/batch Epoch: 4/20... Training Step: 1669... Training loss: 1.4755... 0.1256 sec/batch Epoch: 4/20... Training Step: 1670... Training loss: 1.7585... 0.1238 sec/batch Epoch: 4/20... Training Step: 1671... Training loss: 1.4467... 0.1227 sec/batch Epoch: 4/20... Training Step: 1672... Training loss: 1.3123... 0.1213 sec/batch Epoch: 4/20... Training Step: 1673... Training loss: 1.3756... 0.1251 sec/batch Epoch: 4/20... Training Step: 1674... Training loss: 1.2536... 0.1206 sec/batch Epoch: 4/20... Training Step: 1675... Training loss: 1.3763... 0.1223 sec/batch Epoch: 4/20... Training Step: 1676... Training loss: 1.4851... 0.1184 sec/batch Epoch: 4/20... Training Step: 1677... Training loss: 1.2984... 
0.1213 sec/batch Epoch: 4/20... Training Step: 1678... Training loss: 1.4871... 0.1206 sec/batch Epoch: 4/20... Training Step: 1679... Training loss: 1.3846... 0.1213 sec/batch Epoch: 4/20... Training Step: 1680... Training loss: 1.5179... 0.1239 sec/batch Epoch: 4/20... Training Step: 1681... Training loss: 1.4800... 0.1216 sec/batch Epoch: 4/20... Training Step: 1682... Training loss: 1.4224... 0.1206 sec/batch Epoch: 4/20... Training Step: 1683... Training loss: 1.3142... 0.1271 sec/batch Epoch: 4/20... Training Step: 1684... Training loss: 1.2903... 0.1236 sec/batch Epoch: 4/20... Training Step: 1685... Training loss: 1.2910... 0.1238 sec/batch Epoch: 4/20... Training Step: 1686... Training loss: 1.4233... 0.1226 sec/batch Epoch: 4/20... Training Step: 1687... Training loss: 1.3648... 0.1250 sec/batch Epoch: 4/20... Training Step: 1688... Training loss: 1.6586... 0.1287 sec/batch Epoch: 4/20... Training Step: 1689... Training loss: 1.3122... 0.1221 sec/batch Epoch: 4/20... Training Step: 1690... Training loss: 1.3081... 0.1304 sec/batch Epoch: 4/20... Training Step: 1691... Training loss: 1.3743... 0.1287 sec/batch Epoch: 4/20... Training Step: 1692... Training loss: 1.3375... 0.1274 sec/batch Epoch: 4/20... Training Step: 1693... Training loss: 1.3612... 0.1261 sec/batch Epoch: 4/20... Training Step: 1694... Training loss: 1.2892... 0.1288 sec/batch Epoch: 4/20... Training Step: 1695... Training loss: 1.1671... 0.1340 sec/batch Epoch: 4/20... Training Step: 1696... Training loss: 1.5735... 0.1237 sec/batch Epoch: 4/20... Training Step: 1697... Training loss: 1.3152... 0.1234 sec/batch Epoch: 4/20... Training Step: 1698... Training loss: 1.4739... 0.1179 sec/batch Epoch: 4/20... Training Step: 1699... Training loss: 1.3768... 0.1204 sec/batch Epoch: 4/20... Training Step: 1700... Training loss: 1.8189... 0.1197 sec/batch Epoch: 4/20... Training Step: 1701... Training loss: 1.4422... 0.1188 sec/batch Epoch: 4/20... Training Step: 1702... Training loss: 1.4908... 
0.1234 sec/batch Epoch: 4/20... Training Step: 1703... Training loss: 1.3663... 0.1165 sec/batch Epoch: 4/20... Training Step: 1704... Training loss: 1.2265... 0.1173 sec/batch Epoch: 4/20... Training Step: 1705... Training loss: 1.3273... 0.1172 sec/batch Epoch: 4/20... Training Step: 1706... Training loss: 1.2965... 0.1192 sec/batch Epoch: 4/20... Training Step: 1707... Training loss: 1.1678... 0.1235 sec/batch Epoch: 4/20... Training Step: 1708... Training loss: 1.3140... 0.1240 sec/batch Epoch: 4/20... Training Step: 1709... Training loss: 1.4159... 0.1265 sec/batch Epoch: 4/20... Training Step: 1710... Training loss: 1.2664... 0.1191 sec/batch Epoch: 4/20... Training Step: 1711... Training loss: 1.3747... 0.1237 sec/batch Epoch: 4/20... Training Step: 1712... Training loss: 1.2646... 0.1211 sec/batch Epoch: 4/20... Training Step: 1713... Training loss: 1.2504... 0.1237 sec/batch Epoch: 4/20... Training Step: 1714... Training loss: 1.5354... 0.1215 sec/batch Epoch: 4/20... Training Step: 1715... Training loss: 1.3490... 0.1257 sec/batch Epoch: 4/20... Training Step: 1716... Training loss: 1.2395... 0.1245 sec/batch Epoch: 4/20... Training Step: 1717... Training loss: 1.2260... 0.1177 sec/batch Epoch: 4/20... Training Step: 1718... Training loss: 1.1965... 0.1230 sec/batch Epoch: 4/20... Training Step: 1719... Training loss: 1.4599... 0.1239 sec/batch Epoch: 4/20... Training Step: 1720... Training loss: 1.3538... 0.1203 sec/batch Epoch: 4/20... Training Step: 1721... Training loss: 1.4356... 0.1214 sec/batch Epoch: 4/20... Training Step: 1722... Training loss: 1.2716... 0.1216 sec/batch Epoch: 4/20... Training Step: 1723... Training loss: 1.3971... 0.1220 sec/batch Epoch: 4/20... Training Step: 1724... Training loss: 1.3930... 0.1184 sec/batch Epoch: 4/20... Training Step: 1725... Training loss: 1.3373... 0.1242 sec/batch Epoch: 4/20... Training Step: 1726... Training loss: 1.3501... 0.1200 sec/batch Epoch: 4/20... Training Step: 1727... Training loss: 1.5114... 
0.1196 sec/batch Epoch: 4/20... Training Step: 1728... Training loss: 1.3357... 0.1212 sec/batch Epoch: 4/20... Training Step: 1729... Training loss: 1.4122... 0.1229 sec/batch Epoch: 4/20... Training Step: 1730... Training loss: 1.3890... 0.1264 sec/batch Epoch: 4/20... Training Step: 1731... Training loss: 1.4253... 0.1228 sec/batch Epoch: 4/20... Training Step: 1732... Training loss: 1.2306... 0.1203 sec/batch Epoch: 4/20... Training Step: 1733... Training loss: 1.2428... 0.1333 sec/batch Epoch: 4/20... Training Step: 1734... Training loss: 1.4867... 0.1221 sec/batch Epoch: 4/20... Training Step: 1735... Training loss: 1.1870... 0.1257 sec/batch Epoch: 4/20... Training Step: 1736... Training loss: 1.4857... 0.1272 sec/batch Epoch: 4/20... Training Step: 1737... Training loss: 1.2644... 0.1221 sec/batch Epoch: 4/20... Training Step: 1738... Training loss: 1.2092... 0.1258 sec/batch Epoch: 4/20... Training Step: 1739... Training loss: 1.0920... 0.1264 sec/batch Epoch: 4/20... Training Step: 1740... Training loss: 1.6339... 0.1245 sec/batch Epoch: 4/20... Training Step: 1741... Training loss: 1.2019... 0.1238 sec/batch Epoch: 4/20... Training Step: 1742... Training loss: 1.3660... 0.1203 sec/batch Epoch: 4/20... Training Step: 1743... Training loss: 1.2780... 0.1227 sec/batch Epoch: 4/20... Training Step: 1744... Training loss: 1.1986... 0.1247 sec/batch Epoch: 4/20... Training Step: 1745... Training loss: 1.2250... 0.1214 sec/batch Epoch: 4/20... Training Step: 1746... Training loss: 1.0924... 0.1207 sec/batch Epoch: 4/20... Training Step: 1747... Training loss: 1.4161... 0.1241 sec/batch Epoch: 4/20... Training Step: 1748... Training loss: 1.2304... 0.1211 sec/batch Epoch: 4/20... Training Step: 1749... Training loss: 1.2339... 0.1246 sec/batch Epoch: 4/20... Training Step: 1750... Training loss: 1.3953... 0.1239 sec/batch Epoch: 4/20... Training Step: 1751... Training loss: 1.5165... 0.1247 sec/batch Epoch: 4/20... Training Step: 1752... Training loss: 1.0447... 
0.1209 sec/batch Epoch: 4/20... Training Step: 1753... Training loss: 1.5157... 0.1246 sec/batch Epoch: 4/20... Training Step: 1754... Training loss: 1.4152... 0.1217 sec/batch Epoch: 4/20... Training Step: 1755... Training loss: 1.2556... 0.1244 sec/batch Epoch: 4/20... Training Step: 1756... Training loss: 1.3216... 0.1244 sec/batch Epoch: 4/20... Training Step: 1757... Training loss: 1.2850... 0.1195 sec/batch Epoch: 4/20... Training Step: 1758... Training loss: 1.3210... 0.1210 sec/batch Epoch: 4/20... Training Step: 1759... Training loss: 1.2160... 0.1211 sec/batch Epoch: 4/20... Training Step: 1760... Training loss: 1.4533... 0.1241 sec/batch Epoch: 4/20... Training Step: 1761... Training loss: 1.4620... 0.1244 sec/batch Epoch: 4/20... Training Step: 1762... Training loss: 1.3681... 0.1226 sec/batch Epoch: 4/20... Training Step: 1763... Training loss: 1.3665... 0.1221 sec/batch Epoch: 4/20... Training Step: 1764... Training loss: 1.5026... 0.1198 sec/batch Epoch: 4/20... Training Step: 1765... Training loss: 1.4163... 0.1240 sec/batch Epoch: 4/20... Training Step: 1766... Training loss: 1.5014... 0.1233 sec/batch Epoch: 4/20... Training Step: 1767... Training loss: 1.4156... 0.1220 sec/batch Epoch: 4/20... Training Step: 1768... Training loss: 1.3675... 0.1246 sec/batch Epoch: 4/20... Training Step: 1769... Training loss: 1.4033... 0.1246 sec/batch Epoch: 4/20... Training Step: 1770... Training loss: 1.4267... 0.1186 sec/batch Epoch: 4/20... Training Step: 1771... Training loss: 1.3637... 0.1161 sec/batch Epoch: 4/20... Training Step: 1772... Training loss: 1.5148... 0.1200 sec/batch Epoch: 4/20... Training Step: 1773... Training loss: 1.2203... 0.1197 sec/batch Epoch: 4/20... Training Step: 1774... Training loss: 1.2130... 0.1205 sec/batch Epoch: 4/20... Training Step: 1775... Training loss: 1.3131... 0.1195 sec/batch Epoch: 4/20... Training Step: 1776... Training loss: 1.3942... 0.1196 sec/batch Epoch: 4/20... Training Step: 1777... Training loss: 1.2957... 
0.1184 sec/batch Epoch: 4/20... Training Step: 1778... Training loss: 1.3659... 0.1243 sec/batch Epoch: 4/20... Training Step: 1779... Training loss: 1.3520... 0.1225 sec/batch Epoch: 4/20... Training Step: 1780... Training loss: 1.2150... 0.1190 sec/batch Epoch: 4/20... Training Step: 1781... Training loss: 1.2827... 0.1175 sec/batch Epoch: 4/20... Training Step: 1782... Training loss: 1.3544... 0.1189 sec/batch Epoch: 4/20... Training Step: 1783... Training loss: 1.2194... 0.1213 sec/batch Epoch: 4/20... Training Step: 1784... Training loss: 1.2046... 0.1239 sec/batch Epoch: 4/20... Training Step: 1785... Training loss: 1.2055... 0.1211 sec/batch Epoch: 4/20... Training Step: 1786... Training loss: 1.2770... 0.1193 sec/batch Epoch: 4/20... Training Step: 1787... Training loss: 1.2163... 0.1212 sec/batch Epoch: 4/20... Training Step: 1788... Training loss: 1.5733... 0.1216 sec/batch Epoch: 4/20... Training Step: 1789... Training loss: 1.2982... 0.1211 sec/batch Epoch: 4/20... Training Step: 1790... Training loss: 1.2508... 0.1250 sec/batch Epoch: 4/20... Training Step: 1791... Training loss: 1.3930... 0.1236 sec/batch Epoch: 4/20... Training Step: 1792... Training loss: 1.2744... 0.1216 sec/batch Epoch: 4/20... Training Step: 1793... Training loss: 1.2828... 0.1240 sec/batch Epoch: 4/20... Training Step: 1794... Training loss: 1.2882... 0.1256 sec/batch Epoch: 4/20... Training Step: 1795... Training loss: 1.1593... 0.1237 sec/batch Epoch: 4/20... Training Step: 1796... Training loss: 1.3708... 0.1224 sec/batch Epoch: 4/20... Training Step: 1797... Training loss: 1.2979... 0.1220 sec/batch Epoch: 4/20... Training Step: 1798... Training loss: 1.6349... 0.1223 sec/batch Epoch: 4/20... Training Step: 1799... Training loss: 1.3745... 0.1240 sec/batch Epoch: 4/20... Training Step: 1800... Training loss: 1.6144... 0.1219 sec/batch Epoch: 4/20... Training Step: 1801... Training loss: 1.4490... 0.1205 sec/batch Epoch: 4/20... Training Step: 1802... Training loss: 1.3823... 
0.1222 sec/batch Epoch: 4/20... Training Step: 1803... Training loss: 1.2400... 0.1209 sec/batch Epoch: 4/20... Training Step: 1804... Training loss: 1.4599... 0.1212 sec/batch Epoch: 4/20... Training Step: 1805... Training loss: 1.5113... 0.1237 sec/batch Epoch: 4/20... Training Step: 1806... Training loss: 1.4261... 0.1172 sec/batch Epoch: 4/20... Training Step: 1807... Training loss: 1.6043... 0.1224 sec/batch Epoch: 4/20... Training Step: 1808... Training loss: 1.6254... 0.1239 sec/batch Epoch: 4/20... Training Step: 1809... Training loss: 1.6262... 0.1234 sec/batch Epoch: 4/20... Training Step: 1810... Training loss: 1.2339... 0.1252 sec/batch Epoch: 4/20... Training Step: 1811... Training loss: 1.4666... 0.1193 sec/batch Epoch: 4/20... Training Step: 1812... Training loss: 1.1788... 0.1275 sec/batch Epoch: 4/20... Training Step: 1813... Training loss: 1.5304... 0.1238 sec/batch Epoch: 4/20... Training Step: 1814... Training loss: 1.4188... 0.1226 sec/batch Epoch: 4/20... Training Step: 1815... Training loss: 1.5319... 0.1204 sec/batch Epoch: 4/20... Training Step: 1816... Training loss: 1.5240... 0.1202 sec/batch Epoch: 4/20... Training Step: 1817... Training loss: 1.3966... 0.1207 sec/batch Epoch: 4/20... Training Step: 1818... Training loss: 1.3632... 0.1177 sec/batch Epoch: 4/20... Training Step: 1819... Training loss: 1.4918... 0.1213 sec/batch Epoch: 4/20... Training Step: 1820... Training loss: 1.4537... 0.1198 sec/batch Epoch: 4/20... Training Step: 1821... Training loss: 1.2203... 0.1245 sec/batch Epoch: 4/20... Training Step: 1822... Training loss: 1.4192... 0.1237 sec/batch Epoch: 4/20... Training Step: 1823... Training loss: 1.6875... 0.1224 sec/batch Epoch: 4/20... Training Step: 1824... Training loss: 1.3888... 0.1219 sec/batch Epoch: 4/20... Training Step: 1825... Training loss: 1.7070... 0.1238 sec/batch Epoch: 4/20... Training Step: 1826... Training loss: 1.5605... 0.1244 sec/batch Epoch: 4/20... Training Step: 1827... Training loss: 1.2419... 
0.1290 sec/batch Epoch: 4/20... Training Step: 1828... Training loss: 1.3892... 0.1277 sec/batch Epoch: 4/20... Training Step: 1829... Training loss: 1.3549... 0.1251 sec/batch Epoch: 4/20... Training Step: 1830... Training loss: 1.6378... 0.1176 sec/batch Epoch: 4/20... Training Step: 1831... Training loss: 1.6030... 0.1207 sec/batch Epoch: 4/20... Training Step: 1832... Training loss: 1.6696... 0.1241 sec/batch Epoch: 4/20... Training Step: 1833... Training loss: 1.4242... 0.1178 sec/batch Epoch: 4/20... Training Step: 1834... Training loss: 1.3844... 0.1257 sec/batch Epoch: 4/20... Training Step: 1835... Training loss: 1.5733... 0.1199 sec/batch Epoch: 4/20... Training Step: 1836... Training loss: 1.3559... 0.1257 sec/batch Epoch: 4/20... Training Step: 1837... Training loss: 1.4308... 0.1295 sec/batch Epoch: 4/20... Training Step: 1838... Training loss: 1.2822... 0.1288 sec/batch Epoch: 4/20... Training Step: 1839... Training loss: 1.3567... 0.1241 sec/batch Epoch: 4/20... Training Step: 1840... Training loss: 1.1859... 0.1205 sec/batch Epoch: 4/20... Training Step: 1841... Training loss: 1.3348... 0.1231 sec/batch Epoch: 4/20... Training Step: 1842... Training loss: 1.4622... 0.1269 sec/batch Epoch: 4/20... Training Step: 1843... Training loss: 1.3571... 0.1211 sec/batch Epoch: 4/20... Training Step: 1844... Training loss: 1.4487... 0.1246 sec/batch Epoch: 4/20... Training Step: 1845... Training loss: 1.2888... 0.1223 sec/batch Epoch: 4/20... Training Step: 1846... Training loss: 1.6262... 0.1220 sec/batch Epoch: 4/20... Training Step: 1847... Training loss: 1.4500... 0.1233 sec/batch Epoch: 4/20... Training Step: 1848... Training loss: 1.2393... 0.1183 sec/batch Epoch: 4/20... Training Step: 1849... Training loss: 1.2943... 0.1254 sec/batch Epoch: 4/20... Training Step: 1850... Training loss: 1.2009... 0.1254 sec/batch Epoch: 4/20... Training Step: 1851... Training loss: 1.2055... 0.1217 sec/batch Epoch: 4/20... Training Step: 1852... Training loss: 1.3902... 
0.1260 sec/batch Epoch: 4/20... Training Step: 1853... Training loss: 1.4751... 0.1182 sec/batch Epoch: 4/20... Training Step: 1854... Training loss: 1.4420... 0.1227 sec/batch Epoch: 4/20... Training Step: 1855... Training loss: 1.4047... 0.1219 sec/batch Epoch: 4/20... Training Step: 1856... Training loss: 1.2511... 0.1212 sec/batch Epoch: 5/20... Training Step: 1857... Training loss: 1.7105... 0.1221 sec/batch Epoch: 5/20... Training Step: 1858... Training loss: 1.4392... 0.1208 sec/batch Epoch: 5/20... Training Step: 1859... Training loss: 1.3687... 0.1208 sec/batch Epoch: 5/20... Training Step: 1860... Training loss: 1.3123... 0.1208 sec/batch Epoch: 5/20... Training Step: 1861... Training loss: 1.4527... 0.1202 sec/batch Epoch: 5/20... Training Step: 1862... Training loss: 1.1734... 0.1185 sec/batch Epoch: 5/20... Training Step: 1863... Training loss: 1.4351... 0.1180 sec/batch Epoch: 5/20... Training Step: 1864... Training loss: 1.2495... 0.1229 sec/batch Epoch: 5/20... Training Step: 1865... Training loss: 1.2998... 0.1207 sec/batch Epoch: 5/20... Training Step: 1866... Training loss: 1.4686... 0.1215 sec/batch Epoch: 5/20... Training Step: 1867... Training loss: 1.3448... 0.1185 sec/batch Epoch: 5/20... Training Step: 1868... Training loss: 1.1075... 0.1214 sec/batch Epoch: 5/20... Training Step: 1869... Training loss: 1.5307... 0.1251 sec/batch Epoch: 5/20... Training Step: 1870... Training loss: 1.0672... 0.1218 sec/batch Epoch: 5/20... Training Step: 1871... Training loss: 1.3289... 0.1217 sec/batch Epoch: 5/20... Training Step: 1872... Training loss: 1.4295... 0.1224 sec/batch Epoch: 5/20... Training Step: 1873... Training loss: 1.2210... 0.1206 sec/batch Epoch: 5/20... Training Step: 1874... Training loss: 1.1445... 0.1275 sec/batch Epoch: 5/20... Training Step: 1875... Training loss: 1.3625... 0.1205 sec/batch Epoch: 5/20... Training Step: 1876... Training loss: 1.1365... 0.1285 sec/batch Epoch: 5/20... Training Step: 1877... Training loss: 1.4231... 
0.1338 sec/batch Epoch: 5/20... Training Step: 1878... Training loss: 1.2400... 0.1332 sec/batch Epoch: 5/20... Training Step: 1879... Training loss: 1.4825... 0.1294 sec/batch Epoch: 5/20... Training Step: 1880... Training loss: 1.2371... 0.1276 sec/batch Epoch: 5/20... Training Step: 1881... Training loss: 1.3419... 0.1256 sec/batch Epoch: 5/20... Training Step: 1882... Training loss: 1.2379... 0.1311 sec/batch Epoch: 5/20... Training Step: 1883... Training loss: 1.4356... 0.1354 sec/batch Epoch: 5/20... Training Step: 1884... Training loss: 1.1254... 0.1272 sec/batch Epoch: 5/20... Training Step: 1885... Training loss: 1.2998... 0.1333 sec/batch Epoch: 5/20... Training Step: 1886... Training loss: 1.3118... 0.1359 sec/batch Epoch: 5/20... Training Step: 1887... Training loss: 1.1696... 0.1302 sec/batch Epoch: 5/20... Training Step: 1888... Training loss: 1.2502... 0.1343 sec/batch Epoch: 5/20... Training Step: 1889... Training loss: 1.1884... 0.1181 sec/batch Epoch: 5/20... Training Step: 1890... Training loss: 1.1185... 0.1325 sec/batch Epoch: 5/20... Training Step: 1891... Training loss: 1.1801... 0.1278 sec/batch Epoch: 5/20... Training Step: 1892... Training loss: 1.2366... 0.1285 sec/batch Epoch: 5/20... Training Step: 1893... Training loss: 1.3793... 0.1291 sec/batch Epoch: 5/20... Training Step: 1894... Training loss: 1.1703... 0.1350 sec/batch Epoch: 5/20... Training Step: 1895... Training loss: 1.2572... 0.1330 sec/batch Epoch: 5/20... Training Step: 1896... Training loss: 1.5438... 0.1270 sec/batch Epoch: 5/20... Training Step: 1897... Training loss: 1.2769... 0.1335 sec/batch Epoch: 5/20... Training Step: 1898... Training loss: 1.1648... 0.1309 sec/batch Epoch: 5/20... Training Step: 1899... Training loss: 1.4280... 0.1296 sec/batch Epoch: 5/20... Training Step: 1900... Training loss: 1.1000... 0.1308 sec/batch Epoch: 5/20... Training Step: 1901... Training loss: 1.2958... 0.1389 sec/batch Epoch: 5/20... Training Step: 1902... Training loss: 1.2596... 
0.1345 sec/batch Epoch: 5/20... Training Step: 1903... Training loss: 1.2586... 0.1307 sec/batch Epoch: 5/20... Training Step: 1904... Training loss: 1.2538... 0.1231 sec/batch Epoch: 5/20... Training Step: 1905... Training loss: 1.2464... 0.1273 sec/batch Epoch: 5/20... Training Step: 1906... Training loss: 1.3838... 0.1286 sec/batch Epoch: 5/20... Training Step: 1907... Training loss: 1.2342... 0.1372 sec/batch Epoch: 5/20... Training Step: 1908... Training loss: 1.3099... 0.1282 sec/batch Epoch: 5/20... Training Step: 1909... Training loss: 1.4159... 0.1301 sec/batch Epoch: 5/20... Training Step: 1910... Training loss: 1.4648... 0.1310 sec/batch Epoch: 5/20... Training Step: 1911... Training loss: 1.1761... 0.1243 sec/batch Epoch: 5/20... Training Step: 1912... Training loss: 1.2998... 0.1310 sec/batch Epoch: 5/20... Training Step: 1913... Training loss: 1.4938... 0.1315 sec/batch Epoch: 5/20... Training Step: 1914... Training loss: 1.3429... 0.1309 sec/batch Epoch: 5/20... Training Step: 1915... Training loss: 1.1736... 0.1322 sec/batch Epoch: 5/20... Training Step: 1916... Training loss: 1.1663... 0.1367 sec/batch Epoch: 5/20... Training Step: 1917... Training loss: 1.3314... 0.1303 sec/batch Epoch: 5/20... Training Step: 1918... Training loss: 1.4389... 0.1276 sec/batch Epoch: 5/20... Training Step: 1919... Training loss: 1.1892... 0.1304 sec/batch Epoch: 5/20... Training Step: 1920... Training loss: 1.2446... 0.1375 sec/batch Epoch: 5/20... Training Step: 1921... Training loss: 1.2025... 0.1246 sec/batch Epoch: 5/20... Training Step: 1922... Training loss: 1.4916... 0.1248 sec/batch Epoch: 5/20... Training Step: 1923... Training loss: 1.2647... 0.1320 sec/batch Epoch: 5/20... Training Step: 1924... Training loss: 1.3336... 0.1256 sec/batch Epoch: 5/20... Training Step: 1925... Training loss: 1.2618... 0.1258 sec/batch Epoch: 5/20... Training Step: 1926... Training loss: 1.2409... 0.1357 sec/batch Epoch: 5/20... Training Step: 1927... Training loss: 1.3869... 
0.1303 sec/batch Epoch: 5/20... Training Step: 1928... Training loss: 1.3183... 0.1320 sec/batch Epoch: 5/20... Training Step: 1929... Training loss: 1.3104... 0.1383 sec/batch Epoch: 5/20... Training Step: 1930... Training loss: 1.1808... 0.1385 sec/batch Epoch: 5/20... Training Step: 1931... Training loss: 1.4363... 0.1305 sec/batch Epoch: 5/20... Training Step: 1932... Training loss: 1.2102... 0.1289 sec/batch Epoch: 5/20... Training Step: 1933... Training loss: 1.2449... 0.1369 sec/batch Epoch: 5/20... Training Step: 1934... Training loss: 1.2348... 0.1138 sec/batch Epoch: 5/20... Training Step: 1935... Training loss: 1.2913... 0.1205 sec/batch Epoch: 5/20... Training Step: 1936... Training loss: 1.2973... 0.1301 sec/batch Epoch: 5/20... Training Step: 1937... Training loss: 1.4525... 0.1316 sec/batch Epoch: 5/20... Training Step: 1938... Training loss: 1.3099... 0.1242 sec/batch Epoch: 5/20... Training Step: 1939... Training loss: 1.3122... 0.1203 sec/batch Epoch: 5/20... Training Step: 1940... Training loss: 1.4561... 0.1139 sec/batch Epoch: 5/20... Training Step: 1941... Training loss: 1.4068... 0.1303 sec/batch Epoch: 5/20... Training Step: 1942... Training loss: 1.3605... 0.1296 sec/batch Epoch: 5/20... Training Step: 1943... Training loss: 1.2415... 0.1241 sec/batch Epoch: 5/20... Training Step: 1944... Training loss: 1.4320... 0.1227 sec/batch Epoch: 5/20... Training Step: 1945... Training loss: 1.4569... 0.1209 sec/batch Epoch: 5/20... Training Step: 1946... Training loss: 1.2464... 0.1224 sec/batch Epoch: 5/20... Training Step: 1947... Training loss: 1.4470... 0.1210 sec/batch Epoch: 5/20... Training Step: 1948... Training loss: 1.4155... 0.1351 sec/batch Epoch: 5/20... Training Step: 1949... Training loss: 1.2021... 0.1289 sec/batch Epoch: 5/20... Training Step: 1950... Training loss: 1.4768... 0.1244 sec/batch Epoch: 5/20... Training Step: 1951... Training loss: 1.2854... 0.1321 sec/batch Epoch: 5/20... Training Step: 1952... Training loss: 1.3493... 
0.1369 sec/batch Epoch: 5/20... Training Step: 1953... Training loss: 1.6357... 0.1289 sec/batch Epoch: 5/20... Training Step: 1954... Training loss: 1.4641... 0.1247 sec/batch Epoch: 5/20... Training Step: 1955... Training loss: 1.3350... 0.1318 sec/batch Epoch: 5/20... Training Step: 1956... Training loss: 1.2323... 0.1292 sec/batch Epoch: 5/20... Training Step: 1957... Training loss: 1.4544... 0.1296 sec/batch Epoch: 5/20... Training Step: 1958... Training loss: 1.6270... 0.1229 sec/batch Epoch: 5/20... Training Step: 1959... Training loss: 1.5493... 0.1312 sec/batch Epoch: 5/20... Training Step: 1960... Training loss: 1.3591... 0.1268 sec/batch Epoch: 5/20... Training Step: 1961... Training loss: 1.5031... 0.1207 sec/batch Epoch: 5/20... Training Step: 1962... Training loss: 1.6247... 0.1215 sec/batch Epoch: 5/20... Training Step: 1963... Training loss: 1.4599... 0.1295 sec/batch Epoch: 5/20... Training Step: 1964... Training loss: 1.5750... 0.1308 sec/batch Epoch: 5/20... Training Step: 1965... Training loss: 1.3838... 0.1406 sec/batch Epoch: 5/20... Training Step: 1966... Training loss: 1.2777... 0.1342 sec/batch Epoch: 5/20... Training Step: 1967... Training loss: 1.4589... 0.1398 sec/batch Epoch: 5/20... Training Step: 1968... Training loss: 1.4192... 0.1329 sec/batch Epoch: 5/20... Training Step: 1969... Training loss: 1.4203... 0.1309 sec/batch Epoch: 5/20... Training Step: 1970... Training loss: 1.5021... 0.1325 sec/batch Epoch: 5/20... Training Step: 1971... Training loss: 1.4147... 0.1296 sec/batch Epoch: 5/20... Training Step: 1972... Training loss: 1.3449... 0.1320 sec/batch Epoch: 5/20... Training Step: 1973... Training loss: 1.4838... 0.1299 sec/batch Epoch: 5/20... Training Step: 1974... Training loss: 1.3985... 0.1306 sec/batch Epoch: 5/20... Training Step: 1975... Training loss: 1.3387... 0.1373 sec/batch Epoch: 5/20... Training Step: 1976... Training loss: 1.1368... 0.1269 sec/batch Epoch: 5/20... Training Step: 1977... Training loss: 1.3112... 
0.1307 sec/batch Epoch: 5/20... Training Step: 1978... Training loss: 1.4505... 0.1320 sec/batch Epoch: 5/20... Training Step: 1979... Training loss: 1.3829... 0.1240 sec/batch Epoch: 5/20... Training Step: 1980... Training loss: 1.5161... 0.1268 sec/batch Epoch: 5/20... Training Step: 1981... Training loss: 1.3980... 0.1318 sec/batch Epoch: 5/20... Training Step: 1982... Training loss: 1.1892... 0.1297 sec/batch Epoch: 5/20... Training Step: 1983... Training loss: 1.4018... 0.1304 sec/batch Epoch: 5/20... Training Step: 1984... Training loss: 1.3985... 0.1267 sec/batch Epoch: 5/20... Training Step: 1985... Training loss: 1.5136... 0.1328 sec/batch Epoch: 5/20... Training Step: 1986... Training loss: 1.2822... 0.1307 sec/batch Epoch: 5/20... Training Step: 1987... Training loss: 1.6553... 0.1299 sec/batch Epoch: 5/20... Training Step: 1988... Training loss: 1.3044... 0.1330 sec/batch Epoch: 5/20... Training Step: 1989... Training loss: 1.3977... 0.1297 sec/batch Epoch: 5/20... Training Step: 1990... Training loss: 1.4979... 0.1301 sec/batch Epoch: 5/20... Training Step: 1991... Training loss: 1.2436... 0.1321 sec/batch Epoch: 5/20... Training Step: 1992... Training loss: 1.1109... 0.1300 sec/batch Epoch: 5/20... Training Step: 1993... Training loss: 1.1794... 0.1300 sec/batch Epoch: 5/20... Training Step: 1994... Training loss: 1.4049... 0.1309 sec/batch Epoch: 5/20... Training Step: 1995... Training loss: 1.2522... 0.1308 sec/batch Epoch: 5/20... Training Step: 1996... Training loss: 1.4161... 0.1297 sec/batch Epoch: 5/20... Training Step: 1997... Training loss: 1.2564... 0.1243 sec/batch Epoch: 5/20... Training Step: 1998... Training loss: 1.2638... 0.1195 sec/batch Epoch: 5/20... Training Step: 1999... Training loss: 1.1311... 0.1188 sec/batch Epoch: 5/20... Training Step: 2000... Training loss: 1.4742... 0.1220 sec/batch Epoch: 5/20... Training Step: 2001... Training loss: 1.3373... 0.1201 sec/batch Epoch: 5/20... Training Step: 2002... Training loss: 1.2330... 
0.1250 sec/batch Epoch: 5/20... Training Step: 2003... Training loss: 1.3269... 0.1252 sec/batch Epoch: 5/20... Training Step: 2004... Training loss: 1.3388... 0.1229 sec/batch Epoch: 5/20... Training Step: 2005... Training loss: 1.3626... 0.1262 sec/batch Epoch: 5/20... Training Step: 2006... Training loss: 1.5422... 0.1200 sec/batch Epoch: 5/20... Training Step: 2007... Training loss: 1.4364... 0.1206 sec/batch Epoch: 5/20... Training Step: 2008... Training loss: 1.4181... 0.1219 sec/batch Epoch: 5/20... Training Step: 2009... Training loss: 1.5090... 0.1242 sec/batch Epoch: 5/20... Training Step: 2010... Training loss: 1.3017... 0.1242 sec/batch Epoch: 5/20... Training Step: 2011... Training loss: 1.4210... 0.1232 sec/batch Epoch: 5/20... Training Step: 2012... Training loss: 1.2559... 0.1219 sec/batch Epoch: 5/20... Training Step: 2013... Training loss: 1.2238... 0.1236 sec/batch Epoch: 5/20... Training Step: 2014... Training loss: 1.2536... 0.1266 sec/batch Epoch: 5/20... Training Step: 2015... Training loss: 1.1755... 0.1243 sec/batch Epoch: 5/20... Training Step: 2016... Training loss: 1.2535... 0.1226 sec/batch Epoch: 5/20... Training Step: 2017... Training loss: 1.4850... 0.1190 sec/batch Epoch: 5/20... Training Step: 2018... Training loss: 1.3355... 0.1211 sec/batch Epoch: 5/20... Training Step: 2019... Training loss: 1.4649... 0.1212 sec/batch Epoch: 5/20... Training Step: 2020... Training loss: 1.2030... 0.1236 sec/batch Epoch: 5/20... Training Step: 2021... Training loss: 1.3589... 0.1196 sec/batch Epoch: 5/20... Training Step: 2022... Training loss: 1.2917... 0.1188 sec/batch Epoch: 5/20... Training Step: 2023... Training loss: 1.2187... 0.1251 sec/batch Epoch: 5/20... Training Step: 2024... Training loss: 1.5688... 0.1228 sec/batch Epoch: 5/20... Training Step: 2025... Training loss: 1.3659... 0.1200 sec/batch Epoch: 5/20... Training Step: 2026... Training loss: 1.4193... 0.1325 sec/batch Epoch: 5/20... Training Step: 2027... Training loss: 1.3733... 
0.1341 sec/batch Epoch: 5/20... Training Step: 2028... Training loss: 1.4903... 0.1251 sec/batch Epoch: 5/20... Training Step: 2029... Training loss: 1.3682... 0.1219 sec/batch Epoch: 5/20... Training Step: 2030... Training loss: 1.4203... 0.1237 sec/batch Epoch: 5/20... Training Step: 2031... Training loss: 1.4826... 0.1158 sec/batch Epoch: 5/20... Training Step: 2032... Training loss: 1.1715... 0.1249 sec/batch Epoch: 5/20... Training Step: 2033... Training loss: 1.1239... 0.1208 sec/batch Epoch: 5/20... Training Step: 2034... Training loss: 1.4076... 0.1162 sec/batch Epoch: 5/20... Training Step: 2035... Training loss: 1.2335... 0.1242 sec/batch Epoch: 5/20... Training Step: 2036... Training loss: 1.4004... 0.1203 sec/batch Epoch: 5/20... Training Step: 2037... Training loss: 1.2360... 0.1220 sec/batch Epoch: 5/20... Training Step: 2038... Training loss: 1.4961... 0.1233 sec/batch Epoch: 5/20... Training Step: 2039... Training loss: 1.4774... 0.1221 sec/batch Epoch: 5/20... Training Step: 2040... Training loss: 1.3138... 0.1237 sec/batch Epoch: 5/20... Training Step: 2041... Training loss: 1.4879... 0.1273 sec/batch Epoch: 5/20... Training Step: 2042... Training loss: 1.4395... 0.1312 sec/batch Epoch: 5/20... Training Step: 2043... Training loss: 1.4202... 0.1251 sec/batch Epoch: 5/20... Training Step: 2044... Training loss: 1.1512... 0.1229 sec/batch Epoch: 5/20... Training Step: 2045... Training loss: 1.4685... 0.1202 sec/batch Epoch: 5/20... Training Step: 2046... Training loss: 1.2811... 0.1248 sec/batch Epoch: 5/20... Training Step: 2047... Training loss: 1.2285... 0.1291 sec/batch Epoch: 5/20... Training Step: 2048... Training loss: 1.5609... 0.1284 sec/batch Epoch: 5/20... Training Step: 2049... Training loss: 1.4412... 0.1253 sec/batch Epoch: 5/20... Training Step: 2050... Training loss: 1.2376... 0.1265 sec/batch Epoch: 5/20... Training Step: 2051... Training loss: 1.4113... 0.1338 sec/batch Epoch: 5/20... Training Step: 2052... Training loss: 1.3083... 
0.1321 sec/batch Epoch: 5/20... Training Step: 2053... Training loss: 1.2489... 0.1240 sec/batch Epoch: 5/20... Training Step: 2054... Training loss: 1.4137... 0.1216 sec/batch Epoch: 5/20... Training Step: 2055... Training loss: 1.0055... 0.1286 sec/batch Epoch: 5/20... Training Step: 2056... Training loss: 1.2158... 0.1273 sec/batch Epoch: 5/20... Training Step: 2057... Training loss: 1.2625... 0.1221 sec/batch Epoch: 5/20... Training Step: 2058... Training loss: 1.2687... 0.1235 sec/batch Epoch: 5/20... Training Step: 2059... Training loss: 1.3201... 0.1393 sec/batch Epoch: 5/20... Training Step: 2060... Training loss: 1.5002... 0.1273 sec/batch Epoch: 5/20... Training Step: 2061... Training loss: 1.2850... 0.1207 sec/batch Epoch: 5/20... Training Step: 2062... Training loss: 1.1946... 0.1206 sec/batch Epoch: 5/20... Training Step: 2063... Training loss: 1.2273... 0.1231 sec/batch Epoch: 5/20... Training Step: 2064... Training loss: 1.3940... 0.1257 sec/batch Epoch: 5/20... Training Step: 2065... Training loss: 1.3493... 0.1207 sec/batch Epoch: 5/20... Training Step: 2066... Training loss: 1.1373... 0.1229 sec/batch Epoch: 5/20... Training Step: 2067... Training loss: 1.1226... 0.1202 sec/batch Epoch: 5/20... Training Step: 2068... Training loss: 1.4506... 0.1214 sec/batch Epoch: 5/20... Training Step: 2069... Training loss: 1.5134... 0.1205 sec/batch Epoch: 5/20... Training Step: 2070... Training loss: 1.3288... 0.1207 sec/batch Epoch: 5/20... Training Step: 2071... Training loss: 1.4938... 0.1267 sec/batch Epoch: 5/20... Training Step: 2072... Training loss: 1.2917... 0.1212 sec/batch Epoch: 5/20... Training Step: 2073... Training loss: 1.3838... 0.1245 sec/batch Epoch: 5/20... Training Step: 2074... Training loss: 1.2705... 0.1221 sec/batch Epoch: 5/20... Training Step: 2075... Training loss: 1.5055... 0.1240 sec/batch Epoch: 5/20... Training Step: 2076... Training loss: 1.2751... 0.1212 sec/batch Epoch: 5/20... Training Step: 2077... Training loss: 1.2524... 
0.1229 sec/batch Epoch: 5/20... Training Step: 2078... Training loss: 1.4998... 0.1214 sec/batch Epoch: 5/20... Training Step: 2079... Training loss: 1.5108... 0.1255 sec/batch Epoch: 5/20... Training Step: 2080... Training loss: 1.5976... 0.1239 sec/batch Epoch: 5/20... Training Step: 2081... Training loss: 1.4327... 0.1278 sec/batch Epoch: 5/20... Training Step: 2082... Training loss: 1.5730... 0.1260 sec/batch Epoch: 5/20... Training Step: 2083... Training loss: 1.5314... 0.1323 sec/batch Epoch: 5/20... Training Step: 2084... Training loss: 1.1941... 0.1263 sec/batch Epoch: 5/20... Training Step: 2085... Training loss: 1.3786... 0.1315 sec/batch Epoch: 5/20... Training Step: 2086... Training loss: 1.3234... 0.1336 sec/batch Epoch: 5/20... Training Step: 2087... Training loss: 1.3384... 0.1253 sec/batch Epoch: 5/20... Training Step: 2088... Training loss: 1.2511... 0.1285 sec/batch Epoch: 5/20... Training Step: 2089... Training loss: 1.6410... 0.1235 sec/batch Epoch: 5/20... Training Step: 2090... Training loss: 1.3386... 0.1193 sec/batch Epoch: 5/20... Training Step: 2091... Training loss: 1.5113... 0.1273 sec/batch Epoch: 5/20... Training Step: 2092... Training loss: 1.3396... 0.1211 sec/batch Epoch: 5/20... Training Step: 2093... Training loss: 1.6290... 0.1292 sec/batch Epoch: 5/20... Training Step: 2094... Training loss: 1.2716... 0.1308 sec/batch Epoch: 5/20... Training Step: 2095... Training loss: 1.3236... 0.1286 sec/batch Epoch: 5/20... Training Step: 2096... Training loss: 1.4320... 0.1312 sec/batch Epoch: 5/20... Training Step: 2097... Training loss: 1.3221... 0.1297 sec/batch Epoch: 5/20... Training Step: 2098... Training loss: 1.2143... 0.1261 sec/batch Epoch: 5/20... Training Step: 2099... Training loss: 1.4509... 0.1210 sec/batch Epoch: 5/20... Training Step: 2100... Training loss: 1.4199... 0.1252 sec/batch Epoch: 5/20... Training Step: 2101... Training loss: 1.3452... 0.1240 sec/batch Epoch: 5/20... Training Step: 2102... Training loss: 1.1982... 
0.1189 sec/batch Epoch: 5/20... Training Step: 2103... Training loss: 1.3105... 0.1236 sec/batch Epoch: 5/20... Training Step: 2104... Training loss: 1.4242... 0.1205 sec/batch Epoch: 5/20... Training Step: 2105... Training loss: 1.3309... 0.1208 sec/batch Epoch: 5/20... Training Step: 2106... Training loss: 1.2867... 0.1221 sec/batch Epoch: 5/20... Training Step: 2107... Training loss: 1.5059... 0.1287 sec/batch Epoch: 5/20... Training Step: 2108... Training loss: 1.2699... 0.1256 sec/batch Epoch: 5/20... Training Step: 2109... Training loss: 1.2599... 0.1243 sec/batch Epoch: 5/20... Training Step: 2110... Training loss: 1.3264... 0.1209 sec/batch Epoch: 5/20... Training Step: 2111... Training loss: 1.3123... 0.1191 sec/batch Epoch: 5/20... Training Step: 2112... Training loss: 1.4189... 0.1174 sec/batch Epoch: 5/20... Training Step: 2113... Training loss: 1.4893... 0.1279 sec/batch Epoch: 5/20... Training Step: 2114... Training loss: 1.1976... 0.1254 sec/batch Epoch: 5/20... Training Step: 2115... Training loss: 1.1844... 0.1213 sec/batch Epoch: 5/20... Training Step: 2116... Training loss: 1.2370... 0.1286 sec/batch Epoch: 5/20... Training Step: 2117... Training loss: 1.2447... 0.1308 sec/batch Epoch: 5/20... Training Step: 2118... Training loss: 1.4092... 0.1261 sec/batch Epoch: 5/20... Training Step: 2119... Training loss: 1.3564... 0.1290 sec/batch Epoch: 5/20... Training Step: 2120... Training loss: 1.4175... 0.1282 sec/batch Epoch: 5/20... Training Step: 2121... Training loss: 1.5402... 0.1307 sec/batch Epoch: 5/20... Training Step: 2122... Training loss: 1.3617... 0.1244 sec/batch Epoch: 5/20... Training Step: 2123... Training loss: 1.6011... 0.1165 sec/batch Epoch: 5/20... Training Step: 2124... Training loss: 1.4850... 0.1316 sec/batch Epoch: 5/20... Training Step: 2125... Training loss: 1.3993... 0.1218 sec/batch Epoch: 5/20... Training Step: 2126... Training loss: 1.4731... 0.1224 sec/batch Epoch: 5/20... Training Step: 2127... Training loss: 1.4577... 
0.1325 sec/batch Epoch: 5/20... Training Step: 2128... Training loss: 1.4428... 0.1301 sec/batch Epoch: 5/20... Training Step: 2129... Training loss: 1.5331... 0.1209 sec/batch Epoch: 5/20... Training Step: 2130... Training loss: 1.3790... 0.1200 sec/batch Epoch: 5/20... Training Step: 2131... Training loss: 1.4644... 0.1229 sec/batch Epoch: 5/20... Training Step: 2132... Training loss: 1.3375... 0.1204 sec/batch Epoch: 5/20... Training Step: 2133... Training loss: 1.4570... 0.1232 sec/batch Epoch: 5/20... Training Step: 2134... Training loss: 1.6120... 0.1202 sec/batch Epoch: 5/20... Training Step: 2135... Training loss: 1.3700... 0.1219 sec/batch Epoch: 5/20... Training Step: 2136... Training loss: 1.3754... 0.1206 sec/batch Epoch: 5/20... Training Step: 2137... Training loss: 1.2274... 0.1173 sec/batch Epoch: 5/20... Training Step: 2138... Training loss: 1.2678... 0.1255 sec/batch Epoch: 5/20... Training Step: 2139... Training loss: 1.2651... 0.1268 sec/batch Epoch: 5/20... Training Step: 2140... Training loss: 1.4352... 0.1201 sec/batch Epoch: 5/20... Training Step: 2141... Training loss: 1.1677... 0.1194 sec/batch Epoch: 5/20... Training Step: 2142... Training loss: 1.3226... 0.1230 sec/batch Epoch: 5/20... Training Step: 2143... Training loss: 1.3909... 0.1217 sec/batch Epoch: 5/20... Training Step: 2144... Training loss: 1.4291... 0.1218 sec/batch Epoch: 5/20... Training Step: 2145... Training loss: 1.4847... 0.1200 sec/batch Epoch: 5/20... Training Step: 2146... Training loss: 1.4530... 0.1212 sec/batch Epoch: 5/20... Training Step: 2147... Training loss: 1.4686... 0.1188 sec/batch Epoch: 5/20... Training Step: 2148... Training loss: 1.2717... 0.1255 sec/batch Epoch: 5/20... Training Step: 2149... Training loss: 1.2172... 0.1174 sec/batch Epoch: 5/20... Training Step: 2150... Training loss: 1.3201... 0.1261 sec/batch Epoch: 5/20... Training Step: 2151... Training loss: 1.3613... 0.1223 sec/batch Epoch: 5/20... Training Step: 2152... Training loss: 1.5339... 
0.1200 sec/batch Epoch: 5/20... Training Step: 2153... Training loss: 1.2476... 0.1256 sec/batch Epoch: 5/20... Training Step: 2154... Training loss: 1.3315... 0.1247 sec/batch Epoch: 5/20... Training Step: 2155... Training loss: 1.3830... 0.1207 sec/batch Epoch: 5/20... Training Step: 2156... Training loss: 1.2626... 0.1240 sec/batch Epoch: 5/20... Training Step: 2157... Training loss: 1.4222... 0.1264 sec/batch Epoch: 5/20... Training Step: 2158... Training loss: 1.3713... 0.1262 sec/batch Epoch: 5/20... Training Step: 2159... Training loss: 1.1779... 0.1219 sec/batch Epoch: 5/20... Training Step: 2160... Training loss: 1.5389... 0.1224 sec/batch Epoch: 5/20... Training Step: 2161... Training loss: 1.1701... 0.1180 sec/batch Epoch: 5/20... Training Step: 2162... Training loss: 1.4128... 0.1177 sec/batch Epoch: 5/20... Training Step: 2163... Training loss: 1.3217... 0.1235 sec/batch Epoch: 5/20... Training Step: 2164... Training loss: 1.7374... 0.1249 sec/batch Epoch: 5/20... Training Step: 2165... Training loss: 1.3241... 0.1240 sec/batch Epoch: 5/20... Training Step: 2166... Training loss: 1.4510... 0.1205 sec/batch Epoch: 5/20... Training Step: 2167... Training loss: 1.3563... 0.1223 sec/batch Epoch: 5/20... Training Step: 2168... Training loss: 1.1241... 0.1207 sec/batch Epoch: 5/20... Training Step: 2169... Training loss: 1.3653... 0.1251 sec/batch Epoch: 5/20... Training Step: 2170... Training loss: 1.3491... 0.1238 sec/batch Epoch: 5/20... Training Step: 2171... Training loss: 1.0264... 0.1245 sec/batch Epoch: 5/20... Training Step: 2172... Training loss: 1.2446... 0.1219 sec/batch Epoch: 5/20... Training Step: 2173... Training loss: 1.3225... 0.1177 sec/batch Epoch: 5/20... Training Step: 2174... Training loss: 1.2322... 0.1171 sec/batch Epoch: 5/20... Training Step: 2175... Training loss: 1.3576... 0.1215 sec/batch Epoch: 5/20... Training Step: 2176... Training loss: 1.2236... 0.1224 sec/batch Epoch: 5/20... Training Step: 2177... Training loss: 1.1469... 
0.1203 sec/batch Epoch: 5/20... Training Step: 2178... Training loss: 1.4571... 0.1256 sec/batch Epoch: 5/20... Training Step: 2179... Training loss: 1.2698... 0.1237 sec/batch Epoch: 5/20... Training Step: 2180... Training loss: 1.1421... 0.1253 sec/batch Epoch: 5/20... Training Step: 2181... Training loss: 1.1433... 0.1213 sec/batch Epoch: 5/20... Training Step: 2182... Training loss: 1.2757... 0.1231 sec/batch Epoch: 5/20... Training Step: 2183... Training loss: 1.3471... 0.1218 sec/batch Epoch: 5/20... Training Step: 2184... Training loss: 1.3731... 0.1208 sec/batch Epoch: 5/20... Training Step: 2185... Training loss: 1.3316... 0.1237 sec/batch Epoch: 5/20... Training Step: 2186... Training loss: 1.1254... 0.1246 sec/batch Epoch: 5/20... Training Step: 2187... Training loss: 1.4053... 0.1191 sec/batch Epoch: 5/20... Training Step: 2188... Training loss: 1.3396... 0.1237 sec/batch Epoch: 5/20... Training Step: 2189... Training loss: 1.2039... 0.1249 sec/batch Epoch: 5/20... Training Step: 2190... Training loss: 1.3069... 0.1183 sec/batch Epoch: 5/20... Training Step: 2191... Training loss: 1.4495... 0.1211 sec/batch Epoch: 5/20... Training Step: 2192... Training loss: 1.1505... 0.1235 sec/batch Epoch: 5/20... Training Step: 2193... Training loss: 1.3098... 0.1222 sec/batch Epoch: 5/20... Training Step: 2194... Training loss: 1.1587... 0.1242 sec/batch Epoch: 5/20... Training Step: 2195... Training loss: 1.4081... 0.1266 sec/batch Epoch: 5/20... Training Step: 2196... Training loss: 1.2032... 0.1248 sec/batch Epoch: 5/20... Training Step: 2197... Training loss: 1.1867... 0.1177 sec/batch Epoch: 5/20... Training Step: 2198... Training loss: 1.4075... 0.1234 sec/batch Epoch: 5/20... Training Step: 2199... Training loss: 1.0782... 0.1249 sec/batch Epoch: 5/20... Training Step: 2200... Training loss: 1.4022... 0.1191 sec/batch Epoch: 5/20... Training Step: 2201... Training loss: 1.2112... 0.1221 sec/batch Epoch: 5/20... Training Step: 2202... Training loss: 1.0665... 
0.1220 sec/batch Epoch: 5/20... Training Step: 2203... Training loss: 1.1189... 0.1227 sec/batch Epoch: 5/20... Training Step: 2204... Training loss: 1.4799... 0.1209 sec/batch Epoch: 5/20... Training Step: 2205... Training loss: 1.2105... 0.1260 sec/batch Epoch: 5/20... Training Step: 2206... Training loss: 1.3033... 0.1197 sec/batch Epoch: 5/20... Training Step: 2207... Training loss: 1.2723... 0.1227 sec/batch Epoch: 5/20... Training Step: 2208... Training loss: 1.1299... 0.1211 sec/batch Epoch: 5/20... Training Step: 2209... Training loss: 1.0649... 0.1209 sec/batch Epoch: 5/20... Training Step: 2210... Training loss: 1.0279... 0.1237 sec/batch Epoch: 5/20... Training Step: 2211... Training loss: 1.3164... 0.1186 sec/batch Epoch: 5/20... Training Step: 2212... Training loss: 1.2554... 0.1235 sec/batch Epoch: 5/20... Training Step: 2213... Training loss: 1.2331... 0.1203 sec/batch Epoch: 5/20... Training Step: 2214... Training loss: 1.3963... 0.1192 sec/batch Epoch: 5/20... Training Step: 2215... Training loss: 1.4784... 0.1168 sec/batch Epoch: 5/20... Training Step: 2216... Training loss: 1.1013... 0.1231 sec/batch Epoch: 5/20... Training Step: 2217... Training loss: 1.4188... 0.1231 sec/batch Epoch: 5/20... Training Step: 2218... Training loss: 1.3214... 0.1239 sec/batch Epoch: 5/20... Training Step: 2219... Training loss: 1.3651... 0.1288 sec/batch Epoch: 5/20... Training Step: 2220... Training loss: 1.2781... 0.1258 sec/batch Epoch: 5/20... Training Step: 2221... Training loss: 1.2946... 0.1267 sec/batch Epoch: 5/20... Training Step: 2222... Training loss: 1.3607... 0.1199 sec/batch Epoch: 5/20... Training Step: 2223... Training loss: 1.2041... 0.1219 sec/batch Epoch: 5/20... Training Step: 2224... Training loss: 1.4460... 0.1196 sec/batch Epoch: 5/20... Training Step: 2225... Training loss: 1.4566... 0.1222 sec/batch Epoch: 5/20... Training Step: 2226... Training loss: 1.2434... 0.1268 sec/batch Epoch: 5/20... Training Step: 2227... Training loss: 1.1295... 
0.1265 sec/batch Epoch: 5/20... Training Step: 2228... Training loss: 1.3996... 0.1202 sec/batch Epoch: 5/20... Training Step: 2229... Training loss: 1.2414... 0.1186 sec/batch Epoch: 5/20... Training Step: 2230... Training loss: 1.4211... 0.1176 sec/batch Epoch: 5/20... Training Step: 2231... Training loss: 1.2947... 0.1223 sec/batch Epoch: 5/20... Training Step: 2232... Training loss: 1.2941... 0.1208 sec/batch Epoch: 5/20... Training Step: 2233... Training loss: 1.4468... 0.1201 sec/batch Epoch: 5/20... Training Step: 2234... Training loss: 1.4240... 0.1227 sec/batch Epoch: 5/20... Training Step: 2235... Training loss: 1.2270... 0.1216 sec/batch Epoch: 5/20... Training Step: 2236... Training loss: 1.4344... 0.1219 sec/batch Epoch: 5/20... Training Step: 2237... Training loss: 1.1316... 0.1211 sec/batch Epoch: 5/20... Training Step: 2238... Training loss: 1.1930... 0.1236 sec/batch Epoch: 5/20... Training Step: 2239... Training loss: 1.2547... 0.1173 sec/batch Epoch: 5/20... Training Step: 2240... Training loss: 1.3189... 0.1208 sec/batch Epoch: 5/20... Training Step: 2241... Training loss: 1.1847... 0.1217 sec/batch Epoch: 5/20... Training Step: 2242... Training loss: 1.2743... 0.1204 sec/batch Epoch: 5/20... Training Step: 2243... Training loss: 1.3364... 0.1240 sec/batch Epoch: 5/20... Training Step: 2244... Training loss: 1.1097... 0.1209 sec/batch Epoch: 5/20... Training Step: 2245... Training loss: 1.2344... 0.1229 sec/batch Epoch: 5/20... Training Step: 2246... Training loss: 1.3282... 0.1238 sec/batch Epoch: 5/20... Training Step: 2247... Training loss: 1.1812... 0.1285 sec/batch Epoch: 5/20... Training Step: 2248... Training loss: 1.2016... 0.1242 sec/batch Epoch: 5/20... Training Step: 2249... Training loss: 1.2162... 0.1287 sec/batch Epoch: 5/20... Training Step: 2250... Training loss: 1.2379... 0.1281 sec/batch Epoch: 5/20... Training Step: 2251... Training loss: 1.1340... 0.1266 sec/batch Epoch: 5/20... Training Step: 2252... Training loss: 1.4828... 
0.1338 sec/batch Epoch: 5/20... Training Step: 2253... Training loss: 1.2306... 0.1288 sec/batch Epoch: 5/20... Training Step: 2254... Training loss: 1.1628... 0.1235 sec/batch Epoch: 5/20... Training Step: 2255... Training loss: 1.2777... 0.1235 sec/batch Epoch: 5/20... Training Step: 2256... Training loss: 1.2684... 0.1269 sec/batch Epoch: 5/20... Training Step: 2257... Training loss: 1.2504... 0.1259 sec/batch Epoch: 5/20... Training Step: 2258... Training loss: 1.2345... 0.1280 sec/batch Epoch: 5/20... Training Step: 2259... Training loss: 1.1592... 0.1308 sec/batch Epoch: 5/20... Training Step: 2260... Training loss: 1.3156... 0.1366 sec/batch Epoch: 5/20... Training Step: 2261... Training loss: 1.2982... 0.1433 sec/batch Epoch: 5/20... Training Step: 2262... Training loss: 1.5661... 0.1304 sec/batch Epoch: 5/20... Training Step: 2263... Training loss: 1.2208... 0.1253 sec/batch Epoch: 5/20... Training Step: 2264... Training loss: 1.4968... 0.1316 sec/batch Epoch: 5/20... Training Step: 2265... Training loss: 1.4126... 0.1250 sec/batch Epoch: 5/20... Training Step: 2266... Training loss: 1.2998... 0.1282 sec/batch Epoch: 5/20... Training Step: 2267... Training loss: 1.1956... 0.1308 sec/batch Epoch: 5/20... Training Step: 2268... Training loss: 1.4148... 0.1302 sec/batch Epoch: 5/20... Training Step: 2269... Training loss: 1.3953... 0.1282 sec/batch Epoch: 5/20... Training Step: 2270... Training loss: 1.3235... 0.1295 sec/batch Epoch: 5/20... Training Step: 2271... Training loss: 1.5398... 0.1322 sec/batch Epoch: 5/20... Training Step: 2272... Training loss: 1.4907... 0.1231 sec/batch Epoch: 5/20... Training Step: 2273... Training loss: 1.4090... 0.1257 sec/batch Epoch: 5/20... Training Step: 2274... Training loss: 1.1847... 0.1181 sec/batch Epoch: 5/20... Training Step: 2275... Training loss: 1.3121... 0.1379 sec/batch Epoch: 5/20... Training Step: 2276... Training loss: 1.0537... 0.1335 sec/batch Epoch: 5/20... Training Step: 2277... Training loss: 1.3853... 
0.1488 sec/batch Epoch: 5/20... Training Step: 2278... Training loss: 1.3854... 0.1264 sec/batch Epoch: 5/20... Training Step: 2279... Training loss: 1.4398... 0.1309 sec/batch Epoch: 5/20... Training Step: 2280... Training loss: 1.5588... 0.1327 sec/batch Epoch: 5/20... Training Step: 2281... Training loss: 1.2669... 0.1276 sec/batch Epoch: 5/20... Training Step: 2282... Training loss: 1.3803... 0.1310 sec/batch Epoch: 5/20... Training Step: 2283... Training loss: 1.4367... 0.1296 sec/batch Epoch: 5/20... Training Step: 2284... Training loss: 1.4695... 0.1346 sec/batch Epoch: 5/20... Training Step: 2285... Training loss: 1.1804... 0.1366 sec/batch Epoch: 5/20... Training Step: 2286... Training loss: 1.3090... 0.1370 sec/batch Epoch: 5/20... Training Step: 2287... Training loss: 1.4822... 0.1354 sec/batch Epoch: 5/20... Training Step: 2288... Training loss: 1.3859... 0.1343 sec/batch Epoch: 5/20... Training Step: 2289... Training loss: 1.5533... 0.1243 sec/batch Epoch: 5/20... Training Step: 2290... Training loss: 1.4981... 0.1305 sec/batch Epoch: 5/20... Training Step: 2291... Training loss: 1.3120... 0.1345 sec/batch Epoch: 5/20... Training Step: 2292... Training loss: 1.3983... 0.1309 sec/batch Epoch: 5/20... Training Step: 2293... Training loss: 1.1974... 0.1382 sec/batch Epoch: 5/20... Training Step: 2294... Training loss: 1.5368... 0.1251 sec/batch Epoch: 5/20... Training Step: 2295... Training loss: 1.5380... 0.1457 sec/batch Epoch: 5/20... Training Step: 2296... Training loss: 1.6047... 0.1546 sec/batch Epoch: 5/20... Training Step: 2297... Training loss: 1.3679... 0.1530 sec/batch Epoch: 5/20... Training Step: 2298... Training loss: 1.2772... 0.1483 sec/batch Epoch: 5/20... Training Step: 2299... Training loss: 1.4528... 0.1311 sec/batch Epoch: 5/20... Training Step: 2300... Training loss: 1.2750... 0.1419 sec/batch Epoch: 5/20... Training Step: 2301... Training loss: 1.2816... 0.1385 sec/batch Epoch: 5/20... Training Step: 2302... Training loss: 1.3153... 
0.1384 sec/batch Epoch: 5/20... Training Step: 2303... Training loss: 1.3220... 0.1355 sec/batch Epoch: 5/20... Training Step: 2304... Training loss: 1.2907... 0.1351 sec/batch Epoch: 5/20... Training Step: 2305... Training loss: 1.3137... 0.1494 sec/batch Epoch: 5/20... Training Step: 2306... Training loss: 1.3615... 0.1339 sec/batch Epoch: 5/20... Training Step: 2307... Training loss: 1.2708... 0.1317 sec/batch Epoch: 5/20... Training Step: 2308... Training loss: 1.3530... 0.1351 sec/batch Epoch: 5/20... Training Step: 2309... Training loss: 1.2334... 0.1302 sec/batch Epoch: 5/20... Training Step: 2310... Training loss: 1.6088... 0.1386 sec/batch Epoch: 5/20... Training Step: 2311... Training loss: 1.3722... 0.1317 sec/batch Epoch: 5/20... Training Step: 2312... Training loss: 1.1236... 0.1342 sec/batch Epoch: 5/20... Training Step: 2313... Training loss: 1.2260... 0.1341 sec/batch Epoch: 5/20... Training Step: 2314... Training loss: 1.1246... 0.1322 sec/batch Epoch: 5/20... Training Step: 2315... Training loss: 1.1872... 0.1337 sec/batch Epoch: 5/20... Training Step: 2316... Training loss: 1.2634... 0.1294 sec/batch Epoch: 5/20... Training Step: 2317... Training loss: 1.4518... 0.1356 sec/batch Epoch: 5/20... Training Step: 2318... Training loss: 1.2658... 0.1281 sec/batch Epoch: 5/20... Training Step: 2319... Training loss: 1.4098... 0.1212 sec/batch Epoch: 5/20... Training Step: 2320... Training loss: 1.2009... 0.1197 sec/batch Epoch: 6/20... Training Step: 2321... Training loss: 1.6311... 0.1237 sec/batch Epoch: 6/20... Training Step: 2322... Training loss: 1.3930... 0.1246 sec/batch Epoch: 6/20... Training Step: 2323... Training loss: 1.3058... 0.1211 sec/batch Epoch: 6/20... Training Step: 2324... Training loss: 1.2827... 0.1264 sec/batch Epoch: 6/20... Training Step: 2325... Training loss: 1.3382... 0.1179 sec/batch Epoch: 6/20... Training Step: 2326... Training loss: 1.1043... 0.1229 sec/batch Epoch: 6/20... Training Step: 2327... Training loss: 1.4317... 
0.1271 sec/batch Epoch: 6/20... Training Step: 2328... Training loss: 1.1889... 0.1190 sec/batch Epoch: 6/20... Training Step: 2329... Training loss: 1.2613... 0.1216 sec/batch Epoch: 6/20... Training Step: 2330... Training loss: 1.3218... 0.1193 sec/batch Epoch: 6/20... Training Step: 2331... Training loss: 1.2366... 0.1186 sec/batch Epoch: 6/20... Training Step: 2332... Training loss: 1.0599... 0.1200 sec/batch Epoch: 6/20... Training Step: 2333... Training loss: 1.4858... 0.1217 sec/batch Epoch: 6/20... Training Step: 2334... Training loss: 1.0154... 0.1261 sec/batch Epoch: 6/20... Training Step: 2335... Training loss: 1.3003... 0.1286 sec/batch Epoch: 6/20... Training Step: 2336... Training loss: 1.3698... 0.1231 sec/batch Epoch: 6/20... Training Step: 2337... Training loss: 1.1545... 0.1205 sec/batch Epoch: 6/20... Training Step: 2338... Training loss: 1.1700... 0.1207 sec/batch Epoch: 6/20... Training Step: 2339... Training loss: 1.2990... 0.1230 sec/batch Epoch: 6/20... Training Step: 2340... Training loss: 1.1251... 0.1248 sec/batch Epoch: 6/20... Training Step: 2341... Training loss: 1.3312... 0.1210 sec/batch Epoch: 6/20... Training Step: 2342... Training loss: 1.2236... 0.1220 sec/batch Epoch: 6/20... Training Step: 2343... Training loss: 1.4102... 0.1248 sec/batch Epoch: 6/20... Training Step: 2344... Training loss: 1.1989... 0.1256 sec/batch Epoch: 6/20... Training Step: 2345... Training loss: 1.2348... 0.1211 sec/batch Epoch: 6/20... Training Step: 2346... Training loss: 1.3191... 0.1217 sec/batch Epoch: 6/20... Training Step: 2347... Training loss: 1.3903... 0.1200 sec/batch Epoch: 6/20... Training Step: 2348... Training loss: 1.0599... 0.1242 sec/batch Epoch: 6/20... Training Step: 2349... Training loss: 1.2737... 0.1183 sec/batch Epoch: 6/20... Training Step: 2350... Training loss: 1.2724... 0.1209 sec/batch Epoch: 6/20... Training Step: 2351... Training loss: 1.0888... 0.1266 sec/batch Epoch: 6/20... Training Step: 2352... Training loss: 1.2256... 
0.1196 sec/batch Epoch: 6/20... Training Step: 2353... Training loss: 1.0456... 0.1243 sec/batch Epoch: 6/20... Training Step: 2354... Training loss: 1.0945... 0.1174 sec/batch Epoch: 6/20... Training Step: 2355... Training loss: 1.1539... 0.1204 sec/batch Epoch: 6/20... Training Step: 2356... Training loss: 1.2595... 0.1191 sec/batch Epoch: 6/20... Training Step: 2357... Training loss: 1.2769... 0.1265 sec/batch Epoch: 6/20... Training Step: 2358... Training loss: 1.0836... 0.1214 sec/batch Epoch: 6/20... Training Step: 2359... Training loss: 1.1771... 0.1263 sec/batch Epoch: 6/20... Training Step: 2360... Training loss: 1.4154... 0.1202 sec/batch Epoch: 6/20... Training Step: 2361... Training loss: 1.3142... 0.1215 sec/batch Epoch: 6/20... Training Step: 2362... Training loss: 1.1953... 0.1244 sec/batch Epoch: 6/20... Training Step: 2363... Training loss: 1.3591... 0.1226 sec/batch Epoch: 6/20... Training Step: 2364... Training loss: 1.0060... 0.1268 sec/batch Epoch: 6/20... Training Step: 2365... Training loss: 1.2036... 0.1237 sec/batch Epoch: 6/20... Training Step: 2366... Training loss: 1.1471... 0.1211 sec/batch Epoch: 6/20... Training Step: 2367... Training loss: 1.2212... 0.1229 sec/batch Epoch: 6/20... Training Step: 2368... Training loss: 1.2131... 0.1190 sec/batch Epoch: 6/20... Training Step: 2369... Training loss: 1.2261... 0.1263 sec/batch Epoch: 6/20... Training Step: 2370... Training loss: 1.3443... 0.1243 sec/batch Epoch: 6/20... Training Step: 2371... Training loss: 1.2670... 0.1235 sec/batch Epoch: 6/20... Training Step: 2372... Training loss: 1.2596... 0.1217 sec/batch Epoch: 6/20... Training Step: 2373... Training loss: 1.4464... 0.1225 sec/batch Epoch: 6/20... Training Step: 2374... Training loss: 1.4029... 0.1227 sec/batch Epoch: 6/20... Training Step: 2375... Training loss: 1.0272... 0.1220 sec/batch Epoch: 6/20... Training Step: 2376... Training loss: 1.1753... 0.1230 sec/batch Epoch: 6/20... Training Step: 2377... Training loss: 1.3865... 
0.1190 sec/batch Epoch: 6/20... Training Step: 2378... Training loss: 1.3641... 0.1202 sec/batch Epoch: 6/20... Training Step: 2379... Training loss: 1.0820... 0.1230 sec/batch Epoch: 6/20... Training Step: 2380... Training loss: 1.1591... 0.1218 sec/batch Epoch: 6/20... Training Step: 2381... Training loss: 1.1741... 0.1222 sec/batch Epoch: 6/20... Training Step: 2382... Training loss: 1.4185... 0.1227 sec/batch Epoch: 6/20... Training Step: 2383... Training loss: 1.2276... 0.1212 sec/batch Epoch: 6/20... Training Step: 2384... Training loss: 1.2727... 0.1269 sec/batch Epoch: 6/20... Training Step: 2385... Training loss: 1.0896... 0.1365 sec/batch Epoch: 6/20... Training Step: 2386... Training loss: 1.4431... 0.1241 sec/batch Epoch: 6/20... Training Step: 2387... Training loss: 1.2559... 0.1264 sec/batch Epoch: 6/20... Training Step: 2388... Training loss: 1.3154... 0.1207 sec/batch Epoch: 6/20... Training Step: 2389... Training loss: 1.1405... 0.1225 sec/batch Epoch: 6/20... Training Step: 2390... Training loss: 1.2699... 0.1225 sec/batch Epoch: 6/20... Training Step: 2391... Training loss: 1.4147... 0.1253 sec/batch Epoch: 6/20... Training Step: 2392... Training loss: 1.2288... 0.1194 sec/batch Epoch: 6/20... Training Step: 2393... Training loss: 1.3025... 0.1268 sec/batch Epoch: 6/20... Training Step: 2394... Training loss: 1.1623... 0.1224 sec/batch Epoch: 6/20... Training Step: 2395... Training loss: 1.4626... 0.1214 sec/batch Epoch: 6/20... Training Step: 2396... Training loss: 1.1778... 0.1247 sec/batch Epoch: 6/20... Training Step: 2397... Training loss: 1.1353... 0.1226 sec/batch Epoch: 6/20... Training Step: 2398... Training loss: 1.2984... 0.1285 sec/batch Epoch: 6/20... Training Step: 2399... Training loss: 1.2978... 0.1203 sec/batch Epoch: 6/20... Training Step: 2400... Training loss: 1.1298... 0.1199 sec/batch Epoch: 6/20... Training Step: 2401... Training loss: 1.4595... 0.1219 sec/batch Epoch: 6/20... Training Step: 2402... Training loss: 1.2626... 
0.1208 sec/batch Epoch: 6/20... Training Step: 2403... Training loss: 1.2375... 0.1228 sec/batch Epoch: 6/20... Training Step: 2404... Training loss: 1.4104... 0.1207 sec/batch Epoch: 6/20... Training Step: 2405... Training loss: 1.2687... 0.1203 sec/batch Epoch: 6/20... Training Step: 2406... Training loss: 1.2620... 0.1222 sec/batch Epoch: 6/20... Training Step: 2407... Training loss: 1.1840... 0.1194 sec/batch Epoch: 6/20... Training Step: 2408... Training loss: 1.3223... 0.1195 sec/batch Epoch: 6/20... Training Step: 2409... Training loss: 1.4587... 0.1214 sec/batch Epoch: 6/20... Training Step: 2410... Training loss: 1.2219... 0.1207 sec/batch Epoch: 6/20... Training Step: 2411... Training loss: 1.3596... 0.1224 sec/batch Epoch: 6/20... Training Step: 2412... Training loss: 1.5247... 0.1234 sec/batch Epoch: 6/20... Training Step: 2413... Training loss: 1.0828... 0.1264 sec/batch Epoch: 6/20... Training Step: 2414... Training loss: 1.2971... 0.1184 sec/batch Epoch: 6/20... Training Step: 2415... Training loss: 1.3302... 0.1219 sec/batch Epoch: 6/20... Training Step: 2416... Training loss: 1.2316... 0.1236 sec/batch Epoch: 6/20... Training Step: 2417... Training loss: 1.5149... 0.1278 sec/batch Epoch: 6/20... Training Step: 2418... Training loss: 1.3349... 0.1216 sec/batch Epoch: 6/20... Training Step: 2419... Training loss: 1.3305... 0.1198 sec/batch Epoch: 6/20... Training Step: 2420... Training loss: 1.1834... 0.1214 sec/batch Epoch: 6/20... Training Step: 2421... Training loss: 1.3429... 0.1213 sec/batch Epoch: 6/20... Training Step: 2422... Training loss: 1.5014... 0.1210 sec/batch Epoch: 6/20... Training Step: 2423... Training loss: 1.4785... 0.1211 sec/batch Epoch: 6/20... Training Step: 2424... Training loss: 1.2383... 0.1231 sec/batch Epoch: 6/20... Training Step: 2425... Training loss: 1.4800... 0.1201 sec/batch Epoch: 6/20... Training Step: 2426... Training loss: 1.5512... 0.1184 sec/batch Epoch: 6/20... Training Step: 2427... Training loss: 1.4062... 
0.1216 sec/batch Epoch: 6/20... Training Step: 2428... Training loss: 1.4362... 0.1213 sec/batch Epoch: 6/20... Training Step: 2429... Training loss: 1.4424... 0.1228 sec/batch Epoch: 6/20... Training Step: 2430... Training loss: 1.2157... 0.1208 sec/batch Epoch: 6/20... Training Step: 2431... Training loss: 1.3395... 0.1193 sec/batch Epoch: 6/20... Training Step: 2432... Training loss: 1.2810... 0.1222 sec/batch Epoch: 6/20... Training Step: 2433... Training loss: 1.3109... 0.1221 sec/batch Epoch: 6/20... Training Step: 2434... Training loss: 1.5148... 0.1247 sec/batch Epoch: 6/20... Training Step: 2435... Training loss: 1.3909... 0.1228 sec/batch Epoch: 6/20... Training Step: 2436... Training loss: 1.1984... 0.1239 sec/batch Epoch: 6/20... Training Step: 2437... Training loss: 1.4642... 0.1215 sec/batch Epoch: 6/20... Training Step: 2438... Training loss: 1.4342... 0.1207 sec/batch Epoch: 6/20... Training Step: 2439... Training loss: 1.3017... 0.1224 sec/batch Epoch: 6/20... Training Step: 2440... Training loss: 1.1203... 0.1204 sec/batch Epoch: 6/20... Training Step: 2441... Training loss: 1.3539... 0.1222 sec/batch Epoch: 6/20... Training Step: 2442... Training loss: 1.3851... 0.1246 sec/batch Epoch: 6/20... Training Step: 2443... Training loss: 1.3193... 0.1214 sec/batch Epoch: 6/20... Training Step: 2444... Training loss: 1.5250... 0.1198 sec/batch Epoch: 6/20... Training Step: 2445... Training loss: 1.3380... 0.1219 sec/batch Epoch: 6/20... Training Step: 2446... Training loss: 1.1748... 0.1196 sec/batch Epoch: 6/20... Training Step: 2447... Training loss: 1.2455... 0.1241 sec/batch Epoch: 6/20... Training Step: 2448... Training loss: 1.3589... 0.1228 sec/batch Epoch: 6/20... Training Step: 2449... Training loss: 1.3846... 0.1230 sec/batch Epoch: 6/20... Training Step: 2450... Training loss: 1.2957... 0.1212 sec/batch Epoch: 6/20... Training Step: 2451... Training loss: 1.4721... 0.1262 sec/batch Epoch: 6/20... Training Step: 2452... Training loss: 1.3400... 
0.1339 sec/batch Epoch: 6/20... Training Step: 2453... Training loss: 1.3076... 0.1217 sec/batch Epoch: 6/20... Training Step: 2454... Training loss: 1.4066... 0.1233 sec/batch Epoch: 6/20... Training Step: 2455... Training loss: 1.2339... 0.1223 sec/batch Epoch: 6/20... Training Step: 2456... Training loss: 1.0549... 0.1212 sec/batch Epoch: 6/20... Training Step: 2457... Training loss: 1.1928... 0.1267 sec/batch Epoch: 6/20... Training Step: 2458... Training loss: 1.3019... 0.1396 sec/batch Epoch: 6/20... Training Step: 2459... Training loss: 1.2046... 0.1308 sec/batch Epoch: 6/20... Training Step: 2460... Training loss: 1.2998... 0.1185 sec/batch Epoch: 6/20... Training Step: 2461... Training loss: 1.2088... 0.1215 sec/batch Epoch: 6/20... Training Step: 2462... Training loss: 1.2659... 0.1196 sec/batch Epoch: 6/20... Training Step: 2463... Training loss: 1.1465... 0.1244 sec/batch Epoch: 6/20... Training Step: 2464... Training loss: 1.3121... 0.1207 sec/batch Epoch: 6/20... Training Step: 2465... Training loss: 1.2393... 0.1201 sec/batch Epoch: 6/20... Training Step: 2466... Training loss: 1.2197... 0.1207 sec/batch Epoch: 6/20... Training Step: 2467... Training loss: 1.3598... 0.1209 sec/batch Epoch: 6/20... Training Step: 2468... Training loss: 1.1963... 0.1239 sec/batch Epoch: 6/20... Training Step: 2469... Training loss: 1.2691... 0.1231 sec/batch Epoch: 6/20... Training Step: 2470... Training loss: 1.3611... 0.1230 sec/batch Epoch: 6/20... Training Step: 2471... Training loss: 1.3896... 0.1249 sec/batch Epoch: 6/20... Training Step: 2472... Training loss: 1.3983... 0.1246 sec/batch Epoch: 6/20... Training Step: 2473... Training loss: 1.4176... 0.1214 sec/batch Epoch: 6/20... Training Step: 2474... Training loss: 1.3656... 0.1234 sec/batch Epoch: 6/20... Training Step: 2475... Training loss: 1.2919... 0.1249 sec/batch Epoch: 6/20... Training Step: 2476... Training loss: 1.2340... 0.1223 sec/batch Epoch: 6/20... Training Step: 2477... Training loss: 1.2620... 
0.1194 sec/batch Epoch: 6/20... Training Step: 2478... Training loss: 1.1818... 0.1215 sec/batch Epoch: 6/20... Training Step: 2479... Training loss: 1.2103... 0.1232 sec/batch Epoch: 6/20... Training Step: 2480... Training loss: 1.2561... 0.1249 sec/batch Epoch: 6/20... Training Step: 2481... Training loss: 1.4544... 0.1183 sec/batch Epoch: 6/20... Training Step: 2482... Training loss: 1.3303... 0.1190 sec/batch Epoch: 6/20... Training Step: 2483... Training loss: 1.3413... 0.1236 sec/batch Epoch: 6/20... Training Step: 2484... Training loss: 1.2096... 0.1217 sec/batch Epoch: 6/20... Training Step: 2485... Training loss: 1.3481... 0.1235 sec/batch Epoch: 6/20... Training Step: 2486... Training loss: 1.2442... 0.1239 sec/batch Epoch: 6/20... Training Step: 2487... Training loss: 1.2041... 0.1234 sec/batch Epoch: 6/20... Training Step: 2488... Training loss: 1.5734... 0.1204 sec/batch Epoch: 6/20... Training Step: 2489... Training loss: 1.2203... 0.1228 sec/batch Epoch: 6/20... Training Step: 2490... Training loss: 1.3087... 0.1199 sec/batch Epoch: 6/20... Training Step: 2491... Training loss: 1.4068... 0.1152 sec/batch Epoch: 6/20... Training Step: 2492... Training loss: 1.4106... 0.1196 sec/batch Epoch: 6/20... Training Step: 2493... Training loss: 1.1642... 0.1206 sec/batch Epoch: 6/20... Training Step: 2494... Training loss: 1.2704... 0.1223 sec/batch Epoch: 6/20... Training Step: 2495... Training loss: 1.3372... 0.1202 sec/batch Epoch: 6/20... Training Step: 2496... Training loss: 1.1189... 0.1197 sec/batch Epoch: 6/20... Training Step: 2497... Training loss: 1.0800... 0.1204 sec/batch Epoch: 6/20... Training Step: 2498... Training loss: 1.4059... 0.1214 sec/batch Epoch: 6/20... Training Step: 2499... Training loss: 1.1671... 0.1212 sec/batch Epoch: 6/20... Training Step: 2500... Training loss: 1.4284... 0.1183 sec/batch Epoch: 6/20... Training Step: 2501... Training loss: 1.1956... 0.1215 sec/batch Epoch: 6/20... Training Step: 2502... Training loss: 1.3876... 
0.1196 sec/batch Epoch: 6/20... Training Step: 2503... Training loss: 1.2569... 0.1173 sec/batch Epoch: 6/20... Training Step: 2504... Training loss: 1.2678... 0.1219 sec/batch Epoch: 6/20... Training Step: 2505... Training loss: 1.4282... 0.1211 sec/batch Epoch: 6/20... Training Step: 2506... Training loss: 1.3326... 0.1230 sec/batch Epoch: 6/20... Training Step: 2507... Training loss: 1.4063... 0.1292 sec/batch Epoch: 6/20... Training Step: 2508... Training loss: 1.1530... 0.1300 sec/batch Epoch: 6/20... Training Step: 2509... Training loss: 1.4763... 0.1298 sec/batch Epoch: 6/20... Training Step: 2510... Training loss: 1.1846... 0.1266 sec/batch Epoch: 6/20... Training Step: 2511... Training loss: 1.2322... 0.1219 sec/batch Epoch: 6/20... Training Step: 2512... Training loss: 1.2969... 0.1256 sec/batch Epoch: 6/20... Training Step: 2513... Training loss: 1.2889... 0.1277 sec/batch Epoch: 6/20... Training Step: 2514... Training loss: 1.3047... 0.1170 sec/batch Epoch: 6/20... Training Step: 2515... Training loss: 1.4125... 0.1248 sec/batch Epoch: 6/20... Training Step: 2516... Training loss: 1.2930... 0.1305 sec/batch Epoch: 6/20... Training Step: 2517... Training loss: 1.2578... 0.1283 sec/batch Epoch: 6/20... Training Step: 2518... Training loss: 1.4139... 0.1359 sec/batch Epoch: 6/20... Training Step: 2519... Training loss: 1.0733... 0.1243 sec/batch Epoch: 6/20... Training Step: 2520... Training loss: 1.2980... 0.1225 sec/batch Epoch: 6/20... Training Step: 2521... Training loss: 1.2366... 0.1234 sec/batch Epoch: 6/20... Training Step: 2522... Training loss: 1.3186... 0.1210 sec/batch Epoch: 6/20... Training Step: 2523... Training loss: 1.2487... 0.1212 sec/batch Epoch: 6/20... Training Step: 2524... Training loss: 1.4179... 0.1236 sec/batch Epoch: 6/20... Training Step: 2525... Training loss: 1.2025... 0.1221 sec/batch Epoch: 6/20... Training Step: 2526... Training loss: 1.2384... 0.1228 sec/batch Epoch: 6/20... Training Step: 2527... Training loss: 1.1733... 
0.1234 sec/batch Epoch: 6/20... Training Step: 2528... Training loss: 1.4089... 0.1582 sec/batch Epoch: 6/20... Training Step: 2529... Training loss: 1.3204... 0.1262 sec/batch Epoch: 6/20... Training Step: 2530... Training loss: 1.0509... 0.1227 sec/batch Epoch: 6/20... Training Step: 2531... Training loss: 1.0744... 0.1190 sec/batch Epoch: 6/20... Training Step: 2532... Training loss: 1.3947... 0.1246 sec/batch Epoch: 6/20... Training Step: 2533... Training loss: 1.4841... 0.1214 sec/batch Epoch: 6/20... Training Step: 2534... Training loss: 1.2203... 0.1203 sec/batch Epoch: 6/20... Training Step: 2535... Training loss: 1.5004... 0.1235 sec/batch Epoch: 6/20... Training Step: 2536... Training loss: 1.2421... 0.1228 sec/batch Epoch: 6/20... Training Step: 2537... Training loss: 1.3332... 0.1194 sec/batch Epoch: 6/20... Training Step: 2538... Training loss: 1.3127... 0.1224 sec/batch Epoch: 6/20... Training Step: 2539... Training loss: 1.4510... 0.1210 sec/batch Epoch: 6/20... Training Step: 2540... Training loss: 1.3265... 0.1195 sec/batch Epoch: 6/20... Training Step: 2541... Training loss: 1.2189... 0.1242 sec/batch Epoch: 6/20... Training Step: 2542... Training loss: 1.4283... 0.1219 sec/batch Epoch: 6/20... Training Step: 2543... Training loss: 1.4004... 0.1185 sec/batch Epoch: 6/20... Training Step: 2544... Training loss: 1.4495... 0.1283 sec/batch Epoch: 6/20... Training Step: 2545... Training loss: 1.3090... 0.1344 sec/batch Epoch: 6/20... Training Step: 2546... Training loss: 1.4558... 0.1292 sec/batch Epoch: 6/20... Training Step: 2547... Training loss: 1.5184... 0.1283 sec/batch Epoch: 6/20... Training Step: 2548... Training loss: 1.1978... 0.1278 sec/batch Epoch: 6/20... Training Step: 2549... Training loss: 1.3501... 0.1223 sec/batch Epoch: 6/20... Training Step: 2550... Training loss: 1.3034... 0.1223 sec/batch Epoch: 6/20... Training Step: 2551... Training loss: 1.2627... 0.1209 sec/batch Epoch: 6/20... Training Step: 2552... Training loss: 1.2194... 
0.1185 sec/batch Epoch: 6/20... Training Step: 2553... Training loss: 1.5413... 0.1225 sec/batch Epoch: 6/20... Training Step: 2554... Training loss: 1.2447... 0.1181 sec/batch Epoch: 6/20... Training Step: 2555... Training loss: 1.5604... 0.1225 sec/batch Epoch: 6/20... Training Step: 2556... Training loss: 1.2957... 0.1220 sec/batch Epoch: 6/20... Training Step: 2557... Training loss: 1.5167... 0.1230 sec/batch Epoch: 6/20... Training Step: 2558... Training loss: 1.1945... 0.1204 sec/batch Epoch: 6/20... Training Step: 2559... Training loss: 1.4043... 0.1209 sec/batch Epoch: 6/20... Training Step: 2560... Training loss: 1.5053... 0.1234 sec/batch Epoch: 6/20... Training Step: 2561... Training loss: 1.2642... 0.1262 sec/batch Epoch: 6/20... Training Step: 2562... Training loss: 1.2124... 0.1272 sec/batch Epoch: 6/20... Training Step: 2563... Training loss: 1.3084... 0.1257 sec/batch Epoch: 6/20... Training Step: 2564... Training loss: 1.3336... 0.1215 sec/batch Epoch: 6/20... Training Step: 2565... Training loss: 1.3394... 0.1226 sec/batch Epoch: 6/20... Training Step: 2566... Training loss: 1.1771... 0.1193 sec/batch Epoch: 6/20... Training Step: 2567... Training loss: 1.2187... 0.1249 sec/batch Epoch: 6/20... Training Step: 2568... Training loss: 1.4689... 0.1232 sec/batch Epoch: 6/20... Training Step: 2569... Training loss: 1.2879... 0.1247 sec/batch Epoch: 6/20... Training Step: 2570... Training loss: 1.2866... 0.1201 sec/batch Epoch: 6/20... Training Step: 2571... Training loss: 1.4767... 0.1183 sec/batch Epoch: 6/20... Training Step: 2572... Training loss: 1.3109... 0.1211 sec/batch Epoch: 6/20... Training Step: 2573... Training loss: 1.2209... 0.1252 sec/batch Epoch: 6/20... Training Step: 2574... Training loss: 1.2947... 0.1252 sec/batch Epoch: 6/20... Training Step: 2575... Training loss: 1.2359... 0.1206 sec/batch Epoch: 6/20... Training Step: 2576... Training loss: 1.2758... 0.1251 sec/batch Epoch: 6/20... Training Step: 2577... Training loss: 1.3215... 
0.1220 sec/batch Epoch: 6/20... Training Step: 2578... Training loss: 1.2118... 0.1233 sec/batch Epoch: 6/20... Training Step: 2579... Training loss: 1.1892... 0.1229 sec/batch Epoch: 6/20... Training Step: 2580... Training loss: 1.2476... 0.1219 sec/batch Epoch: 6/20... Training Step: 2581... Training loss: 1.3397... 0.1201 sec/batch Epoch: 6/20... Training Step: 2582... Training loss: 1.3370... 0.1207 sec/batch Epoch: 6/20... Training Step: 2583... Training loss: 1.2363... 0.1184 sec/batch Epoch: 6/20... Training Step: 2584... Training loss: 1.4695... 0.1232 sec/batch Epoch: 6/20... Training Step: 2585... Training loss: 1.4374... 0.1187 sec/batch Epoch: 6/20... Training Step: 2586... Training loss: 1.3395... 0.1225 sec/batch Epoch: 6/20... Training Step: 2587... Training loss: 1.5199... 0.1237 sec/batch Epoch: 6/20... Training Step: 2588... Training loss: 1.4206... 0.1168 sec/batch Epoch: 6/20... Training Step: 2589... Training loss: 1.3456... 0.1227 sec/batch Epoch: 6/20... Training Step: 2590... Training loss: 1.5636... 0.1204 sec/batch Epoch: 6/20... Training Step: 2591... Training loss: 1.4367... 0.1188 sec/batch Epoch: 6/20... Training Step: 2592... Training loss: 1.5237... 0.1191 sec/batch Epoch: 6/20... Training Step: 2593... Training loss: 1.4519... 0.1200 sec/batch Epoch: 6/20... Training Step: 2594... Training loss: 1.3393... 0.1347 sec/batch Epoch: 6/20... Training Step: 2595... Training loss: 1.3891... 0.1256 sec/batch Epoch: 6/20... Training Step: 2596... Training loss: 1.3778... 0.1285 sec/batch Epoch: 6/20... Training Step: 2597... Training loss: 1.3558... 0.1312 sec/batch Epoch: 6/20... Training Step: 2598... Training loss: 1.5473... 0.1301 sec/batch Epoch: 6/20... Training Step: 2599... Training loss: 1.2889... 0.1273 sec/batch Epoch: 6/20... Training Step: 2600... Training loss: 1.2634... 0.1215 sec/batch Epoch: 6/20... Training Step: 2601... Training loss: 1.2807... 0.1258 sec/batch Epoch: 6/20... Training Step: 2602... Training loss: 1.2044... 
0.1222 sec/batch Epoch: 6/20... Training Step: 2603... Training loss: 1.2936... 0.1293 sec/batch Epoch: 6/20... Training Step: 2604... Training loss: 1.4126... 0.1272 sec/batch Epoch: 6/20... Training Step: 2605... Training loss: 1.2088... 0.1256 sec/batch Epoch: 6/20... Training Step: 2606... Training loss: 1.3395... 0.1251 sec/batch Epoch: 6/20... Training Step: 2607... Training loss: 1.3479... 0.1249 sec/batch Epoch: 6/20... Training Step: 2608... Training loss: 1.3363... 0.1241 sec/batch Epoch: 6/20... Training Step: 2609... Training loss: 1.3678... 0.1269 sec/batch Epoch: 6/20... Training Step: 2610... Training loss: 1.2880... 0.1178 sec/batch Epoch: 6/20... Training Step: 2611... Training loss: 1.2258... 0.1240 sec/batch Epoch: 6/20... Training Step: 2612... Training loss: 1.2708... 0.1263 sec/batch Epoch: 6/20... Training Step: 2613... Training loss: 1.1810... 0.1246 sec/batch Epoch: 6/20... Training Step: 2614... Training loss: 1.3215... 0.1242 sec/batch Epoch: 6/20... Training Step: 2615... Training loss: 1.2815... 0.1235 sec/batch Epoch: 6/20... Training Step: 2616... Training loss: 1.6054... 0.1163 sec/batch Epoch: 6/20... Training Step: 2617... Training loss: 1.2212... 0.1251 sec/batch Epoch: 6/20... Training Step: 2618... Training loss: 1.2007... 0.1281 sec/batch Epoch: 6/20... Training Step: 2619... Training loss: 1.2158... 0.1251 sec/batch Epoch: 6/20... Training Step: 2620... Training loss: 1.1755... 0.1238 sec/batch Epoch: 6/20... Training Step: 2621... Training loss: 1.3614... 0.1213 sec/batch Epoch: 6/20... Training Step: 2622... Training loss: 1.2741... 0.1230 sec/batch Epoch: 6/20... Training Step: 2623... Training loss: 1.0992... 0.1220 sec/batch Epoch: 6/20... Training Step: 2624... Training loss: 1.4738... 0.1251 sec/batch Epoch: 6/20... Training Step: 2625... Training loss: 1.2304... 0.1324 sec/batch Epoch: 6/20... Training Step: 2626... Training loss: 1.3359... 0.1225 sec/batch Epoch: 6/20... Training Step: 2627... Training loss: 1.3527... 
0.1223 sec/batch Epoch: 6/20... Training Step: 2628... Training loss: 1.5696... 0.1153 sec/batch Epoch: 6/20... Training Step: 2629... Training loss: 1.3618... 0.1155 sec/batch Epoch: 6/20... Training Step: 2630... Training loss: 1.3211... 0.1222 sec/batch Epoch: 6/20... Training Step: 2631... Training loss: 1.3071... 0.1198 sec/batch Epoch: 6/20... Training Step: 2632... Training loss: 1.2305... 0.1288 sec/batch Epoch: 6/20... Training Step: 2633... Training loss: 1.2516... 0.1230 sec/batch Epoch: 6/20... Training Step: 2634... Training loss: 1.3357... 0.1215 sec/batch Epoch: 6/20... Training Step: 2635... Training loss: 1.0482... 0.1205 sec/batch Epoch: 6/20... Training Step: 2636... Training loss: 1.1613... 0.1237 sec/batch Epoch: 6/20... Training Step: 2637... Training loss: 1.0563... 0.1333 sec/batch Epoch: 6/20... Training Step: 2638... Training loss: 1.3082... 0.1238 sec/batch Epoch: 6/20... Training Step: 2639... Training loss: 1.2320... 0.1343 sec/batch Epoch: 6/20... Training Step: 2640... Training loss: 1.2189... 0.1269 sec/batch Epoch: 6/20... Training Step: 2641... Training loss: 1.1423... 0.1289 sec/batch Epoch: 6/20... Training Step: 2642... Training loss: 1.4718... 0.1234 sec/batch Epoch: 6/20... Training Step: 2643... Training loss: 1.2052... 0.1275 sec/batch Epoch: 6/20... Training Step: 2644... Training loss: 1.1737... 0.1191 sec/batch Epoch: 6/20... Training Step: 2645... Training loss: 1.1290... 0.1218 sec/batch Epoch: 6/20... Training Step: 2646... Training loss: 1.1563... 0.1336 sec/batch Epoch: 6/20... Training Step: 2647... Training loss: 1.2487... 0.1340 sec/batch Epoch: 6/20... Training Step: 2648... Training loss: 1.2252... 0.1251 sec/batch Epoch: 6/20... Training Step: 2649... Training loss: 1.2746... 0.1229 sec/batch Epoch: 6/20... Training Step: 2650... Training loss: 1.1701... 0.1234 sec/batch Epoch: 6/20... Training Step: 2651... Training loss: 1.2769... 0.1206 sec/batch Epoch: 6/20... Training Step: 2652... Training loss: 1.2191... 
0.1215 sec/batch Epoch: 6/20... Training Step: 2653... Training loss: 1.1704... 0.1262 sec/batch Epoch: 6/20... Training Step: 2654... Training loss: 1.2015... 0.1232 sec/batch Epoch: 6/20... Training Step: 2655... Training loss: 1.4035... 0.1229 sec/batch Epoch: 6/20... Training Step: 2656... Training loss: 1.1808... 0.1194 sec/batch Epoch: 6/20... Training Step: 2657... Training loss: 1.2434... 0.1213 sec/batch Epoch: 6/20... Training Step: 2658... Training loss: 1.1943... 0.1207 sec/batch Epoch: 6/20... Training Step: 2659... Training loss: 1.3294... 0.1212 sec/batch Epoch: 6/20... Training Step: 2660... Training loss: 1.1892... 0.1229 sec/batch Epoch: 6/20... Training Step: 2661... Training loss: 1.1274... 0.1214 sec/batch Epoch: 6/20... Training Step: 2662... Training loss: 1.2657... 0.1130 sec/batch Epoch: 6/20... Training Step: 2663... Training loss: 1.0259... 0.1222 sec/batch Epoch: 6/20... Training Step: 2664... Training loss: 1.3320... 0.1219 sec/batch Epoch: 6/20... Training Step: 2665... Training loss: 1.1805... 0.1227 sec/batch Epoch: 6/20... Training Step: 2666... Training loss: 1.0993... 0.1228 sec/batch Epoch: 6/20... Training Step: 2667... Training loss: 1.0682... 0.1194 sec/batch Epoch: 6/20... Training Step: 2668... Training loss: 1.4938... 0.1202 sec/batch Epoch: 6/20... Training Step: 2669... Training loss: 1.2091... 0.1202 sec/batch Epoch: 6/20... Training Step: 2670... Training loss: 1.2793... 0.1198 sec/batch Epoch: 6/20... Training Step: 2671... Training loss: 1.2122... 0.1252 sec/batch Epoch: 6/20... Training Step: 2672... Training loss: 1.1276... 0.1234 sec/batch Epoch: 6/20... Training Step: 2673... Training loss: 1.0146... 0.1189 sec/batch Epoch: 6/20... Training Step: 2674... Training loss: 0.9861... 0.1251 sec/batch Epoch: 6/20... Training Step: 2675... Training loss: 1.2772... 0.1225 sec/batch Epoch: 6/20... Training Step: 2676... Training loss: 1.1507... 0.1239 sec/batch Epoch: 6/20... Training Step: 2677... Training loss: 1.1612... 
0.1197 sec/batch Epoch: 6/20... Training Step: 2678... Training loss: 1.3257... 0.1222 sec/batch Epoch: 6/20... Training Step: 2679... Training loss: 1.5169... 0.1244 sec/batch Epoch: 6/20... Training Step: 2680... Training loss: 1.0488... 0.1287 sec/batch Epoch: 6/20... Training Step: 2681... Training loss: 1.5007... 0.1309 sec/batch Epoch: 6/20... Training Step: 2682... Training loss: 1.1452... 0.1189 sec/batch Epoch: 6/20... Training Step: 2683... Training loss: 1.1118... 0.1197 sec/batch Epoch: 6/20... Training Step: 2684... Training loss: 1.1954... 0.1199 sec/batch Epoch: 6/20... Training Step: 2685... Training loss: 1.1546... 0.1231 sec/batch Epoch: 6/20... Training Step: 2686... Training loss: 1.3192... 0.1176 sec/batch Epoch: 6/20... Training Step: 2687... Training loss: 1.2011... 0.1250 sec/batch Epoch: 6/20... Training Step: 2688... Training loss: 1.4733... 0.1239 sec/batch Epoch: 6/20... Training Step: 2689... Training loss: 1.2262... 0.1239 sec/batch Epoch: 6/20... Training Step: 2690... Training loss: 1.1838... 0.1271 sec/batch Epoch: 6/20... Training Step: 2691... Training loss: 1.1633... 0.1272 sec/batch Epoch: 6/20... Training Step: 2692... Training loss: 1.2878... 0.1207 sec/batch Epoch: 6/20... Training Step: 2693... Training loss: 1.2619... 0.1254 sec/batch Epoch: 6/20... Training Step: 2694... Training loss: 1.3523... 0.1234 sec/batch Epoch: 6/20... Training Step: 2695... Training loss: 1.2359... 0.1233 sec/batch Epoch: 6/20... Training Step: 2696... Training loss: 1.2706... 0.1244 sec/batch Epoch: 6/20... Training Step: 2697... Training loss: 1.3521... 0.1233 sec/batch Epoch: 6/20... Training Step: 2698... Training loss: 1.3451... 0.1238 sec/batch Epoch: 6/20... Training Step: 2699... Training loss: 1.1686... 0.1206 sec/batch Epoch: 6/20... Training Step: 2700... Training loss: 1.3626... 0.1224 sec/batch Epoch: 6/20... Training Step: 2701... Training loss: 1.1392... 0.1194 sec/batch Epoch: 6/20... Training Step: 2702... Training loss: 1.1876... 
0.1217 sec/batch Epoch: 6/20... Training Step: 2703... Training loss: 1.2004... 0.1248 sec/batch Epoch: 6/20... Training Step: 2704... Training loss: 1.2247... 0.1201 sec/batch Epoch: 6/20... Training Step: 2705... Training loss: 1.1215... 0.1227 sec/batch Epoch: 6/20... Training Step: 2706... Training loss: 1.2892... 0.1206 sec/batch Epoch: 6/20... Training Step: 2707... Training loss: 1.2517... 0.1207 sec/batch Epoch: 6/20... Training Step: 2708... Training loss: 1.0285... 0.1183 sec/batch Epoch: 6/20... Training Step: 2709... Training loss: 1.0774... 0.1257 sec/batch Epoch: 6/20... Training Step: 2710... Training loss: 1.2098... 0.1220 sec/batch Epoch: 6/20... Training Step: 2711... Training loss: 1.0020... 0.1178 sec/batch Epoch: 6/20... Training Step: 2712... Training loss: 1.0795... 0.1211 sec/batch Epoch: 6/20... Training Step: 2713... Training loss: 1.0765... 0.1208 sec/batch Epoch: 6/20... Training Step: 2714... Training loss: 1.2854... 0.1189 sec/batch Epoch: 6/20... Training Step: 2715... Training loss: 1.1075... 0.1223 sec/batch Epoch: 6/20... Training Step: 2716... Training loss: 1.3624... 0.1243 sec/batch Epoch: 6/20... Training Step: 2717... Training loss: 1.1206... 0.1196 sec/batch Epoch: 6/20... Training Step: 2718... Training loss: 1.1915... 0.1262 sec/batch Epoch: 6/20... Training Step: 2719... Training loss: 1.1577... 0.1227 sec/batch Epoch: 6/20... Training Step: 2720... Training loss: 1.1904... 0.1211 sec/batch Epoch: 6/20... Training Step: 2721... Training loss: 1.1397... 0.1194 sec/batch Epoch: 6/20... Training Step: 2722... Training loss: 1.1919... 0.1224 sec/batch Epoch: 6/20... Training Step: 2723... Training loss: 1.1319... 0.1285 sec/batch Epoch: 6/20... Training Step: 2724... Training loss: 1.3126... 0.1211 sec/batch Epoch: 6/20... Training Step: 2725... Training loss: 1.2256... 0.1232 sec/batch Epoch: 6/20... Training Step: 2726... Training loss: 1.4587... 0.1203 sec/batch Epoch: 6/20... Training Step: 2727... Training loss: 1.2153... 
0.1263 sec/batch Epoch: 6/20... Training Step: 2728... Training loss: 1.4818... 0.1203 sec/batch Epoch: 6/20... Training Step: 2729... Training loss: 1.3395... 0.1196 sec/batch Epoch: 6/20... Training Step: 2730... Training loss: 1.2651... 0.1245 sec/batch Epoch: 6/20... Training Step: 2731... Training loss: 1.1330... 0.1297 sec/batch Epoch: 6/20... Training Step: 2732... Training loss: 1.3653... 0.1308 sec/batch Epoch: 6/20... Training Step: 2733... Training loss: 1.3198... 0.1308 sec/batch Epoch: 6/20... Training Step: 2734... Training loss: 1.3021... 0.1338 sec/batch Epoch: 6/20... Training Step: 2735... Training loss: 1.5001... 0.1254 sec/batch Epoch: 6/20... Training Step: 2736... Training loss: 1.4025... 0.1309 sec/batch Epoch: 6/20... Training Step: 2737... Training loss: 1.3685... 0.1307 sec/batch Epoch: 6/20... Training Step: 2738... Training loss: 1.2107... 0.1322 sec/batch Epoch: 6/20... Training Step: 2739... Training loss: 1.2744... 0.1286 sec/batch Epoch: 6/20... Training Step: 2740... Training loss: 1.0829... 0.1244 sec/batch Epoch: 6/20... Training Step: 2741... Training loss: 1.4387... 0.1222 sec/batch Epoch: 6/20... Training Step: 2742... Training loss: 1.2008... 0.1229 sec/batch Epoch: 6/20... Training Step: 2743... Training loss: 1.3727... 0.1290 sec/batch Epoch: 6/20... Training Step: 2744... Training loss: 1.4844... 0.1224 sec/batch Epoch: 6/20... Training Step: 2745... Training loss: 1.2374... 0.1259 sec/batch Epoch: 6/20... Training Step: 2746... Training loss: 1.2671... 0.1257 sec/batch Epoch: 6/20... Training Step: 2747... Training loss: 1.3834... 0.1239 sec/batch Epoch: 6/20... Training Step: 2748... Training loss: 1.3830... 0.1199 sec/batch Epoch: 6/20... Training Step: 2749... Training loss: 1.1991... 0.1228 sec/batch Epoch: 6/20... Training Step: 2750... Training loss: 1.2974... 0.1214 sec/batch Epoch: 6/20... Training Step: 2751... Training loss: 1.5204... 0.1175 sec/batch Epoch: 6/20... Training Step: 2752... Training loss: 1.2199... 
0.1257 sec/batch Epoch: 6/20... Training Step: 2753... Training loss: 1.6371... 0.1263 sec/batch Epoch: 6/20... Training Step: 2754... Training loss: 1.4243... 0.1258 sec/batch Epoch: 6/20... Training Step: 2755... Training loss: 1.2148... 0.1185 sec/batch Epoch: 6/20... Training Step: 2756... Training loss: 1.3230... 0.1256 sec/batch Epoch: 6/20... Training Step: 2757... Training loss: 1.2326... 0.1266 sec/batch Epoch: 6/20... Training Step: 2758... Training loss: 1.4747... 0.1204 sec/batch Epoch: 6/20... Training Step: 2759... Training loss: 1.4895... 0.1261 sec/batch Epoch: 6/20... Training Step: 2760... Training loss: 1.6230... 0.1221 sec/batch Epoch: 6/20... Training Step: 2761... Training loss: 1.2875... 0.1209 sec/batch Epoch: 6/20... Training Step: 2762... Training loss: 1.2346... 0.1221 sec/batch Epoch: 6/20... Training Step: 2763... Training loss: 1.3268... 0.1191 sec/batch Epoch: 6/20... Training Step: 2764... Training loss: 1.2308... 0.1207 sec/batch Epoch: 6/20... Training Step: 2765... Training loss: 1.3149... 0.1254 sec/batch Epoch: 6/20... Training Step: 2766... Training loss: 1.1606... 0.1224 sec/batch Epoch: 6/20... Training Step: 2767... Training loss: 1.2550... 0.1219 sec/batch Epoch: 6/20... Training Step: 2768... Training loss: 1.1248... 0.1224 sec/batch Epoch: 6/20... Training Step: 2769... Training loss: 1.2580... 0.1248 sec/batch Epoch: 6/20... Training Step: 2770... Training loss: 1.3312... 0.1228 sec/batch Epoch: 6/20... Training Step: 2771... Training loss: 1.3274... 0.1232 sec/batch Epoch: 6/20... Training Step: 2772... Training loss: 1.2951... 0.1216 sec/batch Epoch: 6/20... Training Step: 2773... Training loss: 1.1720... 0.1265 sec/batch Epoch: 6/20... Training Step: 2774... Training loss: 1.5893... 0.1210 sec/batch Epoch: 6/20... Training Step: 2775... Training loss: 1.4110... 0.1226 sec/batch Epoch: 6/20... Training Step: 2776... Training loss: 1.1963... 0.1227 sec/batch Epoch: 6/20... Training Step: 2777... Training loss: 1.1328... 
0.1239 sec/batch Epoch: 6/20... Training Step: 2778... Training loss: 1.0506... 0.1225 sec/batch Epoch: 6/20... Training Step: 2779... Training loss: 1.0907... 0.1211 sec/batch Epoch: 6/20... Training Step: 2780... Training loss: 1.2546... 0.1198 sec/batch Epoch: 6/20... Training Step: 2781... Training loss: 1.4092... 0.1238 sec/batch Epoch: 6/20... Training Step: 2782... Training loss: 1.1441... 0.1231 sec/batch Epoch: 6/20... Training Step: 2783... Training loss: 1.2670... 0.1222 sec/batch Epoch: 6/20... Training Step: 2784... Training loss: 1.2103... 0.1212 sec/batch Epoch: 7/20... Training Step: 2785... Training loss: 1.6022... 0.1177 sec/batch Epoch: 7/20... Training Step: 2786... Training loss: 1.2978... 0.1223 sec/batch Epoch: 7/20... Training Step: 2787... Training loss: 1.2757... 0.1219 sec/batch Epoch: 7/20... Training Step: 2788... Training loss: 1.2762... 0.1234 sec/batch Epoch: 7/20... Training Step: 2789... Training loss: 1.3186... 0.1196 sec/batch Epoch: 7/20... Training Step: 2790... Training loss: 1.1030... 0.1203 sec/batch Epoch: 7/20... Training Step: 2791... Training loss: 1.4062... 0.1198 sec/batch Epoch: 7/20... Training Step: 2792... Training loss: 1.2127... 0.1219 sec/batch Epoch: 7/20... Training Step: 2793... Training loss: 1.0612... 0.1180 sec/batch Epoch: 7/20... Training Step: 2794... Training loss: 1.3288... 0.1236 sec/batch Epoch: 7/20... Training Step: 2795... Training loss: 1.1410... 0.1246 sec/batch Epoch: 7/20... Training Step: 2796... Training loss: 1.0711... 0.1205 sec/batch Epoch: 7/20... Training Step: 2797... Training loss: 1.4178... 0.1219 sec/batch Epoch: 7/20... Training Step: 2798... Training loss: 1.0990... 0.1226 sec/batch Epoch: 7/20... Training Step: 2799... Training loss: 1.2992... 0.1213 sec/batch Epoch: 7/20... Training Step: 2800... Training loss: 1.3827... 0.1229 sec/batch Epoch: 7/20... Training Step: 2801... Training loss: 1.2046... 0.1240 sec/batch Epoch: 7/20... Training Step: 2802... Training loss: 1.0667... 
0.1220 sec/batch Epoch: 7/20... Training Step: 2803... Training loss: 1.2391... 0.1273 sec/batch Epoch: 7/20... Training Step: 2804... Training loss: 1.0837... 0.1251 sec/batch Epoch: 7/20... Training Step: 2805... Training loss: 1.3235... 0.1246 sec/batch Epoch: 7/20... Training Step: 2806... Training loss: 1.1864... 0.1203 sec/batch Epoch: 7/20... Training Step: 2807... Training loss: 1.4851... 0.1232 sec/batch Epoch: 7/20... Training Step: 2808... Training loss: 1.1897... 0.1209 sec/batch Epoch: 7/20... Training Step: 2809... Training loss: 1.1682... 0.1201 sec/batch Epoch: 7/20... Training Step: 2810... Training loss: 1.2310... 0.1219 sec/batch Epoch: 7/20... Training Step: 2811... Training loss: 1.3416... 0.1236 sec/batch Epoch: 7/20... Training Step: 2812... Training loss: 1.0823... 0.1236 sec/batch Epoch: 7/20... Training Step: 2813... Training loss: 1.1574... 0.1211 sec/batch Epoch: 7/20... Training Step: 2814... Training loss: 1.2997... 0.1232 sec/batch Epoch: 7/20... Training Step: 2815... Training loss: 1.0314... 0.1267 sec/batch Epoch: 7/20... Training Step: 2816... Training loss: 1.1368... 0.1215 sec/batch Epoch: 7/20... Training Step: 2817... Training loss: 1.0489... 0.1185 sec/batch Epoch: 7/20... Training Step: 2818... Training loss: 1.1406... 0.1197 sec/batch Epoch: 7/20... Training Step: 2819... Training loss: 1.1123... 0.1178 sec/batch Epoch: 7/20... Training Step: 2820... Training loss: 1.1562... 0.1204 sec/batch Epoch: 7/20... Training Step: 2821... Training loss: 1.3419... 0.1194 sec/batch Epoch: 7/20... Training Step: 2822... Training loss: 1.1514... 0.1193 sec/batch Epoch: 7/20... Training Step: 2823... Training loss: 1.1650... 0.1216 sec/batch Epoch: 7/20... Training Step: 2824... Training loss: 1.4537... 0.1186 sec/batch Epoch: 7/20... Training Step: 2825... Training loss: 1.2162... 0.1238 sec/batch Epoch: 7/20... Training Step: 2826... Training loss: 1.0869... 0.1190 sec/batch Epoch: 7/20... Training Step: 2827... Training loss: 1.3064... 
0.1227 sec/batch Epoch: 7/20... Training Step: 2828... Training loss: 1.0138... 0.1203 sec/batch Epoch: 7/20... Training Step: 2829... Training loss: 1.2151... 0.1217 sec/batch Epoch: 7/20... Training Step: 2830... Training loss: 1.1558... 0.1201 sec/batch Epoch: 7/20... Training Step: 2831... Training loss: 1.2438... 0.1206 sec/batch Epoch: 7/20... Training Step: 2832... Training loss: 1.2028... 0.1242 sec/batch Epoch: 7/20... Training Step: 2833... Training loss: 1.1428... 0.1235 sec/batch Epoch: 7/20... Training Step: 2834... Training loss: 1.2648... 0.1203 sec/batch Epoch: 7/20... Training Step: 2835... Training loss: 1.2467... 0.1212 sec/batch Epoch: 7/20... Training Step: 2836... Training loss: 1.2900... 0.1243 sec/batch Epoch: 7/20... Training Step: 2837... Training loss: 1.2872... 0.1210 sec/batch Epoch: 7/20... Training Step: 2838... Training loss: 1.1544... 0.1192 sec/batch Epoch: 7/20... Training Step: 2839... Training loss: 1.1249... 0.1200 sec/batch Epoch: 7/20... Training Step: 2840... Training loss: 1.0838... 0.1202 sec/batch Epoch: 7/20... Training Step: 2841... Training loss: 1.2728... 0.1338 sec/batch Epoch: 7/20... Training Step: 2842... Training loss: 1.3078... 0.1325 sec/batch Epoch: 7/20... Training Step: 2843... Training loss: 1.0610... 0.1209 sec/batch Epoch: 7/20... Training Step: 2844... Training loss: 1.0928... 0.1209 sec/batch Epoch: 7/20... Training Step: 2845... Training loss: 1.1602... 0.1226 sec/batch Epoch: 7/20... Training Step: 2846... Training loss: 1.3309... 0.1219 sec/batch Epoch: 7/20... Training Step: 2847... Training loss: 1.0787... 0.1203 sec/batch Epoch: 7/20... Training Step: 2848... Training loss: 1.1858... 0.1213 sec/batch Epoch: 7/20... Training Step: 2849... Training loss: 1.0879... 0.1207 sec/batch Epoch: 7/20... Training Step: 2850... Training loss: 1.3595... 0.1214 sec/batch Epoch: 7/20... Training Step: 2851... Training loss: 1.1286... 0.1198 sec/batch Epoch: 7/20... Training Step: 2852... Training loss: 1.2659... 
0.1208 sec/batch Epoch: 7/20... Training Step: 2853... Training loss: 1.2338... 0.1182 sec/batch Epoch: 7/20... Training Step: 2854... Training loss: 1.2420... 0.1199 sec/batch Epoch: 7/20... Training Step: 2855... Training loss: 1.3005... 0.1250 sec/batch Epoch: 7/20... Training Step: 2856... Training loss: 1.1923... 0.1210 sec/batch Epoch: 7/20... Training Step: 2857... Training loss: 1.1712... 0.1237 sec/batch Epoch: 7/20... Training Step: 2858... Training loss: 1.0615... 0.1242 sec/batch Epoch: 7/20... Training Step: 2859... Training loss: 1.3661... 0.1226 sec/batch Epoch: 7/20... Training Step: 2860... Training loss: 1.0779... 0.1207 sec/batch Epoch: 7/20... Training Step: 2861... Training loss: 1.1600... 0.1213 sec/batch Epoch: 7/20... Training Step: 2862... Training loss: 1.2276... 0.1154 sec/batch Epoch: 7/20... Training Step: 2863... Training loss: 1.2054... 0.1197 sec/batch Epoch: 7/20... Training Step: 2864... Training loss: 1.1183... 0.1257 sec/batch Epoch: 7/20... Training Step: 2865... Training loss: 1.2551... 0.1238 sec/batch Epoch: 7/20... Training Step: 2866... Training loss: 1.2596... 0.1187 sec/batch Epoch: 7/20... Training Step: 2867... Training loss: 1.0930... 0.1226 sec/batch Epoch: 7/20... Training Step: 2868... Training loss: 1.3685... 0.1224 sec/batch Epoch: 7/20... Training Step: 2869... Training loss: 1.2715... 0.1203 sec/batch Epoch: 7/20... Training Step: 2870... Training loss: 1.3676... 0.1234 sec/batch Epoch: 7/20... Training Step: 2871... Training loss: 1.1656... 0.1248 sec/batch Epoch: 7/20... Training Step: 2872... Training loss: 1.3329... 0.1228 sec/batch Epoch: 7/20... Training Step: 2873... Training loss: 1.4455... 0.1200 sec/batch Epoch: 7/20... Training Step: 2874... Training loss: 1.1862... 0.1207 sec/batch Epoch: 7/20... Training Step: 2875... Training loss: 1.3555... 0.1234 sec/batch Epoch: 7/20... Training Step: 2876... Training loss: 1.3545... 0.1203 sec/batch Epoch: 7/20... Training Step: 2877... Training loss: 1.0862... 
0.1242 sec/batch Epoch: 7/20... Training Step: 2878... Training loss: 1.4092... 0.1167 sec/batch Epoch: 7/20... Training Step: 2879... Training loss: 1.2123... 0.1235 sec/batch Epoch: 7/20... Training Step: 2880... Training loss: 1.1842... 0.1177 sec/batch Epoch: 7/20... Training Step: 2881... Training loss: 1.4710... 0.1194 sec/batch Epoch: 7/20... Training Step: 2882... Training loss: 1.2854... 0.1227 sec/batch Epoch: 7/20... Training Step: 2883... Training loss: 1.2302... 0.1188 sec/batch Epoch: 7/20... Training Step: 2884... Training loss: 1.1533... 0.1231 sec/batch Epoch: 7/20... Training Step: 2885... Training loss: 1.3681... 0.1234 sec/batch Epoch: 7/20... Training Step: 2886... Training loss: 1.3707... 0.1215 sec/batch Epoch: 7/20... Training Step: 2887... Training loss: 1.5041... 0.1231 sec/batch Epoch: 7/20... Training Step: 2888... Training loss: 1.2210... 0.1269 sec/batch Epoch: 7/20... Training Step: 2889... Training loss: 1.3641... 0.1275 sec/batch Epoch: 7/20... Training Step: 2890... Training loss: 1.4395... 0.1209 sec/batch Epoch: 7/20... Training Step: 2891... Training loss: 1.2698... 0.1222 sec/batch Epoch: 7/20... Training Step: 2892... Training loss: 1.4226... 0.1224 sec/batch Epoch: 7/20... Training Step: 2893... Training loss: 1.2878... 0.1231 sec/batch Epoch: 7/20... Training Step: 2894... Training loss: 1.2959... 0.1200 sec/batch Epoch: 7/20... Training Step: 2895... Training loss: 1.3491... 0.1204 sec/batch Epoch: 7/20... Training Step: 2896... Training loss: 1.1730... 0.1245 sec/batch Epoch: 7/20... Training Step: 2897... Training loss: 1.3237... 0.1280 sec/batch Epoch: 7/20... Training Step: 2898... Training loss: 1.3830... 0.1232 sec/batch Epoch: 7/20... Training Step: 2899... Training loss: 1.3956... 0.1281 sec/batch Epoch: 7/20... Training Step: 2900... Training loss: 1.2348... 0.1208 sec/batch Epoch: 7/20... Training Step: 2901... Training loss: 1.3487... 0.1238 sec/batch Epoch: 7/20... Training Step: 2902... Training loss: 1.4461... 
0.1190 sec/batch Epoch: 7/20... Training Step: 2903... Training loss: 1.2144... 0.1230 sec/batch Epoch: 7/20... Training Step: 2904... Training loss: 1.1711... 0.1243 sec/batch Epoch: 7/20... Training Step: 2905... Training loss: 1.2793... 0.1243 sec/batch Epoch: 7/20... Training Step: 2906... Training loss: 1.3035... 0.1234 sec/batch Epoch: 7/20... Training Step: 2907... Training loss: 1.3720... 0.1221 sec/batch Epoch: 7/20... Training Step: 2908... Training loss: 1.2514... 0.1269 sec/batch Epoch: 7/20... Training Step: 2909... Training loss: 1.3289... 0.1195 sec/batch Epoch: 7/20... Training Step: 2910... Training loss: 1.0861... 0.1220 sec/batch Epoch: 7/20... Training Step: 2911... Training loss: 1.2257... 0.1227 sec/batch Epoch: 7/20... Training Step: 2912... Training loss: 1.2460... 0.1220 sec/batch Epoch: 7/20... Training Step: 2913... Training loss: 1.3374... 0.1176 sec/batch Epoch: 7/20... Training Step: 2914... Training loss: 1.2274... 0.1227 sec/batch Epoch: 7/20... Training Step: 2915... Training loss: 1.5028... 0.1224 sec/batch Epoch: 7/20... Training Step: 2916... Training loss: 1.2695... 0.1222 sec/batch Epoch: 7/20... Training Step: 2917... Training loss: 1.2075... 0.1258 sec/batch Epoch: 7/20... Training Step: 2918... Training loss: 1.4489... 0.1207 sec/batch Epoch: 7/20... Training Step: 2919... Training loss: 1.2187... 0.1203 sec/batch Epoch: 7/20... Training Step: 2920... Training loss: 1.1162... 0.1191 sec/batch Epoch: 7/20... Training Step: 2921... Training loss: 1.1236... 0.1229 sec/batch Epoch: 7/20... Training Step: 2922... Training loss: 1.2346... 0.1249 sec/batch Epoch: 7/20... Training Step: 2923... Training loss: 1.1896... 0.1222 sec/batch Epoch: 7/20... Training Step: 2924... Training loss: 1.3334... 0.1208 sec/batch Epoch: 7/20... Training Step: 2925... Training loss: 1.1305... 0.1225 sec/batch Epoch: 7/20... Training Step: 2926... Training loss: 1.2262... 0.1196 sec/batch Epoch: 7/20... Training Step: 2927... Training loss: 1.1787... 
0.1202 sec/batch Epoch: 7/20... Training Step: 2928... Training loss: 1.2920... 0.1202 sec/batch Epoch: 7/20... Training Step: 2929... Training loss: 1.2056... 0.1204 sec/batch Epoch: 7/20... Training Step: 2930... Training loss: 1.1178... 0.1186 sec/batch Epoch: 7/20... Training Step: 2931... Training loss: 1.2777... 0.1210 sec/batch Epoch: 7/20... Training Step: 2932... Training loss: 1.1138... 0.1203 sec/batch Epoch: 7/20... Training Step: 2933... Training loss: 1.2942... 0.1212 sec/batch Epoch: 7/20... Training Step: 2934... Training loss: 1.4486... 0.1211 sec/batch Epoch: 7/20... Training Step: 2935... Training loss: 1.3656... 0.1198 sec/batch Epoch: 7/20... Training Step: 2936... Training loss: 1.4583... 0.1221 sec/batch Epoch: 7/20... Training Step: 2937... Training loss: 1.5172... 0.1205 sec/batch Epoch: 7/20... Training Step: 2938... Training loss: 1.3577... 0.1245 sec/batch Epoch: 7/20... Training Step: 2939... Training loss: 1.2891... 0.1251 sec/batch Epoch: 7/20... Training Step: 2940... Training loss: 1.1759... 0.1182 sec/batch Epoch: 7/20... Training Step: 2941... Training loss: 1.2062... 0.1200 sec/batch Epoch: 7/20... Training Step: 2942... Training loss: 1.1917... 0.1182 sec/batch Epoch: 7/20... Training Step: 2943... Training loss: 1.1627... 0.1222 sec/batch Epoch: 7/20... Training Step: 2944... Training loss: 1.1959... 0.1187 sec/batch Epoch: 7/20... Training Step: 2945... Training loss: 1.3872... 0.1206 sec/batch Epoch: 7/20... Training Step: 2946... Training loss: 1.2075... 0.1233 sec/batch Epoch: 7/20... Training Step: 2947... Training loss: 1.4168... 0.1262 sec/batch Epoch: 7/20... Training Step: 2948... Training loss: 1.0882... 0.1221 sec/batch Epoch: 7/20... Training Step: 2949... Training loss: 1.2932... 0.1227 sec/batch Epoch: 7/20... Training Step: 2950... Training loss: 1.2506... 0.1207 sec/batch Epoch: 7/20... Training Step: 2951... Training loss: 1.1376... 0.1212 sec/batch Epoch: 7/20... Training Step: 2952... Training loss: 1.3625... 
0.1222 sec/batch Epoch: 7/20... Training Step: 2953... Training loss: 1.1834... 0.1280 sec/batch Epoch: 7/20... Training Step: 2954... Training loss: 1.2536... 0.1206 sec/batch Epoch: 7/20... Training Step: 2955... Training loss: 1.3487... 0.1226 sec/batch Epoch: 7/20... Training Step: 2956... Training loss: 1.3959... 0.1199 sec/batch Epoch: 7/20... Training Step: 2957... Training loss: 1.1147... 0.1219 sec/batch Epoch: 7/20... Training Step: 2958... Training loss: 1.3603... 0.1252 sec/batch Epoch: 7/20... Training Step: 2959... Training loss: 1.2933... 0.1203 sec/batch Epoch: 7/20... Training Step: 2960... Training loss: 1.1402... 0.1222 sec/batch Epoch: 7/20... Training Step: 2961... Training loss: 1.1359... 0.1216 sec/batch Epoch: 7/20... Training Step: 2962... Training loss: 1.4074... 0.1238 sec/batch Epoch: 7/20... Training Step: 2963... Training loss: 1.0790... 0.1220 sec/batch Epoch: 7/20... Training Step: 2964... Training loss: 1.3171... 0.1174 sec/batch Epoch: 7/20... Training Step: 2965... Training loss: 1.0411... 0.1194 sec/batch Epoch: 7/20... Training Step: 2966... Training loss: 1.3761... 0.1238 sec/batch Epoch: 7/20... Training Step: 2967... Training loss: 1.3227... 0.1253 sec/batch Epoch: 7/20... Training Step: 2968... Training loss: 1.1409... 0.1206 sec/batch Epoch: 7/20... Training Step: 2969... Training loss: 1.4530... 0.1249 sec/batch Epoch: 7/20... Training Step: 2970... Training loss: 1.2685... 0.1342 sec/batch Epoch: 7/20... Training Step: 2971... Training loss: 1.3722... 0.1291 sec/batch Epoch: 7/20... Training Step: 2972... Training loss: 1.0738... 0.1183 sec/batch Epoch: 7/20... Training Step: 2973... Training loss: 1.3365... 0.1143 sec/batch Epoch: 7/20... Training Step: 2974... Training loss: 1.2478... 0.1185 sec/batch Epoch: 7/20... Training Step: 2975... Training loss: 1.2580... 0.1272 sec/batch Epoch: 7/20... Training Step: 2976... Training loss: 1.4199... 0.1286 sec/batch Epoch: 7/20... Training Step: 2977... Training loss: 1.2238... 
0.1252 sec/batch Epoch: 7/20... Training Step: 2978... Training loss: 1.1797... 0.1318 sec/batch Epoch: 7/20... Training Step: 2979... Training loss: 1.2900... 0.1397 sec/batch Epoch: 7/20... Training Step: 2980... Training loss: 1.3102... 0.1459 sec/batch Epoch: 7/20... Training Step: 2981... Training loss: 1.1616... 0.1313 sec/batch Epoch: 7/20... Training Step: 2982... Training loss: 1.3561... 0.1290 sec/batch Epoch: 7/20... Training Step: 2983... Training loss: 0.9945... 0.1355 sec/batch Epoch: 7/20... Training Step: 2984... Training loss: 1.1149... 0.1252 sec/batch Epoch: 7/20... Training Step: 2985... Training loss: 1.1706... 0.1185 sec/batch Epoch: 7/20... Training Step: 2986... Training loss: 1.2888... 0.1164 sec/batch Epoch: 7/20... Training Step: 2987... Training loss: 1.2587... 0.1146 sec/batch Epoch: 7/20... Training Step: 2988... Training loss: 1.3948... 0.1234 sec/batch Epoch: 7/20... Training Step: 2989... Training loss: 1.0784... 0.1209 sec/batch Epoch: 7/20... Training Step: 2990... Training loss: 1.1056... 0.1214 sec/batch Epoch: 7/20... Training Step: 2991... Training loss: 1.1471... 0.1236 sec/batch Epoch: 7/20... Training Step: 2992... Training loss: 1.3841... 0.1199 sec/batch Epoch: 7/20... Training Step: 2993... Training loss: 1.2974... 0.1326 sec/batch Epoch: 7/20... Training Step: 2994... Training loss: 1.1223... 0.1776 sec/batch Epoch: 7/20... Training Step: 2995... Training loss: 1.1056... 0.1571 sec/batch Epoch: 7/20... Training Step: 2996... Training loss: 1.3590... 0.1420 sec/batch Epoch: 7/20... Training Step: 2997... Training loss: 1.3812... 0.1396 sec/batch Epoch: 7/20... Training Step: 2998... Training loss: 1.2572... 0.1149 sec/batch Epoch: 7/20... Training Step: 2999... Training loss: 1.3649... 0.1176 sec/batch Epoch: 7/20... Training Step: 3000... Training loss: 1.1410... 0.1182 sec/batch Epoch: 7/20... Training Step: 3001... Training loss: 1.3187... 0.1238 sec/batch Epoch: 7/20... Training Step: 3002... Training loss: 1.2409... 
0.1212 sec/batch Epoch: 7/20... Training Step: 3003... Training loss: 1.3818... 0.1233 sec/batch Epoch: 7/20... Training Step: 3004... Training loss: 1.2768... 0.1206 sec/batch Epoch: 7/20... Training Step: 3005... Training loss: 1.1818... 0.1230 sec/batch Epoch: 7/20... Training Step: 3006... Training loss: 1.5467... 0.1201 sec/batch Epoch: 7/20... Training Step: 3007... Training loss: 1.3526... 0.1227 sec/batch Epoch: 7/20... Training Step: 3008... Training loss: 1.5588... 0.1249 sec/batch Epoch: 7/20... Training Step: 3009... Training loss: 1.2423... 0.1187 sec/batch Epoch: 7/20... Training Step: 3010... Training loss: 1.3806... 0.1207 sec/batch Epoch: 7/20... Training Step: 3011... Training loss: 1.3839... 0.1304 sec/batch Epoch: 7/20... Training Step: 3012... Training loss: 1.2262... 0.1309 sec/batch Epoch: 7/20... Training Step: 3013... Training loss: 1.2710... 0.1349 sec/batch Epoch: 7/20... Training Step: 3014... Training loss: 1.2469... 0.1354 sec/batch Epoch: 7/20... Training Step: 3015... Training loss: 1.2859... 0.1282 sec/batch Epoch: 7/20... Training Step: 3016... Training loss: 1.2387... 0.1272 sec/batch Epoch: 7/20... Training Step: 3017... Training loss: 1.5192... 0.1197 sec/batch Epoch: 7/20... Training Step: 3018... Training loss: 1.2299... 0.1232 sec/batch Epoch: 7/20... Training Step: 3019... Training loss: 1.4920... 0.1184 sec/batch Epoch: 7/20... Training Step: 3020... Training loss: 1.2855... 0.1184 sec/batch Epoch: 7/20... Training Step: 3021... Training loss: 1.5329... 0.1233 sec/batch Epoch: 7/20... Training Step: 3022... Training loss: 1.0556... 0.1228 sec/batch Epoch: 7/20... Training Step: 3023... Training loss: 1.2808... 0.1174 sec/batch Epoch: 7/20... Training Step: 3024... Training loss: 1.3961... 0.1226 sec/batch Epoch: 7/20... Training Step: 3025... Training loss: 1.1147... 0.1225 sec/batch Epoch: 7/20... Training Step: 3026... Training loss: 1.1854... 0.1183 sec/batch Epoch: 7/20... Training Step: 3027... Training loss: 1.3955... 
0.1187 sec/batch Epoch: 7/20... Training Step: 3028... Training loss: 1.3307... 0.1233 sec/batch Epoch: 7/20... Training Step: 3029... Training loss: 1.3542... 0.1219 sec/batch Epoch: 7/20... Training Step: 3030... Training loss: 1.1325... 0.1231 sec/batch Epoch: 7/20... Training Step: 3031... Training loss: 1.1863... 0.1202 sec/batch Epoch: 7/20... Training Step: 3032... Training loss: 1.4447... 0.1209 sec/batch Epoch: 7/20... Training Step: 3033... Training loss: 1.2066... 0.1218 sec/batch Epoch: 7/20... Training Step: 3034... Training loss: 1.3334... 0.1214 sec/batch Epoch: 7/20... Training Step: 3035... Training loss: 1.4566... 0.1270 sec/batch Epoch: 7/20... Training Step: 3036... Training loss: 1.3020... 0.1329 sec/batch Epoch: 7/20... Training Step: 3037... Training loss: 1.1827... 0.1238 sec/batch Epoch: 7/20... Training Step: 3038... Training loss: 1.2752... 0.1216 sec/batch Epoch: 7/20... Training Step: 3039... Training loss: 1.2711... 0.1201 sec/batch Epoch: 7/20... Training Step: 3040... Training loss: 1.3104... 0.1230 sec/batch Epoch: 7/20... Training Step: 3041... Training loss: 1.3659... 0.1229 sec/batch Epoch: 7/20... Training Step: 3042... Training loss: 1.1547... 0.1210 sec/batch Epoch: 7/20... Training Step: 3043... Training loss: 1.1271... 0.1206 sec/batch Epoch: 7/20... Training Step: 3044... Training loss: 1.2393... 0.1237 sec/batch Epoch: 7/20... Training Step: 3045... Training loss: 1.3038... 0.1250 sec/batch Epoch: 7/20... Training Step: 3046... Training loss: 1.4131... 0.1187 sec/batch Epoch: 7/20... Training Step: 3047... Training loss: 1.1953... 0.1241 sec/batch Epoch: 7/20... Training Step: 3048... Training loss: 1.3392... 0.1192 sec/batch Epoch: 7/20... Training Step: 3049... Training loss: 1.3827... 0.1146 sec/batch Epoch: 7/20... Training Step: 3050... Training loss: 1.3188... 0.1237 sec/batch Epoch: 7/20... Training Step: 3051... Training loss: 1.3933... 0.1206 sec/batch Epoch: 7/20... Training Step: 3052... Training loss: 1.4441... 
0.1195 sec/batch Epoch: 7/20... Training Step: 3053... Training loss: 1.2725... 0.1225 sec/batch Epoch: 7/20... Training Step: 3054... Training loss: 1.4141... 0.1203 sec/batch Epoch: 7/20... Training Step: 3055... Training loss: 1.2908... 0.1206 sec/batch Epoch: 7/20... Training Step: 3056... Training loss: 1.4566... 0.1222 sec/batch Epoch: 7/20... Training Step: 3057... Training loss: 1.3350... 0.1209 sec/batch Epoch: 7/20... Training Step: 3058... Training loss: 1.2420... 0.1215 sec/batch Epoch: 7/20... Training Step: 3059... Training loss: 1.3743... 0.1237 sec/batch Epoch: 7/20... Training Step: 3060... Training loss: 1.2568... 0.1223 sec/batch Epoch: 7/20... Training Step: 3061... Training loss: 1.2991... 0.1196 sec/batch Epoch: 7/20... Training Step: 3062... Training loss: 1.4898... 0.1215 sec/batch Epoch: 7/20... Training Step: 3063... Training loss: 1.2120... 0.1263 sec/batch Epoch: 7/20... Training Step: 3064... Training loss: 1.2244... 0.1207 sec/batch Epoch: 7/20... Training Step: 3065... Training loss: 1.2063... 0.1231 sec/batch Epoch: 7/20... Training Step: 3066... Training loss: 1.1842... 0.1177 sec/batch Epoch: 7/20... Training Step: 3067... Training loss: 1.2801... 0.1202 sec/batch Epoch: 7/20... Training Step: 3068... Training loss: 1.2744... 0.1237 sec/batch Epoch: 7/20... Training Step: 3069... Training loss: 1.1272... 0.1256 sec/batch Epoch: 7/20... Training Step: 3070... Training loss: 1.2795... 0.1220 sec/batch Epoch: 7/20... Training Step: 3071... Training loss: 1.3246... 0.1204 sec/batch Epoch: 7/20... Training Step: 3072... Training loss: 1.2986... 0.1253 sec/batch Epoch: 7/20... Training Step: 3073... Training loss: 1.3096... 0.1177 sec/batch Epoch: 7/20... Training Step: 3074... Training loss: 1.2854... 0.1222 sec/batch Epoch: 7/20... Training Step: 3075... Training loss: 1.3599... 0.1206 sec/batch Epoch: 7/20... Training Step: 3076... Training loss: 1.1808... 0.1238 sec/batch Epoch: 7/20... Training Step: 3077... Training loss: 1.1529... 
0.1206 sec/batch Epoch: 7/20... Training Step: 3078... Training loss: 1.2824... 0.1236 sec/batch Epoch: 7/20... Training Step: 3079... Training loss: 1.2650... 0.1201 sec/batch Epoch: 7/20... Training Step: 3080... Training loss: 1.4909... 0.1239 sec/batch Epoch: 7/20... Training Step: 3081... Training loss: 1.2227... 0.1255 sec/batch Epoch: 7/20... Training Step: 3082... Training loss: 1.2138... 0.1177 sec/batch Epoch: 7/20... Training Step: 3083... Training loss: 1.2459... 0.1201 sec/batch Epoch: 7/20... Training Step: 3084... Training loss: 1.1699... 0.1243 sec/batch Epoch: 7/20... Training Step: 3085... Training loss: 1.2717... 0.1200 sec/batch Epoch: 7/20... Training Step: 3086... Training loss: 1.2582... 0.1228 sec/batch Epoch: 7/20... Training Step: 3087... Training loss: 1.1604... 0.1242 sec/batch Epoch: 7/20... Training Step: 3088... Training loss: 1.4051... 0.1232 sec/batch Epoch: 7/20... Training Step: 3089... Training loss: 1.1313... 0.1190 sec/batch Epoch: 7/20... Training Step: 3090... Training loss: 1.2458... 0.1222 sec/batch Epoch: 7/20... Training Step: 3091... Training loss: 1.2898... 0.1258 sec/batch Epoch: 7/20... Training Step: 3092... Training loss: 1.6273... 0.1236 sec/batch Epoch: 7/20... Training Step: 3093... Training loss: 1.3822... 0.1207 sec/batch Epoch: 7/20... Training Step: 3094... Training loss: 1.2632... 0.1207 sec/batch Epoch: 7/20... Training Step: 3095... Training loss: 1.2501... 0.1200 sec/batch Epoch: 7/20... Training Step: 3096... Training loss: 1.1641... 0.1200 sec/batch Epoch: 7/20... Training Step: 3097... Training loss: 1.2217... 0.1224 sec/batch Epoch: 7/20... Training Step: 3098... Training loss: 1.1319... 0.1251 sec/batch Epoch: 7/20... Training Step: 3099... Training loss: 0.9536... 0.1206 sec/batch Epoch: 7/20... Training Step: 3100... Training loss: 1.0892... 0.1232 sec/batch Epoch: 7/20... Training Step: 3101... Training loss: 1.1958... 0.1231 sec/batch Epoch: 7/20... Training Step: 3102... Training loss: 1.2396... 
0.1185 sec/batch Epoch: 7/20... Training Step: 3103... Training loss: 1.2405... 0.1199 sec/batch Epoch: 7/20... Training Step: 3104... Training loss: 1.0964... 0.1159 sec/batch Epoch: 7/20... Training Step: 3105... Training loss: 1.1538... 0.1190 sec/batch Epoch: 7/20... Training Step: 3106... Training loss: 1.3817... 0.1235 sec/batch Epoch: 7/20... Training Step: 3107... Training loss: 1.0997... 0.1244 sec/batch Epoch: 7/20... Training Step: 3108... Training loss: 1.0852... 0.1218 sec/batch Epoch: 7/20... Training Step: 3109... Training loss: 1.1240... 0.1254 sec/batch Epoch: 7/20... Training Step: 3110... Training loss: 0.9437... 0.1216 sec/batch Epoch: 7/20... Training Step: 3111... Training loss: 1.2613... 0.1207 sec/batch Epoch: 7/20... Training Step: 3112... Training loss: 1.1648... 0.1221 sec/batch Epoch: 7/20... Training Step: 3113... Training loss: 1.3288... 0.1311 sec/batch Epoch: 7/20... Training Step: 3114... Training loss: 1.1001... 0.1253 sec/batch Epoch: 7/20... Training Step: 3115... Training loss: 1.2499... 0.1231 sec/batch Epoch: 7/20... Training Step: 3116... Training loss: 1.2215... 0.1233 sec/batch Epoch: 7/20... Training Step: 3117... Training loss: 1.0433... 0.1164 sec/batch Epoch: 7/20... Training Step: 3118... Training loss: 1.2437... 0.1248 sec/batch Epoch: 7/20... Training Step: 3119... Training loss: 1.2448... 0.1195 sec/batch Epoch: 7/20... Training Step: 3120... Training loss: 1.1497... 0.1224 sec/batch Epoch: 7/20... Training Step: 3121... Training loss: 1.2094... 0.1209 sec/batch Epoch: 7/20... Training Step: 3122... Training loss: 1.0038... 0.1168 sec/batch Epoch: 7/20... Training Step: 3123... Training loss: 1.2565... 0.1219 sec/batch Epoch: 7/20... Training Step: 3124... Training loss: 1.1723... 0.1196 sec/batch Epoch: 7/20... Training Step: 3125... Training loss: 1.1583... 0.1232 sec/batch Epoch: 7/20... Training Step: 3126... Training loss: 1.3479... 0.1240 sec/batch Epoch: 7/20... Training Step: 3127... Training loss: 1.0287... 
0.1221 sec/batch Epoch: 7/20... Training Step: 3128... Training loss: 1.4796... 0.1188 sec/batch Epoch: 7/20... Training Step: 3129... Training loss: 1.1575... 0.1238 sec/batch Epoch: 7/20... Training Step: 3130... Training loss: 1.0189... 0.1274 sec/batch Epoch: 7/20... Training Step: 3131... Training loss: 1.0379... 0.1246 sec/batch Epoch: 7/20... Training Step: 3132... Training loss: 1.5389... 0.1242 sec/batch Epoch: 7/20... Training Step: 3133... Training loss: 1.1431... 0.1231 sec/batch Epoch: 7/20... Training Step: 3134... Training loss: 1.3206... 0.1172 sec/batch Epoch: 7/20... Training Step: 3135... Training loss: 1.1721... 0.1214 sec/batch Epoch: 7/20... Training Step: 3136... Training loss: 1.1851... 0.1199 sec/batch Epoch: 7/20... Training Step: 3137... Training loss: 1.0153... 0.1205 sec/batch Epoch: 7/20... Training Step: 3138... Training loss: 0.9928... 0.1217 sec/batch Epoch: 7/20... Training Step: 3139... Training loss: 1.2778... 0.1231 sec/batch Epoch: 7/20... Training Step: 3140... Training loss: 1.2535... 0.1228 sec/batch Epoch: 7/20... Training Step: 3141... Training loss: 1.0508... 0.1195 sec/batch Epoch: 7/20... Training Step: 3142... Training loss: 1.2995... 0.1248 sec/batch Epoch: 7/20... Training Step: 3143... Training loss: 1.4199... 0.1207 sec/batch Epoch: 7/20... Training Step: 3144... Training loss: 1.0698... 0.1199 sec/batch Epoch: 7/20... Training Step: 3145... Training loss: 1.3312... 0.1240 sec/batch Epoch: 7/20... Training Step: 3146... Training loss: 1.1414... 0.1236 sec/batch Epoch: 7/20... Training Step: 3147... Training loss: 1.1476... 0.1207 sec/batch Epoch: 7/20... Training Step: 3148... Training loss: 1.1760... 0.1192 sec/batch Epoch: 7/20... Training Step: 3149... Training loss: 1.2606... 0.1268 sec/batch Epoch: 7/20... Training Step: 3150... Training loss: 1.2919... 0.1197 sec/batch Epoch: 7/20... Training Step: 3151... Training loss: 1.1448... 0.1192 sec/batch Epoch: 7/20... Training Step: 3152... Training loss: 1.3525... 
0.1209 sec/batch Epoch: 7/20... Training Step: 3153... Training loss: 1.3536... 0.1246 sec/batch Epoch: 7/20... Training Step: 3154... Training loss: 1.2643... 0.1255 sec/batch Epoch: 7/20... Training Step: 3155... Training loss: 1.1279... 0.1259 sec/batch Epoch: 7/20... Training Step: 3156... Training loss: 1.1800... 0.1216 sec/batch Epoch: 7/20... Training Step: 3157... Training loss: 1.0797... 0.1177 sec/batch Epoch: 7/20... Training Step: 3158... Training loss: 1.3322... 0.1192 sec/batch Epoch: 7/20... Training Step: 3159... Training loss: 1.2452... 0.1296 sec/batch Epoch: 7/20... Training Step: 3160... Training loss: 1.2592... 0.1154 sec/batch Epoch: 7/20... Training Step: 3161... Training loss: 1.3449... 0.1227 sec/batch Epoch: 7/20... Training Step: 3162... Training loss: 1.3945... 0.1267 sec/batch Epoch: 7/20... Training Step: 3163... Training loss: 1.1524... 0.1232 sec/batch Epoch: 7/20... Training Step: 3164... Training loss: 1.2772... 0.1217 sec/batch Epoch: 7/20... Training Step: 3165... Training loss: 1.1202... 0.1247 sec/batch Epoch: 7/20... Training Step: 3166... Training loss: 1.1862... 0.1196 sec/batch Epoch: 7/20... Training Step: 3167... Training loss: 1.1262... 0.1253 sec/batch Epoch: 7/20... Training Step: 3168... Training loss: 1.2611... 0.1214 sec/batch Epoch: 7/20... Training Step: 3169... Training loss: 1.1657... 0.1249 sec/batch Epoch: 7/20... Training Step: 3170... Training loss: 1.2976... 0.1234 sec/batch Epoch: 7/20... Training Step: 3171... Training loss: 1.2226... 0.1234 sec/batch Epoch: 7/20... Training Step: 3172... Training loss: 1.0814... 0.1241 sec/batch Epoch: 7/20... Training Step: 3173... Training loss: 1.1090... 0.1260 sec/batch Epoch: 7/20... Training Step: 3174... Training loss: 1.2359... 0.1218 sec/batch Epoch: 7/20... Training Step: 3175... Training loss: 1.1152... 0.1235 sec/batch Epoch: 7/20... Training Step: 3176... Training loss: 1.0353... 0.1213 sec/batch Epoch: 7/20... Training Step: 3177... Training loss: 1.1522... 
0.1173 sec/batch Epoch: 7/20... Training Step: 3178... Training loss: 1.1130... 0.1174 sec/batch Epoch: 7/20... Training Step: 3179... Training loss: 1.1318... 0.1260 sec/batch Epoch: 7/20... Training Step: 3180... Training loss: 1.4845... 0.1201 sec/batch Epoch: 7/20... Training Step: 3181... Training loss: 1.2188... 0.1237 sec/batch Epoch: 7/20... Training Step: 3182... Training loss: 1.1568... 0.1211 sec/batch Epoch: 7/20... Training Step: 3183... Training loss: 1.2438... 0.1191 sec/batch Epoch: 7/20... Training Step: 3184... Training loss: 1.0755... 0.1173 sec/batch Epoch: 7/20... Training Step: 3185... Training loss: 1.1461... 0.1219 sec/batch Epoch: 7/20... Training Step: 3186... Training loss: 1.1495... 0.1245 sec/batch Epoch: 7/20... Training Step: 3187... Training loss: 1.0971... 0.1208 sec/batch Epoch: 7/20... Training Step: 3188... Training loss: 1.2161... 0.1212 sec/batch Epoch: 7/20... Training Step: 3189... Training loss: 1.3190... 0.1212 sec/batch Epoch: 7/20... Training Step: 3190... Training loss: 1.4433... 0.1270 sec/batch Epoch: 7/20... Training Step: 3191... Training loss: 1.1874... 0.1199 sec/batch Epoch: 7/20... Training Step: 3192... Training loss: 1.5070... 0.1215 sec/batch Epoch: 7/20... Training Step: 3193... Training loss: 1.3069... 0.1187 sec/batch Epoch: 7/20... Training Step: 3194... Training loss: 1.2269... 0.1218 sec/batch Epoch: 7/20... Training Step: 3195... Training loss: 1.1359... 0.1214 sec/batch Epoch: 7/20... Training Step: 3196... Training loss: 1.1190... 0.1262 sec/batch Epoch: 7/20... Training Step: 3197... Training loss: 1.3502... 0.1236 sec/batch Epoch: 7/20... Training Step: 3198... Training loss: 1.2827... 0.1229 sec/batch Epoch: 7/20... Training Step: 3199... Training loss: 1.3563... 0.1203 sec/batch Epoch: 7/20... Training Step: 3200... Training loss: 1.4287... 0.1217 sec/batch Epoch: 7/20... Training Step: 3201... Training loss: 1.3330... 0.1253 sec/batch Epoch: 7/20... Training Step: 3202... Training loss: 1.1027... 
0.1203 sec/batch Epoch: 7/20... Training Step: 3203... Training loss: 1.1840... 0.1211 sec/batch Epoch: 7/20... Training Step: 3204... Training loss: 1.0667... 0.1236 sec/batch Epoch: 7/20... Training Step: 3205... Training loss: 1.2822... 0.1183 sec/batch Epoch: 7/20... Training Step: 3206... Training loss: 1.2431... 0.1190 sec/batch Epoch: 7/20... Training Step: 3207... Training loss: 1.2613... 0.1209 sec/batch Epoch: 7/20... Training Step: 3208... Training loss: 1.4000... 0.1229 sec/batch Epoch: 7/20... Training Step: 3209... Training loss: 1.1610... 0.1239 sec/batch Epoch: 7/20... Training Step: 3210... Training loss: 1.2771... 0.1214 sec/batch Epoch: 7/20... Training Step: 3211... Training loss: 1.2774... 0.1231 sec/batch Epoch: 7/20... Training Step: 3212... Training loss: 1.3404... 0.1235 sec/batch Epoch: 7/20... Training Step: 3213... Training loss: 1.2024... 0.1273 sec/batch Epoch: 7/20... Training Step: 3214... Training loss: 1.2484... 0.1248 sec/batch Epoch: 7/20... Training Step: 3215... Training loss: 1.5099... 0.1187 sec/batch Epoch: 7/20... Training Step: 3216... Training loss: 1.2710... 0.1131 sec/batch Epoch: 7/20... Training Step: 3217... Training loss: 1.3640... 0.1195 sec/batch Epoch: 7/20... Training Step: 3218... Training loss: 1.2738... 0.1110 sec/batch Epoch: 7/20... Training Step: 3219... Training loss: 1.1394... 0.1197 sec/batch Epoch: 7/20... Training Step: 3220... Training loss: 1.3137... 0.1170 sec/batch Epoch: 7/20... Training Step: 3221... Training loss: 1.1610... 0.1235 sec/batch Epoch: 7/20... Training Step: 3222... Training loss: 1.3274... 0.1193 sec/batch Epoch: 7/20... Training Step: 3223... Training loss: 1.4552... 0.1162 sec/batch Epoch: 7/20... Training Step: 3224... Training loss: 1.5764... 0.1195 sec/batch Epoch: 7/20... Training Step: 3225... Training loss: 1.2216... 0.1169 sec/batch Epoch: 7/20... Training Step: 3226... Training loss: 1.2733... 0.1151 sec/batch Epoch: 7/20... Training Step: 3227... Training loss: 1.3239... 
0.1201 sec/batch Epoch: 7/20... Training Step: 3228... Training loss: 1.2744... 0.1209 sec/batch Epoch: 7/20... Training Step: 3229... Training loss: 1.2496... 0.1150 sec/batch Epoch: 7/20... Training Step: 3230... Training loss: 1.1760... 0.1186 sec/batch Epoch: 7/20... Training Step: 3231... Training loss: 1.2811... 0.1225 sec/batch Epoch: 7/20... Training Step: 3232... Training loss: 1.1643... 0.1162 sec/batch Epoch: 7/20... Training Step: 3233... Training loss: 1.3005... 0.1253 sec/batch Epoch: 7/20... Training Step: 3234... Training loss: 1.2522... 0.1355 sec/batch Epoch: 7/20... Training Step: 3235... Training loss: 1.1556... 0.1162 sec/batch Epoch: 7/20... Training Step: 3236... Training loss: 1.3389... 0.1157 sec/batch Epoch: 7/20... Training Step: 3237... Training loss: 1.0780... 0.1189 sec/batch Epoch: 7/20... Training Step: 3238... Training loss: 1.6944... 0.1183 sec/batch Epoch: 7/20... Training Step: 3239... Training loss: 1.3899... 0.1157 sec/batch Epoch: 7/20... Training Step: 3240... Training loss: 1.0673... 0.1183 sec/batch Epoch: 7/20... Training Step: 3241... Training loss: 1.1741... 0.1136 sec/batch Epoch: 7/20... Training Step: 3242... Training loss: 0.9654... 0.1160 sec/batch Epoch: 7/20... Training Step: 3243... Training loss: 1.1513... 0.1128 sec/batch Epoch: 7/20... Training Step: 3244... Training loss: 1.3113... 0.1196 sec/batch Epoch: 7/20... Training Step: 3245... Training loss: 1.2757... 0.1146 sec/batch Epoch: 7/20... Training Step: 3246... Training loss: 1.1444... 0.1208 sec/batch Epoch: 7/20... Training Step: 3247... Training loss: 1.3500... 0.1201 sec/batch Epoch: 7/20... Training Step: 3248... Training loss: 1.1983... 0.1170 sec/batch Epoch: 8/20... Training Step: 3249... Training loss: 1.5048... 0.1224 sec/batch Epoch: 8/20... Training Step: 3250... Training loss: 1.3285... 0.1195 sec/batch Epoch: 8/20... Training Step: 3251... Training loss: 1.2784... 0.1151 sec/batch Epoch: 8/20... Training Step: 3252... Training loss: 1.1725... 
0.1168 sec/batch Epoch: 8/20... Training Step: 3253... Training loss: 1.2325... 0.1186 sec/batch Epoch: 8/20... Training Step: 3254... Training loss: 1.0584... 0.1219 sec/batch Epoch: 8/20... Training Step: 3255... Training loss: 1.3989... 0.1168 sec/batch Epoch: 8/20... Training Step: 3256... Training loss: 1.0949... 0.1180 sec/batch Epoch: 8/20... Training Step: 3257... Training loss: 1.0393... 0.1195 sec/batch Epoch: 8/20... Training Step: 3258... Training loss: 1.2853... 0.1170 sec/batch Epoch: 8/20... Training Step: 3259... Training loss: 1.1439... 0.1178 sec/batch Epoch: 8/20... Training Step: 3260... Training loss: 1.0540... 0.1162 sec/batch Epoch: 8/20... Training Step: 3261... Training loss: 1.4176... 0.1275 sec/batch Epoch: 8/20... Training Step: 3262... Training loss: 1.0036... 0.1250 sec/batch Epoch: 8/20... Training Step: 3263... Training loss: 1.2504... 0.1215 sec/batch Epoch: 8/20... Training Step: 3264... Training loss: 1.4186... 0.1147 sec/batch Epoch: 8/20... Training Step: 3265... Training loss: 1.1110... 0.1210 sec/batch Epoch: 8/20... Training Step: 3266... Training loss: 1.1113... 0.1197 sec/batch Epoch: 8/20... Training Step: 3267... Training loss: 1.2669... 0.1169 sec/batch Epoch: 8/20... Training Step: 3268... Training loss: 1.0783... 0.1187 sec/batch Epoch: 8/20... Training Step: 3269... Training loss: 1.3684... 0.1147 sec/batch Epoch: 8/20... Training Step: 3270... Training loss: 1.1295... 0.1194 sec/batch Epoch: 8/20... Training Step: 3271... Training loss: 1.3864... 0.1181 sec/batch Epoch: 8/20... Training Step: 3272... Training loss: 1.2161... 0.1219 sec/batch Epoch: 8/20... Training Step: 3273... Training loss: 1.1874... 0.1178 sec/batch Epoch: 8/20... Training Step: 3274... Training loss: 1.2300... 0.1273 sec/batch Epoch: 8/20... Training Step: 3275... Training loss: 1.2638... 0.1222 sec/batch Epoch: 8/20... Training Step: 3276... Training loss: 1.1008... 0.1295 sec/batch Epoch: 8/20... Training Step: 3277... Training loss: 1.0863... 
0.1225 sec/batch Epoch: 8/20... Training Step: 3278... Training loss: 1.2468... 0.1249 sec/batch Epoch: 8/20... Training Step: 3279... Training loss: 1.0180... 0.1236 sec/batch Epoch: 8/20... Training Step: 3280... Training loss: 1.0786... 0.1234 sec/batch Epoch: 8/20... Training Step: 3281... Training loss: 1.0152... 0.1214 sec/batch Epoch: 8/20... Training Step: 3282... Training loss: 1.0477... 0.1289 sec/batch Epoch: 8/20... Training Step: 3283... Training loss: 1.0578... 0.1253 sec/batch Epoch: 8/20... Training Step: 3284... Training loss: 1.2052... 0.1162 sec/batch Epoch: 8/20... Training Step: 3285... Training loss: 1.2091... 0.1221 sec/batch Epoch: 8/20... Training Step: 3286... Training loss: 1.0305... 0.1235 sec/batch Epoch: 8/20... Training Step: 3287... Training loss: 1.1228... 0.1342 sec/batch Epoch: 8/20... Training Step: 3288... Training loss: 1.4156... 0.1279 sec/batch Epoch: 8/20... Training Step: 3289... Training loss: 1.1858... 0.1318 sec/batch Epoch: 8/20... Training Step: 3290... Training loss: 1.1119... 0.1210 sec/batch Epoch: 8/20... Training Step: 3291... Training loss: 1.2720... 0.1224 sec/batch Epoch: 8/20... Training Step: 3292... Training loss: 1.0342... 0.1176 sec/batch Epoch: 8/20... Training Step: 3293... Training loss: 1.0997... 0.1191 sec/batch Epoch: 8/20... Training Step: 3294... Training loss: 1.1066... 0.1188 sec/batch Epoch: 8/20... Training Step: 3295... Training loss: 1.1851... 0.1209 sec/batch Epoch: 8/20... Training Step: 3296... Training loss: 1.1577... 0.1175 sec/batch Epoch: 8/20... Training Step: 3297... Training loss: 1.1152... 0.1170 sec/batch Epoch: 8/20... Training Step: 3298... Training loss: 1.2337... 0.1165 sec/batch Epoch: 8/20... Training Step: 3299... Training loss: 1.1009... 0.1216 sec/batch Epoch: 8/20... Training Step: 3300... Training loss: 1.2324... 0.1157 sec/batch Epoch: 8/20... Training Step: 3301... Training loss: 1.1964... 0.1212 sec/batch Epoch: 8/20... Training Step: 3302... Training loss: 1.2295... 
0.1203 sec/batch Epoch: 8/20... Training Step: 3303... Training loss: 0.9775... 0.1203 sec/batch Epoch: 8/20... Training Step: 3304... Training loss: 1.0413... 0.1185 sec/batch Epoch: 8/20... Training Step: 3305... Training loss: 1.1191... 0.1181 sec/batch Epoch: 8/20... Training Step: 3306... Training loss: 1.1647... 0.1211 sec/batch Epoch: 8/20... Training Step: 3307... Training loss: 0.9222... 0.1186 sec/batch Epoch: 8/20... Training Step: 3308... Training loss: 1.1429... 0.1202 sec/batch Epoch: 8/20... Training Step: 3309... Training loss: 1.1077... 0.1197 sec/batch Epoch: 8/20... Training Step: 3310... Training loss: 1.3185... 0.1227 sec/batch Epoch: 8/20... Training Step: 3311... Training loss: 1.0749... 0.1178 sec/batch Epoch: 8/20... Training Step: 3312... Training loss: 1.1811... 0.1203 sec/batch Epoch: 8/20... Training Step: 3313... Training loss: 1.0664... 0.1171 sec/batch Epoch: 8/20... Training Step: 3314... Training loss: 1.3363... 0.1219 sec/batch Epoch: 8/20... Training Step: 3315... Training loss: 1.1278... 0.1188 sec/batch Epoch: 8/20... Training Step: 3316... Training loss: 1.2457... 0.1165 sec/batch Epoch: 8/20... Training Step: 3317... Training loss: 1.1585... 0.1195 sec/batch Epoch: 8/20... Training Step: 3318... Training loss: 1.1996... 0.1180 sec/batch Epoch: 8/20... Training Step: 3319... Training loss: 1.2517... 0.1208 sec/batch Epoch: 8/20... Training Step: 3320... Training loss: 1.0945... 0.1189 sec/batch Epoch: 8/20... Training Step: 3321... Training loss: 1.2217... 0.1149 sec/batch Epoch: 8/20... Training Step: 3322... Training loss: 1.0205... 0.1185 sec/batch Epoch: 8/20... Training Step: 3323... Training loss: 1.5043... 0.1173 sec/batch Epoch: 8/20... Training Step: 3324... Training loss: 1.0886... 0.1203 sec/batch Epoch: 8/20... Training Step: 3325... Training loss: 1.1459... 0.1165 sec/batch Epoch: 8/20... Training Step: 3326... Training loss: 1.2019... 0.1134 sec/batch Epoch: 8/20... Training Step: 3327... Training loss: 1.2495... 
0.1191 sec/batch Epoch: 8/20... Training Step: 3328... Training loss: 1.1023... 0.1201 sec/batch Epoch: 8/20... Training Step: 3329... Training loss: 1.3687... 0.1214 sec/batch Epoch: 8/20... Training Step: 3330... Training loss: 1.2268... 0.1159 sec/batch Epoch: 8/20... Training Step: 3331... Training loss: 1.0678... 0.1138 sec/batch Epoch: 8/20... Training Step: 3332... Training loss: 1.3630... 0.1199 sec/batch Epoch: 8/20... Training Step: 3333... Training loss: 1.2492... 0.1207 sec/batch Epoch: 8/20... Training Step: 3334... Training loss: 1.2818... 0.1146 sec/batch Epoch: 8/20... Training Step: 3335... Training loss: 1.0959... 0.1158 sec/batch Epoch: 8/20... Training Step: 3336... Training loss: 1.2039... 0.1217 sec/batch Epoch: 8/20... Training Step: 3337... Training loss: 1.3188... 0.1202 sec/batch Epoch: 8/20... Training Step: 3338... Training loss: 1.2181... 0.1146 sec/batch Epoch: 8/20... Training Step: 3339... Training loss: 1.3140... 0.1168 sec/batch Epoch: 8/20... Training Step: 3340... Training loss: 1.3705... 0.1164 sec/batch Epoch: 8/20... Training Step: 3341... Training loss: 1.0456... 0.1144 sec/batch Epoch: 8/20... Training Step: 3342... Training loss: 1.3037... 0.1155 sec/batch Epoch: 8/20... Training Step: 3343... Training loss: 1.2118... 0.1194 sec/batch Epoch: 8/20... Training Step: 3344... Training loss: 1.2380... 0.1107 sec/batch Epoch: 8/20... Training Step: 3345... Training loss: 1.4454... 0.1177 sec/batch Epoch: 8/20... Training Step: 3346... Training loss: 1.3446... 0.1160 sec/batch Epoch: 8/20... Training Step: 3347... Training loss: 1.3090... 0.1182 sec/batch Epoch: 8/20... Training Step: 3348... Training loss: 1.1442... 0.1187 sec/batch Epoch: 8/20... Training Step: 3349... Training loss: 1.2476... 0.1221 sec/batch Epoch: 8/20... Training Step: 3350... Training loss: 1.2957... 0.1176 sec/batch Epoch: 8/20... Training Step: 3351... Training loss: 1.4406... 0.1182 sec/batch Epoch: 8/20... Training Step: 3352... Training loss: 1.1937... 
0.1184 sec/batch Epoch: 8/20... Training Step: 3353... Training loss: 1.3844... 0.1179 sec/batch Epoch: 8/20... Training Step: 3354... Training loss: 1.4356... 0.1168 sec/batch Epoch: 8/20... Training Step: 3355... Training loss: 1.2225... 0.1209 sec/batch Epoch: 8/20... Training Step: 3356... Training loss: 1.2872... 0.1239 sec/batch Epoch: 8/20... Training Step: 3357... Training loss: 1.2375... 0.1123 sec/batch Epoch: 8/20... Training Step: 3358... Training loss: 1.1250... 0.1192 sec/batch Epoch: 8/20... Training Step: 3359... Training loss: 1.2729... 0.1184 sec/batch Epoch: 8/20... Training Step: 3360... Training loss: 1.1384... 0.1177 sec/batch Epoch: 8/20... Training Step: 3361... Training loss: 1.2029... 0.1215 sec/batch Epoch: 8/20... Training Step: 3362... Training loss: 1.3879... 0.1156 sec/batch Epoch: 8/20... Training Step: 3363... Training loss: 1.2279... 0.1147 sec/batch Epoch: 8/20... Training Step: 3364... Training loss: 1.2931... 0.1166 sec/batch Epoch: 8/20... Training Step: 3365... Training loss: 1.2731... 0.1220 sec/batch Epoch: 8/20... Training Step: 3366... Training loss: 1.3373... 0.1222 sec/batch Epoch: 8/20... Training Step: 3367... Training loss: 1.1943... 0.1142 sec/batch Epoch: 8/20... Training Step: 3368... Training loss: 1.0630... 0.1221 sec/batch Epoch: 8/20... Training Step: 3369... Training loss: 1.2677... 0.1182 sec/batch Epoch: 8/20... Training Step: 3370... Training loss: 1.2003... 0.1140 sec/batch Epoch: 8/20... Training Step: 3371... Training loss: 1.2572... 0.1197 sec/batch Epoch: 8/20... Training Step: 3372... Training loss: 1.2582... 0.1141 sec/batch Epoch: 8/20... Training Step: 3373... Training loss: 1.2401... 0.1205 sec/batch Epoch: 8/20... Training Step: 3374... Training loss: 1.0791... 0.1166 sec/batch Epoch: 8/20... Training Step: 3375... Training loss: 1.0402... 0.1176 sec/batch Epoch: 8/20... Training Step: 3376... Training loss: 1.3166... 0.1194 sec/batch Epoch: 8/20... Training Step: 3377... Training loss: 1.2331... 
0.1151 sec/batch Epoch: 8/20... Training Step: 3378... Training loss: 1.2786... 0.1227 sec/batch Epoch: 8/20... Training Step: 3379... Training loss: 1.5033... 0.1166 sec/batch Epoch: 8/20... Training Step: 3380... Training loss: 1.2407... 0.1162 sec/batch Epoch: 8/20... Training Step: 3381... Training loss: 1.2283... 0.1203 sec/batch Epoch: 8/20... Training Step: 3382... Training loss: 1.3507... 0.1160 sec/batch Epoch: 8/20... Training Step: 3383... Training loss: 1.0793... 0.1169 sec/batch Epoch: 8/20... Training Step: 3384... Training loss: 0.9898... 0.1251 sec/batch Epoch: 8/20... Training Step: 3385... Training loss: 1.0367... 0.1183 sec/batch Epoch: 8/20... Training Step: 3386... Training loss: 1.2980... 0.1177 sec/batch Epoch: 8/20... Training Step: 3387... Training loss: 1.1272... 0.1252 sec/batch Epoch: 8/20... Training Step: 3388... Training loss: 1.1748... 0.1325 sec/batch Epoch: 8/20... Training Step: 3389... Training loss: 1.0833... 0.1297 sec/batch Epoch: 8/20... Training Step: 3390... Training loss: 1.0836... 0.1156 sec/batch Epoch: 8/20... Training Step: 3391... Training loss: 1.0377... 0.1194 sec/batch Epoch: 8/20... Training Step: 3392... Training loss: 1.2196... 0.1162 sec/batch Epoch: 8/20... Training Step: 3393... Training loss: 1.2394... 0.1141 sec/batch Epoch: 8/20... Training Step: 3394... Training loss: 1.1393... 0.1160 sec/batch Epoch: 8/20... Training Step: 3395... Training loss: 1.1310... 0.1150 sec/batch Epoch: 8/20... Training Step: 3396... Training loss: 1.0669... 0.1172 sec/batch Epoch: 8/20... Training Step: 3397... Training loss: 1.3027... 0.1176 sec/batch Epoch: 8/20... Training Step: 3398... Training loss: 1.3562... 0.1191 sec/batch Epoch: 8/20... Training Step: 3399... Training loss: 1.2296... 0.1191 sec/batch Epoch: 8/20... Training Step: 3400... Training loss: 1.1442... 0.1155 sec/batch Epoch: 8/20... Training Step: 3401... Training loss: 1.4094... 0.1165 sec/batch Epoch: 8/20... Training Step: 3402... Training loss: 1.2341... 
0.1195 sec/batch Epoch: 8/20... Training Step: 3403... Training loss: 1.1850... 0.1129 sec/batch Epoch: 8/20... Training Step: 3404... Training loss: 1.2147... 0.1159 sec/batch Epoch: 8/20... Training Step: 3405... Training loss: 1.1028... 0.1141 sec/batch Epoch: 8/20... Training Step: 3406... Training loss: 1.2325... 0.1124 sec/batch Epoch: 8/20... Training Step: 3407... Training loss: 1.0698... 0.1157 sec/batch Epoch: 8/20... Training Step: 3408... Training loss: 1.1489... 0.1192 sec/batch Epoch: 8/20... Training Step: 3409... Training loss: 1.3284... 0.1172 sec/batch Epoch: 8/20... Training Step: 3410... Training loss: 1.1731... 0.1156 sec/batch Epoch: 8/20... Training Step: 3411... Training loss: 1.4068... 0.1136 sec/batch Epoch: 8/20... Training Step: 3412... Training loss: 1.1216... 0.1175 sec/batch Epoch: 8/20... Training Step: 3413... Training loss: 1.3087... 0.1169 sec/batch Epoch: 8/20... Training Step: 3414... Training loss: 1.1461... 0.1145 sec/batch Epoch: 8/20... Training Step: 3415... Training loss: 1.1127... 0.1203 sec/batch Epoch: 8/20... Training Step: 3416... Training loss: 1.2975... 0.1223 sec/batch Epoch: 8/20... Training Step: 3417... Training loss: 1.1204... 0.1156 sec/batch Epoch: 8/20... Training Step: 3418... Training loss: 1.3186... 0.1180 sec/batch Epoch: 8/20... Training Step: 3419... Training loss: 1.2813... 0.1183 sec/batch Epoch: 8/20... Training Step: 3420... Training loss: 1.4530... 0.1157 sec/batch Epoch: 8/20... Training Step: 3421... Training loss: 1.1071... 0.1128 sec/batch Epoch: 8/20... Training Step: 3422... Training loss: 1.1599... 0.1175 sec/batch Epoch: 8/20... Training Step: 3423... Training loss: 1.3531... 0.1203 sec/batch Epoch: 8/20... Training Step: 3424... Training loss: 1.0668... 0.1200 sec/batch Epoch: 8/20... Training Step: 3425... Training loss: 1.0775... 0.1166 sec/batch Epoch: 8/20... Training Step: 3426... Training loss: 1.2885... 0.1202 sec/batch Epoch: 8/20... Training Step: 3427... Training loss: 1.0786... 
0.1200 sec/batch Epoch: 8/20... Training Step: 3428... Training loss: 1.2705... 0.1191 sec/batch Epoch: 8/20... Training Step: 3429... Training loss: 1.0824... 0.1166 sec/batch Epoch: 8/20... Training Step: 3430... Training loss: 1.3983... 0.1146 sec/batch Epoch: 8/20... Training Step: 3431... Training loss: 1.2709... 0.1192 sec/batch Epoch: 8/20... Training Step: 3432... Training loss: 1.1623... 0.1233 sec/batch Epoch: 8/20... Training Step: 3433... Training loss: 1.3321... 0.1226 sec/batch Epoch: 8/20... Training Step: 3434... Training loss: 1.2467... 0.1234 sec/batch Epoch: 8/20... Training Step: 3435... Training loss: 1.3658... 0.1319 sec/batch Epoch: 8/20... Training Step: 3436... Training loss: 0.9808... 0.1213 sec/batch Epoch: 8/20... Training Step: 3437... Training loss: 1.2422... 0.1171 sec/batch Epoch: 8/20... Training Step: 3438... Training loss: 1.1700... 0.1191 sec/batch Epoch: 8/20... Training Step: 3439... Training loss: 1.1619... 0.1185 sec/batch Epoch: 8/20... Training Step: 3440... Training loss: 1.3323... 0.1220 sec/batch Epoch: 8/20... Training Step: 3441... Training loss: 1.2559... 0.1192 sec/batch Epoch: 8/20... Training Step: 3442... Training loss: 1.2831... 0.1157 sec/batch Epoch: 8/20... Training Step: 3443... Training loss: 1.2767... 0.1179 sec/batch Epoch: 8/20... Training Step: 3444... Training loss: 1.2116... 0.1246 sec/batch Epoch: 8/20... Training Step: 3445... Training loss: 1.0955... 0.1359 sec/batch Epoch: 8/20... Training Step: 3446... Training loss: 1.2556... 0.1365 sec/batch Epoch: 8/20... Training Step: 3447... Training loss: 0.9638... 0.1266 sec/batch Epoch: 8/20... Training Step: 3448... Training loss: 1.2205... 0.1248 sec/batch Epoch: 8/20... Training Step: 3449... Training loss: 1.1820... 0.1213 sec/batch Epoch: 8/20... Training Step: 3450... Training loss: 1.2127... 0.1149 sec/batch Epoch: 8/20... Training Step: 3451... Training loss: 1.1697... 0.1189 sec/batch Epoch: 8/20... Training Step: 3452... Training loss: 1.2831... 
0.1224 sec/batch Epoch: 8/20... Training Step: 3453... Training loss: 1.1755... 0.1345 sec/batch Epoch: 8/20... Training Step: 3454... Training loss: 1.0929... 0.1398 sec/batch Epoch: 8/20... Training Step: 3455... Training loss: 1.1790... 0.1191 sec/batch Epoch: 8/20... Training Step: 3456... Training loss: 1.2462... 0.1178 sec/batch Epoch: 8/20... Training Step: 3457... Training loss: 1.2278... 0.1186 sec/batch Epoch: 8/20... Training Step: 3458... Training loss: 1.0611... 0.1182 sec/batch Epoch: 8/20... Training Step: 3459... Training loss: 1.0867... 0.1194 sec/batch Epoch: 8/20... Training Step: 3460... Training loss: 1.2865... 0.1179 sec/batch Epoch: 8/20... Training Step: 3461... Training loss: 1.3636... 0.1198 sec/batch Epoch: 8/20... Training Step: 3462... Training loss: 1.1519... 0.1191 sec/batch Epoch: 8/20... Training Step: 3463... Training loss: 1.3023... 0.1180 sec/batch Epoch: 8/20... Training Step: 3464... Training loss: 1.2019... 0.1164 sec/batch Epoch: 8/20... Training Step: 3465... Training loss: 1.2683... 0.1208 sec/batch Epoch: 8/20... Training Step: 3466... Training loss: 1.1807... 0.1189 sec/batch Epoch: 8/20... Training Step: 3467... Training loss: 1.4081... 0.1180 sec/batch Epoch: 8/20... Training Step: 3468... Training loss: 1.1620... 0.1172 sec/batch Epoch: 8/20... Training Step: 3469... Training loss: 1.1256... 0.1175 sec/batch Epoch: 8/20... Training Step: 3470... Training loss: 1.5012... 0.1190 sec/batch Epoch: 8/20... Training Step: 3471... Training loss: 1.3165... 0.1177 sec/batch Epoch: 8/20... Training Step: 3472... Training loss: 1.5198... 0.1152 sec/batch Epoch: 8/20... Training Step: 3473... Training loss: 1.2766... 0.1203 sec/batch Epoch: 8/20... Training Step: 3474... Training loss: 1.3748... 0.1187 sec/batch Epoch: 8/20... Training Step: 3475... Training loss: 1.4035... 0.1136 sec/batch Epoch: 8/20... Training Step: 3476... Training loss: 1.1263... 0.1214 sec/batch Epoch: 8/20... Training Step: 3477... Training loss: 1.2199... 
0.1226 sec/batch Epoch: 8/20... Training Step: 3478... Training loss: 1.1208... 0.1265 sec/batch Epoch: 8/20... Training Step: 3479... Training loss: 1.2814... 0.1194 sec/batch Epoch: 8/20... Training Step: 3480... Training loss: 1.1796... 0.1273 sec/batch Epoch: 8/20... Training Step: 3481... Training loss: 1.4161... 0.1184 sec/batch Epoch: 8/20... Training Step: 3482... Training loss: 1.1744... 0.1181 sec/batch Epoch: 8/20... Training Step: 3483... Training loss: 1.4632... 0.1153 sec/batch Epoch: 8/20... Training Step: 3484... Training loss: 1.2591... 0.1173 sec/batch Epoch: 8/20... Training Step: 3485... Training loss: 1.4350... 0.1200 sec/batch Epoch: 8/20... Training Step: 3486... Training loss: 0.9963... 0.1181 sec/batch Epoch: 8/20... Training Step: 3487... Training loss: 1.2543... 0.1195 sec/batch Epoch: 8/20... Training Step: 3488... Training loss: 1.2731... 0.1210 sec/batch Epoch: 8/20... Training Step: 3489... Training loss: 1.2142... 0.1184 sec/batch Epoch: 8/20... Training Step: 3490... Training loss: 1.1756... 0.1190 sec/batch Epoch: 8/20... Training Step: 3491... Training loss: 1.4179... 0.1163 sec/batch Epoch: 8/20... Training Step: 3492... Training loss: 1.2849... 0.1125 sec/batch Epoch: 8/20... Training Step: 3493... Training loss: 1.2010... 0.1098 sec/batch Epoch: 8/20... Training Step: 3494... Training loss: 1.1039... 0.1154 sec/batch Epoch: 8/20... Training Step: 3495... Training loss: 1.1747... 0.1117 sec/batch Epoch: 8/20... Training Step: 3496... Training loss: 1.3154... 0.1163 sec/batch Epoch: 8/20... Training Step: 3497... Training loss: 1.1856... 0.1119 sec/batch Epoch: 8/20... Training Step: 3498... Training loss: 1.2362... 0.1163 sec/batch Epoch: 8/20... Training Step: 3499... Training loss: 1.5139... 0.1151 sec/batch Epoch: 8/20... Training Step: 3500... Training loss: 1.1847... 0.1177 sec/batch Epoch: 8/20... Training Step: 3501... Training loss: 1.1698... 0.1172 sec/batch Epoch: 8/20... Training Step: 3502... Training loss: 1.1883... 
0.1144 sec/batch Epoch: 8/20... Training Step: 3503... Training loss: 1.2426... 0.1173 sec/batch Epoch: 8/20... Training Step: 3504... Training loss: 1.3160... 0.1121 sec/batch Epoch: 8/20... Training Step: 3505... Training loss: 1.4383... 0.1169 sec/batch Epoch: 8/20... Training Step: 3506... Training loss: 1.0693... 0.1270 sec/batch Epoch: 8/20... Training Step: 3507... Training loss: 1.0964... 0.1257 sec/batch Epoch: 8/20... Training Step: 3508... Training loss: 1.1918... 0.1175 sec/batch Epoch: 8/20... Training Step: 3509... Training loss: 1.2756... 0.1229 sec/batch Epoch: 8/20... Training Step: 3510... Training loss: 1.3913... 0.1305 sec/batch Epoch: 8/20... Training Step: 3511... Training loss: 1.1669... 0.1171 sec/batch Epoch: 8/20... Training Step: 3512... Training loss: 1.2816... 0.1140 sec/batch Epoch: 8/20... Training Step: 3513... Training loss: 1.3139... 0.1111 sec/batch Epoch: 8/20... Training Step: 3514... Training loss: 1.2217... 0.1222 sec/batch Epoch: 8/20... Training Step: 3515... Training loss: 1.5079... 0.1138 sec/batch Epoch: 8/20... Training Step: 3516... Training loss: 1.3340... 0.1185 sec/batch Epoch: 8/20... Training Step: 3517... Training loss: 1.3063... 0.1154 sec/batch Epoch: 8/20... Training Step: 3518... Training loss: 1.3483... 0.1195 sec/batch Epoch: 8/20... Training Step: 3519... Training loss: 1.2842... 0.1171 sec/batch Epoch: 8/20... Training Step: 3520... Training loss: 1.4193... 0.1161 sec/batch Epoch: 8/20... Training Step: 3521... Training loss: 1.3003... 0.1192 sec/batch Epoch: 8/20... Training Step: 3522... Training loss: 1.2223... 0.1152 sec/batch Epoch: 8/20... Training Step: 3523... Training loss: 1.3015... 0.1175 sec/batch Epoch: 8/20... Training Step: 3524... Training loss: 1.1168... 0.1178 sec/batch Epoch: 8/20... Training Step: 3525... Training loss: 1.0948... 0.1165 sec/batch Epoch: 8/20... Training Step: 3526... Training loss: 1.3710... 0.1122 sec/batch Epoch: 8/20... Training Step: 3527... Training loss: 1.2253... 
0.1171 sec/batch Epoch: 8/20... Training Step: 3528... Training loss: 1.2259... 0.1178 sec/batch Epoch: 8/20... Training Step: 3529... Training loss: 1.2264... 0.1164 sec/batch Epoch: 8/20... Training Step: 3530... Training loss: 1.1534... 0.1144 sec/batch Epoch: 8/20... Training Step: 3531... Training loss: 1.1764... 0.1188 sec/batch Epoch: 8/20... Training Step: 3532... Training loss: 1.2178... 0.1173 sec/batch Epoch: 8/20... Training Step: 3533... Training loss: 1.0923... 0.1189 sec/batch Epoch: 8/20... Training Step: 3534... Training loss: 1.2170... 0.1181 sec/batch Epoch: 8/20... Training Step: 3535... Training loss: 1.2378... 0.1183 sec/batch Epoch: 8/20... Training Step: 3536... Training loss: 1.2721... 0.1173 sec/batch Epoch: 8/20... Training Step: 3537... Training loss: 1.2870... 0.1180 sec/batch Epoch: 8/20... Training Step: 3538... Training loss: 1.2874... 0.1183 sec/batch Epoch: 8/20... Training Step: 3539... Training loss: 1.1355... 0.1198 sec/batch Epoch: 8/20... Training Step: 3540... Training loss: 1.1787... 0.1180 sec/batch Epoch: 8/20... Training Step: 3541... Training loss: 1.1525... 0.1142 sec/batch Epoch: 8/20... Training Step: 3542... Training loss: 1.1899... 0.1112 sec/batch Epoch: 8/20... Training Step: 3543... Training loss: 1.2645... 0.1140 sec/batch Epoch: 8/20... Training Step: 3544... Training loss: 1.4336... 0.1206 sec/batch Epoch: 8/20... Training Step: 3545... Training loss: 1.1304... 0.1188 sec/batch Epoch: 8/20... Training Step: 3546... Training loss: 1.1376... 0.1143 sec/batch Epoch: 8/20... Training Step: 3547... Training loss: 1.2392... 0.1174 sec/batch Epoch: 8/20... Training Step: 3548... Training loss: 1.1892... 0.1174 sec/batch Epoch: 8/20... Training Step: 3549... Training loss: 1.2131... 0.1177 sec/batch Epoch: 8/20... Training Step: 3550... Training loss: 1.1614... 0.1202 sec/batch Epoch: 8/20... Training Step: 3551... Training loss: 1.0127... 0.1153 sec/batch Epoch: 8/20... Training Step: 3552... Training loss: 1.3428... 
0.1146 sec/batch Epoch: 8/20... Training Step: 3553... Training loss: 1.1150... 0.1158 sec/batch Epoch: 8/20... Training Step: 3554... Training loss: 1.2468... 0.1163 sec/batch Epoch: 8/20... Training Step: 3555... Training loss: 1.2675... 0.1165 sec/batch Epoch: 8/20... Training Step: 3556... Training loss: 1.5856... 0.1165 sec/batch Epoch: 8/20... Training Step: 3557... Training loss: 1.4071... 0.1190 sec/batch Epoch: 8/20... Training Step: 3558... Training loss: 1.3550... 0.1233 sec/batch Epoch: 8/20... Training Step: 3559... Training loss: 1.2834... 0.1153 sec/batch Epoch: 8/20... Training Step: 3560... Training loss: 1.1250... 0.1198 sec/batch Epoch: 8/20... Training Step: 3561... Training loss: 1.2427... 0.1160 sec/batch Epoch: 8/20... Training Step: 3562... Training loss: 1.2434... 0.1154 sec/batch Epoch: 8/20... Training Step: 3563... Training loss: 0.9807... 0.1163 sec/batch Epoch: 8/20... Training Step: 3564... Training loss: 1.0360... 0.1152 sec/batch Epoch: 8/20... Training Step: 3565... Training loss: 1.1987... 0.1189 sec/batch Epoch: 8/20... Training Step: 3566... Training loss: 1.1793... 0.1219 sec/batch Epoch: 8/20... Training Step: 3567... Training loss: 1.1244... 0.1164 sec/batch Epoch: 8/20... Training Step: 3568... Training loss: 1.1163... 0.1177 sec/batch Epoch: 8/20... Training Step: 3569... Training loss: 1.1717... 0.1178 sec/batch Epoch: 8/20... Training Step: 3570... Training loss: 1.3050... 0.1201 sec/batch Epoch: 8/20... Training Step: 3571... Training loss: 1.0987... 0.1217 sec/batch Epoch: 8/20... Training Step: 3572... Training loss: 1.1090... 0.1172 sec/batch Epoch: 8/20... Training Step: 3573... Training loss: 1.0227... 0.1201 sec/batch Epoch: 8/20... Training Step: 3574... Training loss: 1.0322... 0.1216 sec/batch Epoch: 8/20... Training Step: 3575... Training loss: 1.2717... 0.1207 sec/batch Epoch: 8/20... Training Step: 3576... Training loss: 1.1401... 0.1138 sec/batch Epoch: 8/20... Training Step: 3577... Training loss: 1.3145... 
0.1148 sec/batch Epoch: 8/20... Training Step: 3578... Training loss: 1.0755... 0.1157 sec/batch Epoch: 8/20... Training Step: 3579... Training loss: 1.2717... 0.1190 sec/batch Epoch: 8/20... Training Step: 3580... Training loss: 1.2385... 0.1175 sec/batch Epoch: 8/20... Training Step: 3581... Training loss: 1.0678... 0.1204 sec/batch Epoch: 8/20... Training Step: 3582... Training loss: 1.1512... 0.1179 sec/batch Epoch: 8/20... Training Step: 3583... Training loss: 1.3370... 0.1214 sec/batch Epoch: 8/20... Training Step: 3584... Training loss: 1.1858... 0.1171 sec/batch Epoch: 8/20... Training Step: 3585... Training loss: 1.2930... 0.1167 sec/batch Epoch: 8/20... Training Step: 3586... Training loss: 1.0456... 0.1160 sec/batch Epoch: 8/20... Training Step: 3587... Training loss: 1.2738... 0.1191 sec/batch Epoch: 8/20... Training Step: 3588... Training loss: 1.0772... 0.1214 sec/batch Epoch: 8/20... Training Step: 3589... Training loss: 1.1081... 0.1137 sec/batch Epoch: 8/20... Training Step: 3590... Training loss: 1.2896... 0.1168 sec/batch Epoch: 8/20... Training Step: 3591... Training loss: 1.0592... 0.1175 sec/batch Epoch: 8/20... Training Step: 3592... Training loss: 1.3552... 0.1204 sec/batch Epoch: 8/20... Training Step: 3593... Training loss: 1.1708... 0.1133 sec/batch Epoch: 8/20... Training Step: 3594... Training loss: 0.9883... 0.1209 sec/batch Epoch: 8/20... Training Step: 3595... Training loss: 1.0935... 0.1181 sec/batch Epoch: 8/20... Training Step: 3596... Training loss: 1.6098... 0.1176 sec/batch Epoch: 8/20... Training Step: 3597... Training loss: 1.1201... 0.1186 sec/batch Epoch: 8/20... Training Step: 3598... Training loss: 1.2033... 0.1153 sec/batch Epoch: 8/20... Training Step: 3599... Training loss: 1.2012... 0.1186 sec/batch Epoch: 8/20... Training Step: 3600... Training loss: 1.0117... 0.1168 sec/batch Epoch: 8/20... Training Step: 3601... Training loss: 0.9879... 0.1175 sec/batch Epoch: 8/20... Training Step: 3602... Training loss: 0.9650... 
0.1158 sec/batch Epoch: 8/20... Training Step: 3603... Training loss: 1.2634... 0.1162 sec/batch Epoch: 8/20... Training Step: 3604... Training loss: 1.0917... 0.1174 sec/batch Epoch: 8/20... Training Step: 3605... Training loss: 1.0822... 0.1157 sec/batch Epoch: 8/20... Training Step: 3606... Training loss: 1.1990... 0.1204 sec/batch Epoch: 8/20... Training Step: 3607... Training loss: 1.2827... 0.1208 sec/batch Epoch: 8/20... Training Step: 3608... Training loss: 0.9170... 0.1204 sec/batch Epoch: 8/20... Training Step: 3609... Training loss: 1.3381... 0.1163 sec/batch Epoch: 8/20... Training Step: 3610... Training loss: 1.2600... 0.1138 sec/batch Epoch: 8/20... Training Step: 3611... Training loss: 1.1378... 0.1143 sec/batch Epoch: 8/20... Training Step: 3612... Training loss: 1.0712... 0.1182 sec/batch Epoch: 8/20... Training Step: 3613... Training loss: 1.0983... 0.1206 sec/batch Epoch: 8/20... Training Step: 3614... Training loss: 1.2849... 0.1175 sec/batch Epoch: 8/20... Training Step: 3615... Training loss: 1.1254... 0.1195 sec/batch Epoch: 8/20... Training Step: 3616... Training loss: 1.4476... 0.1210 sec/batch Epoch: 8/20... Training Step: 3617... Training loss: 1.2753... 0.1178 sec/batch Epoch: 8/20... Training Step: 3618... Training loss: 1.1892... 0.1174 sec/batch Epoch: 8/20... Training Step: 3619... Training loss: 1.0789... 0.1158 sec/batch Epoch: 8/20... Training Step: 3620... Training loss: 1.3881... 0.1132 sec/batch Epoch: 8/20... Training Step: 3621... Training loss: 1.1967... 0.1170 sec/batch Epoch: 8/20... Training Step: 3622... Training loss: 1.2696... 0.1197 sec/batch Epoch: 8/20... Training Step: 3623... Training loss: 1.2152... 0.1213 sec/batch Epoch: 8/20... Training Step: 3624... Training loss: 1.3095... 0.1165 sec/batch Epoch: 8/20... Training Step: 3625... Training loss: 1.3215... 0.1202 sec/batch Epoch: 8/20... Training Step: 3626... Training loss: 1.3250... 0.1208 sec/batch Epoch: 8/20... Training Step: 3627... Training loss: 1.1718... 
0.1196 sec/batch Epoch: 8/20... Training Step: 3628... Training loss: 1.2460... 0.1194 sec/batch Epoch: 8/20... Training Step: 3629... Training loss: 1.1353... 0.1238 sec/batch Epoch: 8/20... Training Step: 3630... Training loss: 1.1156... 0.1199 sec/batch Epoch: 8/20... Training Step: 3631... Training loss: 1.0353... 0.1179 sec/batch Epoch: 8/20... Training Step: 3632... Training loss: 1.1766... 0.1181 sec/batch Epoch: 8/20... Training Step: 3633... Training loss: 1.1169... 0.1146 sec/batch Epoch: 8/20... Training Step: 3634... Training loss: 1.3121... 0.1113 sec/batch Epoch: 8/20... Training Step: 3635... Training loss: 1.1549... 0.1145 sec/batch Epoch: 8/20... Training Step: 3636... Training loss: 1.0469... 0.1205 sec/batch Epoch: 8/20... Training Step: 3637... Training loss: 1.0398... 0.1293 sec/batch Epoch: 8/20... Training Step: 3638... Training loss: 1.1428... 0.1231 sec/batch Epoch: 8/20... Training Step: 3639... Training loss: 1.0695... 0.1177 sec/batch Epoch: 8/20... Training Step: 3640... Training loss: 1.1162... 0.1140 sec/batch Epoch: 8/20... Training Step: 3641... Training loss: 1.1384... 0.1146 sec/batch Epoch: 8/20... Training Step: 3642... Training loss: 1.1871... 0.1163 sec/batch Epoch: 8/20... Training Step: 3643... Training loss: 1.0643... 0.1192 sec/batch Epoch: 8/20... Training Step: 3644... Training loss: 1.3138... 0.1176 sec/batch Epoch: 8/20... Training Step: 3645... Training loss: 1.1776... 0.1201 sec/batch Epoch: 8/20... Training Step: 3646... Training loss: 1.1316... 0.1171 sec/batch Epoch: 8/20... Training Step: 3647... Training loss: 1.1926... 0.1089 sec/batch Epoch: 8/20... Training Step: 3648... Training loss: 1.1154... 0.1156 sec/batch Epoch: 8/20... Training Step: 3649... Training loss: 1.2040... 0.1131 sec/batch Epoch: 8/20... Training Step: 3650... Training loss: 1.0210... 0.1150 sec/batch Epoch: 8/20... Training Step: 3651... Training loss: 1.0906... 0.1159 sec/batch Epoch: 8/20... Training Step: 3652... Training loss: 1.3195... 
0.1149 sec/batch Epoch: 8/20... Training Step: 3653... Training loss: 1.1210... 0.1115 sec/batch Epoch: 8/20... Training Step: 3654... Training loss: 1.5398... 0.1153 sec/batch Epoch: 8/20... Training Step: 3655... Training loss: 1.1332... 0.1177 sec/batch Epoch: 8/20... Training Step: 3656... Training loss: 1.5540... 0.1148 sec/batch Epoch: 8/20... Training Step: 3657... Training loss: 1.2730... 0.1165 sec/batch Epoch: 8/20... Training Step: 3658... Training loss: 1.1283... 0.1135 sec/batch Epoch: 8/20... Training Step: 3659... Training loss: 1.0646... 0.1168 sec/batch Epoch: 8/20... Training Step: 3660... Training loss: 1.1104... 0.1187 sec/batch Epoch: 8/20... Training Step: 3661... Training loss: 1.2510... 0.1231 sec/batch Epoch: 8/20... Training Step: 3662... Training loss: 1.2064... 0.1154 sec/batch Epoch: 8/20... Training Step: 3663... Training loss: 1.3209... 0.1128 sec/batch Epoch: 8/20... Training Step: 3664... Training loss: 1.3347... 0.1207 sec/batch Epoch: 8/20... Training Step: 3665... Training loss: 1.3407... 0.1186 sec/batch Epoch: 8/20... Training Step: 3666... Training loss: 1.0755... 0.1167 sec/batch Epoch: 8/20... Training Step: 3667... Training loss: 1.2830... 0.1142 sec/batch Epoch: 8/20... Training Step: 3668... Training loss: 1.0141... 0.1163 sec/batch Epoch: 8/20... Training Step: 3669... Training loss: 1.2766... 0.1169 sec/batch Epoch: 8/20... Training Step: 3670... Training loss: 1.3100... 0.1188 sec/batch Epoch: 8/20... Training Step: 3671... Training loss: 1.3143... 0.1161 sec/batch Epoch: 8/20... Training Step: 3672... Training loss: 1.4252... 0.1194 sec/batch Epoch: 8/20... Training Step: 3673... Training loss: 1.2260... 0.1214 sec/batch Epoch: 8/20... Training Step: 3674... Training loss: 1.2051... 0.1179 sec/batch Epoch: 8/20... Training Step: 3675... Training loss: 1.1938... 0.1156 sec/batch Epoch: 8/20... Training Step: 3676... Training loss: 1.2538... 0.1196 sec/batch Epoch: 8/20... Training Step: 3677... Training loss: 1.0626... 
0.1137 sec/batch Epoch: 8/20... Training Step: 3678... Training loss: 1.2141... 0.1169 sec/batch Epoch: 8/20... Training Step: 3679... Training loss: 1.3506... 0.1172 sec/batch Epoch: 8/20... Training Step: 3680... Training loss: 1.3112... 0.1135 sec/batch Epoch: 8/20... Training Step: 3681... Training loss: 1.4658... 0.1160 sec/batch Epoch: 8/20... Training Step: 3682... Training loss: 1.2654... 0.1189 sec/batch Epoch: 8/20... Training Step: 3683... Training loss: 1.0896... 0.1183 sec/batch Epoch: 8/20... Training Step: 3684... Training loss: 1.2297... 0.1165 sec/batch Epoch: 8/20... Training Step: 3685... Training loss: 1.2225... 0.1174 sec/batch Epoch: 8/20... Training Step: 3686... Training loss: 1.2966... 0.1227 sec/batch Epoch: 8/20... Training Step: 3687... Training loss: 1.3977... 0.1146 sec/batch Epoch: 8/20... Training Step: 3688... Training loss: 1.4859... 0.1101 sec/batch Epoch: 8/20... Training Step: 3689... Training loss: 1.0861... 0.1148 sec/batch Epoch: 8/20... Training Step: 3690... Training loss: 1.1856... 0.1181 sec/batch Epoch: 8/20... Training Step: 3691... Training loss: 1.2494... 0.1175 sec/batch Epoch: 8/20... Training Step: 3692... Training loss: 1.2289... 0.1177 sec/batch Epoch: 8/20... Training Step: 3693... Training loss: 1.2114... 0.1172 sec/batch Epoch: 8/20... Training Step: 3694... Training loss: 1.1489... 0.1221 sec/batch Epoch: 8/20... Training Step: 3695... Training loss: 1.3059... 0.1174 sec/batch Epoch: 8/20... Training Step: 3696... Training loss: 1.1364... 0.1172 sec/batch Epoch: 8/20... Training Step: 3697... Training loss: 1.2428... 0.1161 sec/batch Epoch: 8/20... Training Step: 3698... Training loss: 1.2286... 0.1239 sec/batch Epoch: 8/20... Training Step: 3699... Training loss: 1.1761... 0.1168 sec/batch Epoch: 8/20... Training Step: 3700... Training loss: 1.2923... 0.1169 sec/batch Epoch: 8/20... Training Step: 3701... Training loss: 1.0435... 0.1196 sec/batch Epoch: 8/20... Training Step: 3702... Training loss: 1.4672... 
0.1187 sec/batch Epoch: 8/20... Training Step: 3703... Training loss: 1.2033... 0.1142 sec/batch Epoch: 8/20... Training Step: 3704... Training loss: 1.0732... 0.1128 sec/batch Epoch: 8/20... Training Step: 3705... Training loss: 1.1276... 0.1180 sec/batch Epoch: 8/20... Training Step: 3706... Training loss: 1.0397... 0.1153 sec/batch Epoch: 8/20... Training Step: 3707... Training loss: 1.0802... 0.1177 sec/batch Epoch: 8/20... Training Step: 3708... Training loss: 1.2413... 0.1199 sec/batch Epoch: 8/20... Training Step: 3709... Training loss: 1.2985... 0.1155 sec/batch Epoch: 8/20... Training Step: 3710... Training loss: 1.1363... 0.1168 sec/batch Epoch: 8/20... Training Step: 3711... Training loss: 1.2168... 0.1142 sec/batch Epoch: 8/20... Training Step: 3712... Training loss: 1.1327... 0.1150 sec/batch Epoch: 9/20... Training Step: 3713... Training loss: 1.5320... 0.1194 sec/batch Epoch: 9/20... Training Step: 3714... Training loss: 1.2747... 0.1192 sec/batch Epoch: 9/20... Training Step: 3715... Training loss: 1.1972... 0.1171 sec/batch Epoch: 9/20... Training Step: 3716... Training loss: 1.2015... 0.1151 sec/batch Epoch: 9/20... Training Step: 3717... Training loss: 1.2670... 0.1165 sec/batch Epoch: 9/20... Training Step: 3718... Training loss: 1.0613... 0.1185 sec/batch Epoch: 9/20... Training Step: 3719... Training loss: 1.4628... 0.1173 sec/batch Epoch: 9/20... Training Step: 3720... Training loss: 1.1439... 0.1180 sec/batch Epoch: 9/20... Training Step: 3721... Training loss: 1.0168... 0.1175 sec/batch Epoch: 9/20... Training Step: 3722... Training loss: 1.2820... 0.1141 sec/batch Epoch: 9/20... Training Step: 3723... Training loss: 1.1704... 0.1134 sec/batch Epoch: 9/20... Training Step: 3724... Training loss: 1.0800... 0.1145 sec/batch Epoch: 9/20... Training Step: 3725... Training loss: 1.3321... 0.1200 sec/batch Epoch: 9/20... Training Step: 3726... Training loss: 0.9963... 0.1175 sec/batch Epoch: 9/20... Training Step: 3727... Training loss: 1.1766... 
0.1175 sec/batch Epoch: 9/20... Training Step: 3728... Training loss: 1.2556... 0.1185 sec/batch Epoch: 9/20... Training Step: 3729... Training loss: 1.1061... 0.1148 sec/batch Epoch: 9/20... Training Step: 3730... Training loss: 1.1364... 0.1182 sec/batch Epoch: 9/20... Training Step: 3731... Training loss: 1.2040... 0.1182 sec/batch Epoch: 9/20... Training Step: 3732... Training loss: 1.1218... 0.1174 sec/batch Epoch: 9/20... Training Step: 3733... Training loss: 1.2983... 0.1210 sec/batch Epoch: 9/20... Training Step: 3734... Training loss: 1.0552... 0.1157 sec/batch Epoch: 9/20... Training Step: 3735... Training loss: 1.2856... 0.1163 sec/batch Epoch: 9/20... Training Step: 3736... Training loss: 1.1001... 0.1184 sec/batch Epoch: 9/20... Training Step: 3737... Training loss: 1.1027... 0.1204 sec/batch Epoch: 9/20... Training Step: 3738... Training loss: 1.0988... 0.1155 sec/batch Epoch: 9/20... Training Step: 3739... Training loss: 1.3067... 0.1169 sec/batch Epoch: 9/20... Training Step: 3740... Training loss: 1.0097... 0.1214 sec/batch Epoch: 9/20... Training Step: 3741... Training loss: 1.0685... 0.1160 sec/batch Epoch: 9/20... Training Step: 3742... Training loss: 1.1552... 0.1122 sec/batch Epoch: 9/20... Training Step: 3743... Training loss: 1.0153... 0.1182 sec/batch Epoch: 9/20... Training Step: 3744... Training loss: 1.1073... 0.1149 sec/batch Epoch: 9/20... Training Step: 3745... Training loss: 1.0013... 0.1156 sec/batch Epoch: 9/20... Training Step: 3746... Training loss: 0.9218... 0.1199 sec/batch Epoch: 9/20... Training Step: 3747... Training loss: 1.0162... 0.1220 sec/batch Epoch: 9/20... Training Step: 3748... Training loss: 0.9840... 0.1172 sec/batch Epoch: 9/20... Training Step: 3749... Training loss: 1.1616... 0.1185 sec/batch Epoch: 9/20... Training Step: 3750... Training loss: 1.0463... 0.1174 sec/batch Epoch: 9/20... Training Step: 3751... Training loss: 0.9711... 0.1265 sec/batch Epoch: 9/20... Training Step: 3752... Training loss: 1.4550... 
0.1207 sec/batch Epoch: 9/20... Training Step: 3753... Training loss: 1.1485... 0.1209 sec/batch Epoch: 9/20... Training Step: 3754... Training loss: 1.0232... 0.1188 sec/batch Epoch: 9/20... Training Step: 3755... Training loss: 1.3335... 0.1177 sec/batch Epoch: 9/20... Training Step: 3756... Training loss: 0.9102... 0.1187 sec/batch Epoch: 9/20... Training Step: 3757... Training loss: 1.1285... 0.1183 sec/batch Epoch: 9/20... Training Step: 3758... Training loss: 1.1115... 0.1141 sec/batch Epoch: 9/20... Training Step: 3759... Training loss: 1.2281... 0.1206 sec/batch Epoch: 9/20... Training Step: 3760... Training loss: 1.0665... 0.1166 sec/batch Epoch: 9/20... Training Step: 3761... Training loss: 1.0426... 0.1190 sec/batch Epoch: 9/20... Training Step: 3762... Training loss: 1.1176... 0.1146 sec/batch Epoch: 9/20... Training Step: 3763... Training loss: 1.1277... 0.1133 sec/batch Epoch: 9/20... Training Step: 3764... Training loss: 1.1510... 0.1164 sec/batch Epoch: 9/20... Training Step: 3765... Training loss: 1.1701... 0.1211 sec/batch Epoch: 9/20... Training Step: 3766... Training loss: 1.2572... 0.1157 sec/batch Epoch: 9/20... Training Step: 3767... Training loss: 1.0497... 0.1177 sec/batch Epoch: 9/20... Training Step: 3768... Training loss: 1.1780... 0.1191 sec/batch Epoch: 9/20... Training Step: 3769... Training loss: 1.1573... 0.1236 sec/batch Epoch: 9/20... Training Step: 3770... Training loss: 1.2053... 0.1181 sec/batch Epoch: 9/20... Training Step: 3771... Training loss: 1.0686... 0.1172 sec/batch Epoch: 9/20... Training Step: 3772... Training loss: 1.0564... 0.1165 sec/batch Epoch: 9/20... Training Step: 3773... Training loss: 1.1102... 0.1228 sec/batch Epoch: 9/20... Training Step: 3774... Training loss: 1.2381... 0.1123 sec/batch Epoch: 9/20... Training Step: 3775... Training loss: 1.1156... 0.1145 sec/batch Epoch: 9/20... Training Step: 3776... Training loss: 1.2140... 0.1143 sec/batch Epoch: 9/20... Training Step: 3777... Training loss: 1.0763... 
0.1214 sec/batch Epoch: 9/20... Training Step: 3778... Training loss: 1.2878... 0.1166 sec/batch Epoch: 9/20... Training Step: 3779... Training loss: 1.1538... 0.1178 sec/batch Epoch: 9/20... Training Step: 3780... Training loss: 1.1438... 0.1218 sec/batch Epoch: 9/20... Training Step: 3781... Training loss: 1.1072... 0.1203 sec/batch Epoch: 9/20... Training Step: 3782... Training loss: 1.2298... 0.1172 sec/batch Epoch: 9/20... Training Step: 3783... Training loss: 1.2939... 0.1150 sec/batch Epoch: 9/20... Training Step: 3784... Training loss: 1.1040... 0.1198 sec/batch Epoch: 9/20... Training Step: 3785... Training loss: 1.2440... 0.1222 sec/batch Epoch: 9/20... Training Step: 3786... Training loss: 1.0282... 0.1161 sec/batch Epoch: 9/20... Training Step: 3787... Training loss: 1.3070... 0.1165 sec/batch Epoch: 9/20... Training Step: 3788... Training loss: 1.0301... 0.1167 sec/batch Epoch: 9/20... Training Step: 3789... Training loss: 1.0376... 0.1187 sec/batch Epoch: 9/20... Training Step: 3790... Training loss: 1.1478... 0.1178 sec/batch Epoch: 9/20... Training Step: 3791... Training loss: 1.1900... 0.1158 sec/batch Epoch: 9/20... Training Step: 3792... Training loss: 1.0514... 0.1167 sec/batch Epoch: 9/20... Training Step: 3793... Training loss: 1.3060... 0.1198 sec/batch Epoch: 9/20... Training Step: 3794... Training loss: 1.1302... 0.1146 sec/batch Epoch: 9/20... Training Step: 3795... Training loss: 0.9719... 0.1194 sec/batch Epoch: 9/20... Training Step: 3796... Training loss: 1.1993... 0.1189 sec/batch Epoch: 9/20... Training Step: 3797... Training loss: 1.1943... 0.1199 sec/batch Epoch: 9/20... Training Step: 3798... Training loss: 1.3317... 0.1213 sec/batch Epoch: 9/20... Training Step: 3799... Training loss: 1.0516... 0.1131 sec/batch Epoch: 9/20... Training Step: 3800... Training loss: 1.2661... 0.1191 sec/batch Epoch: 9/20... Training Step: 3801... Training loss: 1.2984... 0.1341 sec/batch Epoch: 9/20... Training Step: 3802... Training loss: 1.0981... 
0.1316 sec/batch Epoch: 9/20... Training Step: 3803... Training loss: 1.2408... 0.1398 sec/batch Epoch: 9/20... Training Step: 3804... Training loss: 1.3233... 0.1239 sec/batch Epoch: 9/20... Training Step: 3805... Training loss: 0.9964... 0.1304 sec/batch Epoch: 9/20... Training Step: 3806... Training loss: 1.3029... 0.1260 sec/batch Epoch: 9/20... Training Step: 3807... Training loss: 1.1359... 0.1312 sec/batch Epoch: 9/20... Training Step: 3808... Training loss: 1.1413... 0.1275 sec/batch Epoch: 9/20... Training Step: 3809... Training loss: 1.3998... 0.1333 sec/batch Epoch: 9/20... Training Step: 3810... Training loss: 1.3632... 0.1234 sec/batch Epoch: 9/20... Training Step: 3811... Training loss: 1.1958... 0.1310 sec/batch Epoch: 9/20... Training Step: 3812... Training loss: 1.1398... 0.1314 sec/batch Epoch: 9/20... Training Step: 3813... Training loss: 1.2688... 0.1347 sec/batch Epoch: 9/20... Training Step: 3814... Training loss: 1.3363... 0.1176 sec/batch Epoch: 9/20... Training Step: 3815... Training loss: 1.4669... 0.1337 sec/batch Epoch: 9/20... Training Step: 3816... Training loss: 1.1994... 0.1268 sec/batch Epoch: 9/20... Training Step: 3817... Training loss: 1.3930... 0.1323 sec/batch Epoch: 9/20... Training Step: 3818... Training loss: 1.3335... 0.1322 sec/batch Epoch: 9/20... Training Step: 3819... Training loss: 1.2424... 0.1179 sec/batch Epoch: 9/20... Training Step: 3820... Training loss: 1.2670... 0.1203 sec/batch Epoch: 9/20... Training Step: 3821... Training loss: 1.2071... 0.1310 sec/batch Epoch: 9/20... Training Step: 3822... Training loss: 1.0459... 0.1316 sec/batch Epoch: 9/20... Training Step: 3823... Training loss: 1.1453... 0.1358 sec/batch Epoch: 9/20... Training Step: 3824... Training loss: 1.0989... 0.1421 sec/batch Epoch: 9/20... Training Step: 3825... Training loss: 1.2646... 0.1347 sec/batch Epoch: 9/20... Training Step: 3826... Training loss: 1.3884... 0.1342 sec/batch Epoch: 9/20... Training Step: 3827... Training loss: 1.1303... 
0.1341 sec/batch Epoch: 9/20... Training Step: 3828... Training loss: 1.1183... 0.1322 sec/batch Epoch: 9/20... Training Step: 3829... Training loss: 1.1555... 0.1309 sec/batch Epoch: 9/20... Training Step: 3830... Training loss: 1.1988... 0.1426 sec/batch Epoch: 9/20... Training Step: 3831... Training loss: 1.2079... 0.1288 sec/batch Epoch: 9/20... Training Step: 3832... Training loss: 1.0413... 0.1258 sec/batch Epoch: 9/20... Training Step: 3833... Training loss: 1.2583... 0.1195 sec/batch Epoch: 9/20... Training Step: 3834... Training loss: 1.2215... 0.1312 sec/batch Epoch: 9/20... Training Step: 3835... Training loss: 1.2448... 0.1259 sec/batch Epoch: 9/20... Training Step: 3836... Training loss: 1.1691... 0.1213 sec/batch Epoch: 9/20... Training Step: 3837... Training loss: 1.1772... 0.1385 sec/batch Epoch: 9/20... Training Step: 3838... Training loss: 1.0182... 0.1391 sec/batch Epoch: 9/20... Training Step: 3839... Training loss: 1.1551... 0.1302 sec/batch Epoch: 9/20... Training Step: 3840... Training loss: 1.3720... 0.1270 sec/batch Epoch: 9/20... Training Step: 3841... Training loss: 1.2571... 0.1287 sec/batch Epoch: 9/20... Training Step: 3842... Training loss: 1.1206... 0.1283 sec/batch Epoch: 9/20... Training Step: 3843... Training loss: 1.3158... 0.1292 sec/batch Epoch: 9/20... Training Step: 3844... Training loss: 1.1912... 0.1277 sec/batch Epoch: 9/20... Training Step: 3845... Training loss: 1.1410... 0.1296 sec/batch Epoch: 9/20... Training Step: 3846... Training loss: 1.4156... 0.1312 sec/batch Epoch: 9/20... Training Step: 3847... Training loss: 1.0270... 0.1299 sec/batch Epoch: 9/20... Training Step: 3848... Training loss: 0.9582... 0.1195 sec/batch Epoch: 9/20... Training Step: 3849... Training loss: 1.0672... 0.1227 sec/batch Epoch: 9/20... Training Step: 3850... Training loss: 1.1840... 0.1200 sec/batch Epoch: 9/20... Training Step: 3851... Training loss: 1.1181... 0.1338 sec/batch Epoch: 9/20... Training Step: 3852... Training loss: 1.2428... 
0.1211 sec/batch Epoch: 9/20... Training Step: 3853... Training loss: 1.0695... 0.1171 sec/batch Epoch: 9/20... Training Step: 3854... Training loss: 1.0753... 0.1223 sec/batch Epoch: 9/20... Training Step: 3855... Training loss: 1.0133... 0.1280 sec/batch Epoch: 9/20... Training Step: 3856... Training loss: 1.2034... 0.1215 sec/batch Epoch: 9/20... Training Step: 3857... Training loss: 1.1841... 0.1135 sec/batch Epoch: 9/20... Training Step: 3858... Training loss: 1.1464... 0.1210 sec/batch Epoch: 9/20... Training Step: 3859... Training loss: 1.1337... 0.1314 sec/batch Epoch: 9/20... Training Step: 3860... Training loss: 1.0549... 0.1195 sec/batch Epoch: 9/20... Training Step: 3861... Training loss: 1.0795... 0.1243 sec/batch Epoch: 9/20... Training Step: 3862... Training loss: 1.4009... 0.1232 sec/batch Epoch: 9/20... Training Step: 3863... Training loss: 1.1578... 0.1417 sec/batch Epoch: 9/20... Training Step: 3864... Training loss: 1.2086... 0.1265 sec/batch Epoch: 9/20... Training Step: 3865... Training loss: 1.2938... 0.1335 sec/batch Epoch: 9/20... Training Step: 3866... Training loss: 1.1621... 0.1250 sec/batch Epoch: 9/20... Training Step: 3867... Training loss: 1.2238... 0.1235 sec/batch Epoch: 9/20... Training Step: 3868... Training loss: 1.1627... 0.1143 sec/batch Epoch: 9/20... Training Step: 3869... Training loss: 1.0434... 0.1181 sec/batch Epoch: 9/20... Training Step: 3870... Training loss: 1.2588... 0.1169 sec/batch Epoch: 9/20... Training Step: 3871... Training loss: 0.9961... 0.1212 sec/batch Epoch: 9/20... Training Step: 3872... Training loss: 1.0530... 0.1159 sec/batch Epoch: 9/20... Training Step: 3873... Training loss: 1.3061... 0.1309 sec/batch Epoch: 9/20... Training Step: 3874... Training loss: 1.1153... 0.1298 sec/batch Epoch: 9/20... Training Step: 3875... Training loss: 1.3081... 0.1313 sec/batch Epoch: 9/20... Training Step: 3876... Training loss: 1.0099... 0.1303 sec/batch Epoch: 9/20... Training Step: 3877... Training loss: 1.2241... 
0.1204 sec/batch Epoch: 9/20... Training Step: 3878... Training loss: 1.0746... 0.1218 sec/batch Epoch: 9/20... Training Step: 3879... Training loss: 1.0854... 0.1235 sec/batch Epoch: 9/20... Training Step: 3880... Training loss: 1.4502... 0.1279 sec/batch Epoch: 9/20... Training Step: 3881... Training loss: 1.1510... 0.1259 sec/batch Epoch: 9/20... Training Step: 3882... Training loss: 1.2109... 0.1257 sec/batch Epoch: 9/20... Training Step: 3883... Training loss: 1.2336... 0.1221 sec/batch Epoch: 9/20... Training Step: 3884... Training loss: 1.2690... 0.1159 sec/batch Epoch: 9/20... Training Step: 3885... Training loss: 0.9936... 0.1190 sec/batch Epoch: 9/20... Training Step: 3886... Training loss: 1.2161... 0.1295 sec/batch Epoch: 9/20... Training Step: 3887... Training loss: 1.2669... 0.1153 sec/batch Epoch: 9/20... Training Step: 3888... Training loss: 1.0813... 0.1314 sec/batch Epoch: 9/20... Training Step: 3889... Training loss: 1.0671... 0.1246 sec/batch Epoch: 9/20... Training Step: 3890... Training loss: 1.2827... 0.1267 sec/batch Epoch: 9/20... Training Step: 3891... Training loss: 1.0078... 0.1320 sec/batch Epoch: 9/20... Training Step: 3892... Training loss: 1.2362... 0.1163 sec/batch Epoch: 9/20... Training Step: 3893... Training loss: 1.0349... 0.1275 sec/batch Epoch: 9/20... Training Step: 3894... Training loss: 1.2117... 0.1288 sec/batch Epoch: 9/20... Training Step: 3895... Training loss: 1.3236... 0.1272 sec/batch Epoch: 9/20... Training Step: 3896... Training loss: 1.2329... 0.1274 sec/batch Epoch: 9/20... Training Step: 3897... Training loss: 1.2642... 0.1353 sec/batch Epoch: 9/20... Training Step: 3898... Training loss: 1.2242... 0.1250 sec/batch Epoch: 9/20... Training Step: 3899... Training loss: 1.2828... 0.1202 sec/batch Epoch: 9/20... Training Step: 3900... Training loss: 1.1515... 0.1188 sec/batch Epoch: 9/20... Training Step: 3901... Training loss: 1.3210... 0.1207 sec/batch Epoch: 9/20... Training Step: 3902... Training loss: 1.1601... 
0.1314 sec/batch Epoch: 9/20... Training Step: 3903... Training loss: 1.0494... 0.1289 sec/batch Epoch: 9/20... Training Step: 3904... Training loss: 1.2767... 0.1201 sec/batch Epoch: 9/20... Training Step: 3905... Training loss: 1.1415... 0.1268 sec/batch Epoch: 9/20... Training Step: 3906... Training loss: 1.2722... 0.1244 sec/batch Epoch: 9/20... Training Step: 3907... Training loss: 1.2561... 0.1289 sec/batch Epoch: 9/20... Training Step: 3908... Training loss: 1.2410... 0.1237 sec/batch Epoch: 9/20... Training Step: 3909... Training loss: 1.1165... 0.1302 sec/batch Epoch: 9/20... Training Step: 3910... Training loss: 1.2457... 0.1215 sec/batch Epoch: 9/20... Training Step: 3911... Training loss: 0.9192... 0.1265 sec/batch Epoch: 9/20... Training Step: 3912... Training loss: 1.2001... 0.1278 sec/batch Epoch: 9/20... Training Step: 3913... Training loss: 1.1458... 0.1390 sec/batch Epoch: 9/20... Training Step: 3914... Training loss: 1.1605... 0.1344 sec/batch Epoch: 9/20... Training Step: 3915... Training loss: 1.1611... 0.1279 sec/batch Epoch: 9/20... Training Step: 3916... Training loss: 1.3208... 0.1329 sec/batch Epoch: 9/20... Training Step: 3917... Training loss: 1.0593... 0.1259 sec/batch Epoch: 9/20... Training Step: 3918... Training loss: 1.1644... 0.1257 sec/batch Epoch: 9/20... Training Step: 3919... Training loss: 1.0960... 0.1139 sec/batch Epoch: 9/20... Training Step: 3920... Training loss: 1.2383... 0.1248 sec/batch Epoch: 9/20... Training Step: 3921... Training loss: 1.1366... 0.1199 sec/batch Epoch: 9/20... Training Step: 3922... Training loss: 1.0142... 0.1248 sec/batch Epoch: 9/20... Training Step: 3923... Training loss: 1.0302... 0.1314 sec/batch Epoch: 9/20... Training Step: 3924... Training loss: 1.2199... 0.1280 sec/batch Epoch: 9/20... Training Step: 3925... Training loss: 1.3143... 0.1184 sec/batch Epoch: 9/20... Training Step: 3926... Training loss: 1.0695... 0.1178 sec/batch Epoch: 9/20... Training Step: 3927... Training loss: 1.3217... 
0.1234 sec/batch Epoch: 9/20... Training Step: 3928... Training loss: 1.1344... 0.1255 sec/batch Epoch: 9/20... Training Step: 3929... Training loss: 1.2234... 0.1192 sec/batch Epoch: 9/20... Training Step: 3930... Training loss: 1.1301... 0.1193 sec/batch Epoch: 9/20... Training Step: 3931... Training loss: 1.3356... 0.1171 sec/batch Epoch: 9/20... Training Step: 3932... Training loss: 1.1714... 0.1158 sec/batch Epoch: 9/20... Training Step: 3933... Training loss: 1.1798... 0.1183 sec/batch Epoch: 9/20... Training Step: 3934... Training loss: 1.4757... 0.1187 sec/batch Epoch: 9/20... Training Step: 3935... Training loss: 1.2272... 0.1156 sec/batch Epoch: 9/20... Training Step: 3936... Training loss: 1.3206... 0.1194 sec/batch Epoch: 9/20... Training Step: 3937... Training loss: 1.1877... 0.1483 sec/batch Epoch: 9/20... Training Step: 3938... Training loss: 1.3636... 0.1239 sec/batch Epoch: 9/20... Training Step: 3939... Training loss: 1.4350... 0.1269 sec/batch Epoch: 9/20... Training Step: 3940... Training loss: 1.1233... 0.1291 sec/batch Epoch: 9/20... Training Step: 3941... Training loss: 1.2520... 0.1284 sec/batch Epoch: 9/20... Training Step: 3942... Training loss: 1.1451... 0.1252 sec/batch Epoch: 9/20... Training Step: 3943... Training loss: 1.3654... 0.1242 sec/batch Epoch: 9/20... Training Step: 3944... Training loss: 1.1550... 0.1200 sec/batch Epoch: 9/20... Training Step: 3945... Training loss: 1.4274... 0.1149 sec/batch Epoch: 9/20... Training Step: 3946... Training loss: 1.1491... 0.1196 sec/batch Epoch: 9/20... Training Step: 3947... Training loss: 1.4354... 0.1180 sec/batch Epoch: 9/20... Training Step: 3948... Training loss: 1.1841... 0.1209 sec/batch Epoch: 9/20... Training Step: 3949... Training loss: 1.3754... 0.1175 sec/batch Epoch: 9/20... Training Step: 3950... Training loss: 1.1041... 0.1124 sec/batch Epoch: 9/20... Training Step: 3951... Training loss: 1.2725... 0.1197 sec/batch Epoch: 9/20... Training Step: 3952... Training loss: 1.2727... 
0.1170 sec/batch Epoch: 9/20... Training Step: 3953... Training loss: 1.1806... 0.1172 sec/batch Epoch: 9/20... Training Step: 3954... Training loss: 1.1604... 0.1166 sec/batch Epoch: 9/20... Training Step: 3955... Training loss: 1.2014... 0.1191 sec/batch Epoch: 9/20... Training Step: 3956... Training loss: 1.2062... 0.1165 sec/batch Epoch: 9/20... Training Step: 3957... Training loss: 1.2627... 0.1201 sec/batch Epoch: 9/20... Training Step: 3958... Training loss: 1.0833... 0.1244 sec/batch Epoch: 9/20... Training Step: 3959... Training loss: 1.0972... 0.1273 sec/batch Epoch: 9/20... Training Step: 3960... Training loss: 1.3095... 0.1316 sec/batch Epoch: 9/20... Training Step: 3961... Training loss: 1.1242... 0.1246 sec/batch Epoch: 9/20... Training Step: 3962... Training loss: 1.2725... 0.1283 sec/batch Epoch: 9/20... Training Step: 3963... Training loss: 1.2592... 0.1301 sec/batch Epoch: 9/20... Training Step: 3964... Training loss: 1.1886... 0.1351 sec/batch Epoch: 9/20... Training Step: 3965... Training loss: 1.1089... 0.1375 sec/batch Epoch: 9/20... Training Step: 3966... Training loss: 1.1956... 0.1314 sec/batch Epoch: 9/20... Training Step: 3967... Training loss: 1.1517... 0.1309 sec/batch Epoch: 9/20... Training Step: 3968... Training loss: 1.1358... 0.1332 sec/batch Epoch: 9/20... Training Step: 3969... Training loss: 1.4692... 0.1351 sec/batch Epoch: 9/20... Training Step: 3970... Training loss: 1.0327... 0.1335 sec/batch Epoch: 9/20... Training Step: 3971... Training loss: 1.1087... 0.1218 sec/batch Epoch: 9/20... Training Step: 3972... Training loss: 1.0598... 0.1253 sec/batch Epoch: 9/20... Training Step: 3973... Training loss: 1.2052... 0.1303 sec/batch Epoch: 9/20... Training Step: 3974... Training loss: 1.2612... 0.1245 sec/batch Epoch: 9/20... Training Step: 3975... Training loss: 1.1524... 0.1229 sec/batch Epoch: 9/20... Training Step: 3976... Training loss: 1.2045... 0.1274 sec/batch Epoch: 9/20... Training Step: 3977... Training loss: 1.2814... 
0.1277 sec/batch Epoch: 9/20... Training Step: 3978... Training loss: 1.2965... 0.1314 sec/batch Epoch: 9/20... Training Step: 3979... Training loss: 1.4474... 0.1273 sec/batch Epoch: 9/20... Training Step: 3980... Training loss: 1.3096... 0.1202 sec/batch Epoch: 9/20... Training Step: 3981... Training loss: 1.2294... 0.1135 sec/batch Epoch: 9/20... Training Step: 3982... Training loss: 1.3974... 0.1149 sec/batch Epoch: 9/20... Training Step: 3983... Training loss: 1.3126... 0.1180 sec/batch Epoch: 9/20... Training Step: 3984... Training loss: 1.3374... 0.1260 sec/batch Epoch: 9/20... Training Step: 3985... Training loss: 1.3046... 0.1347 sec/batch Epoch: 9/20... Training Step: 3986... Training loss: 1.2181... 0.1285 sec/batch Epoch: 9/20... Training Step: 3987... Training loss: 1.3277... 0.1259 sec/batch Epoch: 9/20... Training Step: 3988... Training loss: 1.0947... 0.1309 sec/batch Epoch: 9/20... Training Step: 3989... Training loss: 1.1564... 0.1239 sec/batch Epoch: 9/20... Training Step: 3990... Training loss: 1.3555... 0.1228 sec/batch Epoch: 9/20... Training Step: 3991... Training loss: 1.1499... 0.1308 sec/batch Epoch: 9/20... Training Step: 3992... Training loss: 1.2126... 0.1283 sec/batch Epoch: 9/20... Training Step: 3993... Training loss: 1.1097... 0.1471 sec/batch Epoch: 9/20... Training Step: 3994... Training loss: 1.2027... 0.1399 sec/batch Epoch: 9/20... Training Step: 3995... Training loss: 1.0800... 0.1305 sec/batch Epoch: 9/20... Training Step: 3996... Training loss: 1.2544... 0.1428 sec/batch Epoch: 9/20... Training Step: 3997... Training loss: 1.0967... 0.1328 sec/batch Epoch: 9/20... Training Step: 3998... Training loss: 1.2055... 0.1287 sec/batch Epoch: 9/20... Training Step: 3999... Training loss: 1.2410... 0.1389 sec/batch Epoch: 9/20... Training Step: 4000... Training loss: 1.3239... 0.1429 sec/batch Epoch: 9/20... Training Step: 4001... Training loss: 1.2892... 0.1188 sec/batch Epoch: 9/20... Training Step: 4002... Training loss: 1.2924... 
0.1138 sec/batch Epoch: 9/20... Training Step: 4003... Training loss: 1.1903... 0.1208 sec/batch Epoch: 9/20... Training Step: 4004... Training loss: 1.0960... 0.1248 sec/batch Epoch: 9/20... Training Step: 4005... Training loss: 1.1214... 0.1221 sec/batch Epoch: 9/20... Training Step: 4006... Training loss: 1.1866... 0.1257 sec/batch Epoch: 9/20... Training Step: 4007... Training loss: 1.2709... 0.1147 sec/batch Epoch: 9/20... Training Step: 4008... Training loss: 1.5377... 0.1157 sec/batch Epoch: 9/20... Training Step: 4009... Training loss: 1.1470... 0.1174 sec/batch Epoch: 9/20... Training Step: 4010... Training loss: 1.1955... 0.1251 sec/batch Epoch: 9/20... Training Step: 4011... Training loss: 1.1117... 0.1261 sec/batch Epoch: 9/20... Training Step: 4012... Training loss: 1.2144... 0.1316 sec/batch Epoch: 9/20... Training Step: 4013... Training loss: 1.1689... 0.1246 sec/batch Epoch: 9/20... Training Step: 4014... Training loss: 1.2033... 0.1256 sec/batch Epoch: 9/20... Training Step: 4015... Training loss: 0.9384... 0.1258 sec/batch Epoch: 9/20... Training Step: 4016... Training loss: 1.3535... 0.1288 sec/batch Epoch: 9/20... Training Step: 4017... Training loss: 1.0835... 0.1284 sec/batch Epoch: 9/20... Training Step: 4018... Training loss: 1.1843... 0.1267 sec/batch Epoch: 9/20... Training Step: 4019... Training loss: 1.2152... 0.1306 sec/batch Epoch: 9/20... Training Step: 4020... Training loss: 1.5008... 0.1229 sec/batch Epoch: 9/20... Training Step: 4021... Training loss: 1.2543... 0.1341 sec/batch Epoch: 9/20... Training Step: 4022... Training loss: 1.3007... 0.1277 sec/batch Epoch: 9/20... Training Step: 4023... Training loss: 1.1771... 0.1170 sec/batch Epoch: 9/20... Training Step: 4024... Training loss: 1.1595... 0.1199 sec/batch Epoch: 9/20... Training Step: 4025... Training loss: 1.1253... 0.1258 sec/batch Epoch: 9/20... Training Step: 4026... Training loss: 1.1427... 0.1306 sec/batch Epoch: 9/20... Training Step: 4027... Training loss: 0.9541... 
0.1280 sec/batch Epoch: 9/20... Training Step: 4028... Training loss: 1.0082... 0.1393 sec/batch Epoch: 9/20... Training Step: 4029... Training loss: 1.1587... 0.1402 sec/batch Epoch: 9/20... Training Step: 4030... Training loss: 1.0303... 0.1262 sec/batch Epoch: 9/20... Training Step: 4031... Training loss: 1.0602... 0.1254 sec/batch Epoch: 9/20... Training Step: 4032... Training loss: 1.0453... 0.1129 sec/batch Epoch: 9/20... Training Step: 4033... Training loss: 1.0932... 0.1146 sec/batch Epoch: 9/20... Training Step: 4034... Training loss: 1.3645... 0.1263 sec/batch Epoch: 9/20... Training Step: 4035... Training loss: 1.0722... 0.1265 sec/batch Epoch: 9/20... Training Step: 4036... Training loss: 0.9849... 0.1279 sec/batch Epoch: 9/20... Training Step: 4037... Training loss: 1.0758... 0.1222 sec/batch Epoch: 9/20... Training Step: 4038... Training loss: 1.0089... 0.1216 sec/batch Epoch: 9/20... Training Step: 4039... Training loss: 1.1203... 0.1196 sec/batch Epoch: 9/20... Training Step: 4040... Training loss: 1.0915... 0.1174 sec/batch Epoch: 9/20... Training Step: 4041... Training loss: 1.2433... 0.1194 sec/batch Epoch: 9/20... Training Step: 4042... Training loss: 1.1655... 0.1175 sec/batch Epoch: 9/20... Training Step: 4043... Training loss: 1.1939... 0.1162 sec/batch Epoch: 9/20... Training Step: 4044... Training loss: 1.1152... 0.1207 sec/batch Epoch: 9/20... Training Step: 4045... Training loss: 1.1523... 0.1275 sec/batch Epoch: 9/20... Training Step: 4046... Training loss: 1.0753... 0.1216 sec/batch Epoch: 9/20... Training Step: 4047... Training loss: 1.1885... 0.1239 sec/batch Epoch: 9/20... Training Step: 4048... Training loss: 1.1437... 0.1265 sec/batch Epoch: 9/20... Training Step: 4049... Training loss: 1.2248... 0.1298 sec/batch Epoch: 9/20... Training Step: 4050... Training loss: 1.0015... 0.1242 sec/batch Epoch: 9/20... Training Step: 4051... Training loss: 1.2726... 0.1254 sec/batch Epoch: 9/20... Training Step: 4052... Training loss: 1.0893... 
0.1185 sec/batch Epoch: 9/20... Training Step: 4053... Training loss: 1.0452... 0.1299 sec/batch Epoch: 9/20... Training Step: 4054... Training loss: 1.1615... 0.1172 sec/batch Epoch: 9/20... Training Step: 4055... Training loss: 1.0424... 0.1181 sec/batch Epoch: 9/20... Training Step: 4056... Training loss: 1.4045... 0.1177 sec/batch Epoch: 9/20... Training Step: 4057... Training loss: 1.0259... 0.1143 sec/batch Epoch: 9/20... Training Step: 4058... Training loss: 0.9632... 0.1202 sec/batch Epoch: 9/20... Training Step: 4059... Training loss: 0.9937... 0.1216 sec/batch Epoch: 9/20... Training Step: 4060... Training loss: 1.4693... 0.1215 sec/batch Epoch: 9/20... Training Step: 4061... Training loss: 0.9853... 0.1124 sec/batch Epoch: 9/20... Training Step: 4062... Training loss: 1.2074... 0.1253 sec/batch Epoch: 9/20... Training Step: 4063... Training loss: 1.1137... 0.1240 sec/batch Epoch: 9/20... Training Step: 4064... Training loss: 1.1248... 0.1257 sec/batch Epoch: 9/20... Training Step: 4065... Training loss: 1.0584... 0.1261 sec/batch Epoch: 9/20... Training Step: 4066... Training loss: 0.8470... 0.1239 sec/batch Epoch: 9/20... Training Step: 4067... Training loss: 1.2456... 0.1284 sec/batch Epoch: 9/20... Training Step: 4068... Training loss: 1.1321... 0.1239 sec/batch Epoch: 9/20... Training Step: 4069... Training loss: 1.1332... 0.1297 sec/batch Epoch: 9/20... Training Step: 4070... Training loss: 1.3777... 0.1235 sec/batch Epoch: 9/20... Training Step: 4071... Training loss: 1.3316... 0.1225 sec/batch Epoch: 9/20... Training Step: 4072... Training loss: 1.0148... 0.1251 sec/batch Epoch: 9/20... Training Step: 4073... Training loss: 1.2889... 0.1264 sec/batch Epoch: 9/20... Training Step: 4074... Training loss: 1.2710... 0.1227 sec/batch Epoch: 9/20... Training Step: 4075... Training loss: 0.9636... 0.1207 sec/batch Epoch: 9/20... Training Step: 4076... Training loss: 1.1778... 0.1205 sec/batch Epoch: 9/20... Training Step: 4077... Training loss: 1.1487... 
0.1274 sec/batch Epoch: 9/20... Training Step: 4078... Training loss: 1.2552... 0.1306 sec/batch Epoch: 9/20... Training Step: 4079... Training loss: 1.1047... 0.1289 sec/batch Epoch: 9/20... Training Step: 4080... Training loss: 1.2616... 0.1309 sec/batch Epoch: 9/20... Training Step: 4081... Training loss: 1.1982... 0.1198 sec/batch Epoch: 9/20... Training Step: 4082... Training loss: 1.1430... 0.1231 sec/batch Epoch: 9/20... Training Step: 4083... Training loss: 1.0058... 0.1249 sec/batch Epoch: 9/20... Training Step: 4084... Training loss: 1.0760... 0.1290 sec/batch Epoch: 9/20... Training Step: 4085... Training loss: 0.9818... 0.1256 sec/batch Epoch: 9/20... Training Step: 4086... Training loss: 1.2802... 0.1239 sec/batch Epoch: 9/20... Training Step: 4087... Training loss: 1.2483... 0.1280 sec/batch Epoch: 9/20... Training Step: 4088... Training loss: 1.2787... 0.1259 sec/batch Epoch: 9/20... Training Step: 4089... Training loss: 1.3062... 0.1217 sec/batch Epoch: 9/20... Training Step: 4090... Training loss: 1.2343... 0.1239 sec/batch Epoch: 9/20... Training Step: 4091... Training loss: 1.0578... 0.1263 sec/batch Epoch: 9/20... Training Step: 4092... Training loss: 1.2267... 0.1229 sec/batch Epoch: 9/20... Training Step: 4093... Training loss: 1.0127... 0.1171 sec/batch Epoch: 9/20... Training Step: 4094... Training loss: 1.0636... 0.1263 sec/batch Epoch: 9/20... Training Step: 4095... Training loss: 0.9899... 0.1276 sec/batch Epoch: 9/20... Training Step: 4096... Training loss: 1.1551... 0.1282 sec/batch Epoch: 9/20... Training Step: 4097... Training loss: 1.1092... 0.1253 sec/batch Epoch: 9/20... Training Step: 4098... Training loss: 1.1581... 0.1242 sec/batch Epoch: 9/20... Training Step: 4099... Training loss: 1.1301... 0.1316 sec/batch Epoch: 9/20... Training Step: 4100... Training loss: 0.9596... 0.1241 sec/batch Epoch: 9/20... Training Step: 4101... Training loss: 0.9633... 0.1344 sec/batch Epoch: 9/20... Training Step: 4102... Training loss: 1.1678... 
0.1302 sec/batch Epoch: 9/20... Training Step: 4103... Training loss: 1.0343... 0.1274 sec/batch Epoch: 9/20... Training Step: 4104... Training loss: 1.0770... 0.1214 sec/batch Epoch: 9/20... Training Step: 4105... Training loss: 1.1124... 0.1271 sec/batch Epoch: 9/20... Training Step: 4106... Training loss: 1.1495... 0.1298 sec/batch Epoch: 9/20... Training Step: 4107... Training loss: 1.0969... 0.1384 sec/batch Epoch: 9/20... Training Step: 4108... Training loss: 1.2966... 0.1331 sec/batch Epoch: 9/20... Training Step: 4109... Training loss: 1.1019... 0.1315 sec/batch Epoch: 9/20... Training Step: 4110... Training loss: 1.0561... 0.1270 sec/batch Epoch: 9/20... Training Step: 4111... Training loss: 1.1790... 0.1321 sec/batch Epoch: 9/20... Training Step: 4112... Training loss: 1.1200... 0.1266 sec/batch Epoch: 9/20... Training Step: 4113... Training loss: 1.0765... 0.1349 sec/batch Epoch: 9/20... Training Step: 4114... Training loss: 1.1980... 0.1308 sec/batch Epoch: 9/20... Training Step: 4115... Training loss: 0.9648... 0.1306 sec/batch Epoch: 9/20... Training Step: 4116... Training loss: 1.2153... 0.1256 sec/batch Epoch: 9/20... Training Step: 4117... Training loss: 1.1668... 0.1291 sec/batch Epoch: 9/20... Training Step: 4118... Training loss: 1.2990... 0.1274 sec/batch Epoch: 9/20... Training Step: 4119... Training loss: 1.0973... 0.1289 sec/batch Epoch: 9/20... Training Step: 4120... Training loss: 1.4914... 0.1265 sec/batch Epoch: 9/20... Training Step: 4121... Training loss: 1.1640... 0.1263 sec/batch Epoch: 9/20... Training Step: 4122... Training loss: 1.1291... 0.1288 sec/batch Epoch: 9/20... Training Step: 4123... Training loss: 1.0696... 0.1305 sec/batch Epoch: 9/20... Training Step: 4124... Training loss: 1.0987... 0.1240 sec/batch Epoch: 9/20... Training Step: 4125... Training loss: 1.2570... 0.1285 sec/batch Epoch: 9/20... Training Step: 4126... Training loss: 1.2944... 0.1233 sec/batch Epoch: 9/20... Training Step: 4127... Training loss: 1.3613... 
0.1226 sec/batch Epoch: 9/20... Training Step: 4128... Training loss: 1.4389... 0.1282 sec/batch Epoch: 9/20... Training Step: 4129... Training loss: 1.2639... 0.1182 sec/batch Epoch: 9/20... Training Step: 4130... Training loss: 1.0235... 0.1256 sec/batch Epoch: 9/20... Training Step: 4131... Training loss: 1.2217... 0.1287 sec/batch Epoch: 9/20... Training Step: 4132... Training loss: 1.0099... 0.1279 sec/batch Epoch: 9/20... Training Step: 4133... Training loss: 1.2878... 0.1200 sec/batch Epoch: 9/20... Training Step: 4134... Training loss: 1.2617... 0.1277 sec/batch Epoch: 9/20... Training Step: 4135... Training loss: 1.1982... 0.1236 sec/batch Epoch: 9/20... Training Step: 4136... Training loss: 1.3560... 0.1255 sec/batch Epoch: 9/20... Training Step: 4137... Training loss: 1.0892... 0.1286 sec/batch Epoch: 9/20... Training Step: 4138... Training loss: 1.1437... 0.1234 sec/batch Epoch: 9/20... Training Step: 4139... Training loss: 1.2856... 0.1152 sec/batch Epoch: 9/20... Training Step: 4140... Training loss: 1.1652... 0.1137 sec/batch Epoch: 9/20... Training Step: 4141... Training loss: 1.0497... 0.1234 sec/batch Epoch: 9/20... Training Step: 4142... Training loss: 1.1332... 0.1409 sec/batch Epoch: 9/20... Training Step: 4143... Training loss: 1.3548... 0.1249 sec/batch Epoch: 9/20... Training Step: 4144... Training loss: 1.0759... 0.1264 sec/batch Epoch: 9/20... Training Step: 4145... Training loss: 1.3749... 0.1290 sec/batch Epoch: 9/20... Training Step: 4146... Training loss: 1.3487... 0.1269 sec/batch Epoch: 9/20... Training Step: 4147... Training loss: 1.0991... 0.1261 sec/batch Epoch: 9/20... Training Step: 4148... Training loss: 1.2306... 0.1252 sec/batch Epoch: 9/20... Training Step: 4149... Training loss: 1.1169... 0.1240 sec/batch Epoch: 9/20... Training Step: 4150... Training loss: 1.2973... 0.1283 sec/batch Epoch: 9/20... Training Step: 4151... Training loss: 1.4494... 0.1304 sec/batch Epoch: 9/20... Training Step: 4152... Training loss: 1.5044... 
0.1265 sec/batch Epoch: 9/20... Training Step: 4153... Training loss: 1.2041... 0.1281 sec/batch Epoch: 9/20... Training Step: 4154... Training loss: 1.1673... 0.1269 sec/batch Epoch: 9/20... Training Step: 4155... Training loss: 1.3387... 0.1257 sec/batch Epoch: 9/20... Training Step: 4156... Training loss: 1.1266... 0.1258 sec/batch Epoch: 9/20... Training Step: 4157... Training loss: 1.2123... 0.1238 sec/batch Epoch: 9/20... Training Step: 4158... Training loss: 1.1009... 0.1260 sec/batch Epoch: 9/20... Training Step: 4159... Training loss: 1.2821... 0.1286 sec/batch Epoch: 9/20... Training Step: 4160... Training loss: 1.0411... 0.1303 sec/batch Epoch: 9/20... Training Step: 4161... Training loss: 1.2089... 0.1272 sec/batch Epoch: 9/20... Training Step: 4162... Training loss: 1.1728... 0.1201 sec/batch Epoch: 9/20... Training Step: 4163... Training loss: 1.0922... 0.1321 sec/batch Epoch: 9/20... Training Step: 4164... Training loss: 1.2159... 0.1192 sec/batch Epoch: 9/20... Training Step: 4165... Training loss: 1.1454... 0.1219 sec/batch Epoch: 9/20... Training Step: 4166... Training loss: 1.5105... 0.1274 sec/batch Epoch: 9/20... Training Step: 4167... Training loss: 1.1704... 0.1215 sec/batch Epoch: 9/20... Training Step: 4168... Training loss: 1.0189... 0.1283 sec/batch Epoch: 9/20... Training Step: 4169... Training loss: 1.1923... 0.1291 sec/batch Epoch: 9/20... Training Step: 4170... Training loss: 0.9818... 0.1288 sec/batch Epoch: 9/20... Training Step: 4171... Training loss: 1.0383... 0.1242 sec/batch Epoch: 9/20... Training Step: 4172... Training loss: 1.2189... 0.1352 sec/batch Epoch: 9/20... Training Step: 4173... Training loss: 1.2283... 0.1277 sec/batch Epoch: 9/20... Training Step: 4174... Training loss: 1.1110... 0.1258 sec/batch Epoch: 9/20... Training Step: 4175... Training loss: 1.1709... 0.1247 sec/batch Epoch: 9/20... Training Step: 4176... Training loss: 1.1041... 0.1259 sec/batch Epoch: 10/20... Training Step: 4177... 
Training loss: 1.4897... 0.1271 sec/batch Epoch: 10/20... Training Step: 4178... Training loss: 1.3029... 0.1230 sec/batch Epoch: 10/20... Training Step: 4179... Training loss: 1.2326... 0.1225 sec/batch Epoch: 10/20... Training Step: 4180... Training loss: 1.1466... 0.1249 sec/batch Epoch: 10/20... Training Step: 4181... Training loss: 1.2613... 0.1247 sec/batch Epoch: 10/20... Training Step: 4182... Training loss: 0.9899... 0.1215 sec/batch Epoch: 10/20... Training Step: 4183... Training loss: 1.3281... 0.1228 sec/batch Epoch: 10/20... Training Step: 4184... Training loss: 1.0899... 0.1228 sec/batch Epoch: 10/20... Training Step: 4185... Training loss: 0.9513... 0.1291 sec/batch Epoch: 10/20... Training Step: 4186... Training loss: 1.1952... 0.1224 sec/batch Epoch: 10/20... Training Step: 4187... Training loss: 1.1751... 0.1340 sec/batch Epoch: 10/20... Training Step: 4188... Training loss: 0.9337... 0.1248 sec/batch Epoch: 10/20... Training Step: 4189... Training loss: 1.2610... 0.1250 sec/batch Epoch: 10/20... Training Step: 4190... Training loss: 0.9347... 0.1313 sec/batch Epoch: 10/20... Training Step: 4191... Training loss: 1.1948... 0.1248 sec/batch Epoch: 10/20... Training Step: 4192... Training loss: 1.2187... 0.1253 sec/batch Epoch: 10/20... Training Step: 4193... Training loss: 1.1291... 0.1322 sec/batch Epoch: 10/20... Training Step: 4194... Training loss: 1.0373... 0.1194 sec/batch Epoch: 10/20... Training Step: 4195... Training loss: 1.2021... 0.1244 sec/batch Epoch: 10/20... Training Step: 4196... Training loss: 1.0496... 0.1258 sec/batch Epoch: 10/20... Training Step: 4197... Training loss: 1.2756... 0.1135 sec/batch Epoch: 10/20... Training Step: 4198... Training loss: 1.1040... 0.1172 sec/batch Epoch: 10/20... Training Step: 4199... Training loss: 1.1884... 0.1255 sec/batch Epoch: 10/20... Training Step: 4200... Training loss: 1.0920... 0.1208 sec/batch Epoch: 10/20... Training Step: 4201... Training loss: 1.1449... 
0.1260 sec/batch Epoch: 10/20... Training Step: 4202... Training loss: 1.1218... 0.1211 sec/batch Epoch: 10/20... Training Step: 4203... Training loss: 1.1524... 0.1202 sec/batch Epoch: 10/20... Training Step: 4204... Training loss: 0.9931... 0.1313 sec/batch Epoch: 10/20... Training Step: 4205... Training loss: 1.0479... 0.1202 sec/batch Epoch: 10/20... Training Step: 4206... Training loss: 1.2005... 0.1263 sec/batch Epoch: 10/20... Training Step: 4207... Training loss: 1.0116... 0.1325 sec/batch Epoch: 10/20... Training Step: 4208... Training loss: 1.0173... 0.1291 sec/batch Epoch: 10/20... Training Step: 4209... Training loss: 1.0208... 0.1301 sec/batch Epoch: 10/20... Training Step: 4210... Training loss: 0.9153... 0.1299 sec/batch Epoch: 10/20... Training Step: 4211... Training loss: 0.9842... 0.1251 sec/batch Epoch: 10/20... Training Step: 4212... Training loss: 1.0295... 0.1231 sec/batch Epoch: 10/20... Training Step: 4213... Training loss: 1.1670... 0.1288 sec/batch Epoch: 10/20... Training Step: 4214... Training loss: 1.0439... 0.1253 sec/batch Epoch: 10/20... Training Step: 4215... Training loss: 1.0147... 0.1246 sec/batch Epoch: 10/20... Training Step: 4216... Training loss: 1.4644... 0.1261 sec/batch Epoch: 10/20... Training Step: 4217... Training loss: 1.1416... 0.1275 sec/batch Epoch: 10/20... Training Step: 4218... Training loss: 1.1423... 0.1278 sec/batch Epoch: 10/20... Training Step: 4219... Training loss: 1.3705... 0.1302 sec/batch Epoch: 10/20... Training Step: 4220... Training loss: 0.9318... 0.1247 sec/batch Epoch: 10/20... Training Step: 4221... Training loss: 1.0997... 0.1217 sec/batch Epoch: 10/20... Training Step: 4222... Training loss: 1.0647... 0.1252 sec/batch Epoch: 10/20... Training Step: 4223... Training loss: 1.1137... 0.1231 sec/batch Epoch: 10/20... Training Step: 4224... Training loss: 1.0588... 0.1270 sec/batch Epoch: 10/20... Training Step: 4225... Training loss: 1.1267... 0.1254 sec/batch Epoch: 10/20... Training Step: 4226... 
Training loss: 1.1558... 0.1162 sec/batch Epoch: 10/20... Training Step: 4227... Training loss: 1.0758... 0.1149 sec/batch Epoch: 10/20... Training Step: 4228... Training loss: 1.1836... 0.1137 sec/batch Epoch: 10/20... Training Step: 4229... Training loss: 1.0702... 0.1143 sec/batch Epoch: 10/20... Training Step: 4230... Training loss: 1.0644... 0.1209 sec/batch Epoch: 10/20... Training Step: 4231... Training loss: 1.0265... 0.1222 sec/batch Epoch: 10/20... Training Step: 4232... Training loss: 1.0832... 0.1168 sec/batch Epoch: 10/20... Training Step: 4233... Training loss: 1.2093... 0.1128 sec/batch Epoch: 10/20... Training Step: 4234... Training loss: 1.2224... 0.1286 sec/batch Epoch: 10/20... Training Step: 4235... Training loss: 0.9000... 0.1242 sec/batch Epoch: 10/20... Training Step: 4236... Training loss: 1.0365... 0.1236 sec/batch Epoch: 10/20... Training Step: 4237... Training loss: 0.9875... 0.1198 sec/batch Epoch: 10/20... Training Step: 4238... Training loss: 1.2142... 0.1200 sec/batch Epoch: 10/20... Training Step: 4239... Training loss: 1.0867... 0.1296 sec/batch Epoch: 10/20... Training Step: 4240... Training loss: 1.2296... 0.1279 sec/batch Epoch: 10/20... Training Step: 4241... Training loss: 1.0848... 0.1213 sec/batch Epoch: 10/20... Training Step: 4242... Training loss: 1.2648... 0.1225 sec/batch Epoch: 10/20... Training Step: 4243... Training loss: 1.0764... 0.1338 sec/batch Epoch: 10/20... Training Step: 4244... Training loss: 1.1088... 0.1301 sec/batch Epoch: 10/20... Training Step: 4245... Training loss: 0.9594... 0.1322 sec/batch Epoch: 10/20... Training Step: 4246... Training loss: 1.1748... 0.1285 sec/batch Epoch: 10/20... Training Step: 4247... Training loss: 1.3319... 0.1325 sec/batch Epoch: 10/20... Training Step: 4248... Training loss: 0.9734... 0.1305 sec/batch Epoch: 10/20... Training Step: 4249... Training loss: 1.0947... 0.1309 sec/batch Epoch: 10/20... Training Step: 4250... Training loss: 0.9825... 
0.1289 sec/batch Epoch: 10/20... Training Step: 4251... Training loss: 1.2376... 0.1262 sec/batch Epoch: 10/20... Training Step: 4252... Training loss: 0.9625... 0.1315 sec/batch Epoch: 10/20... Training Step: 4253... Training loss: 1.0855... 0.1282 sec/batch Epoch: 10/20... Training Step: 4254... Training loss: 1.1098... 0.1215 sec/batch Epoch: 10/20... Training Step: 4255... Training loss: 1.1574... 0.1167 sec/batch Epoch: 10/20... Training Step: 4256... Training loss: 1.0148... 0.1257 sec/batch Epoch: 10/20... Training Step: 4257... Training loss: 1.2213... 0.1250 sec/batch Epoch: 10/20... Training Step: 4258... Training loss: 1.1717... 0.1141 sec/batch Epoch: 10/20... Training Step: 4259... Training loss: 0.9735... 0.1138 sec/batch Epoch: 10/20... Training Step: 4260... Training loss: 1.3001... 0.1151 sec/batch Epoch: 10/20... Training Step: 4261... Training loss: 1.0866... 0.1203 sec/batch Epoch: 10/20... Training Step: 4262... Training loss: 1.2688... 0.1206 sec/batch Epoch: 10/20... Training Step: 4263... Training loss: 1.0210... 0.1297 sec/batch Epoch: 10/20... Training Step: 4264... Training loss: 1.3269... 0.1274 sec/batch Epoch: 10/20... Training Step: 4265... Training loss: 1.3788... 0.1263 sec/batch Epoch: 10/20... Training Step: 4266... Training loss: 1.1015... 0.1268 sec/batch Epoch: 10/20... Training Step: 4267... Training loss: 1.1486... 0.1246 sec/batch Epoch: 10/20... Training Step: 4268... Training loss: 1.2981... 0.1246 sec/batch Epoch: 10/20... Training Step: 4269... Training loss: 0.9786... 0.1242 sec/batch Epoch: 10/20... Training Step: 4270... Training loss: 1.3597... 0.1296 sec/batch Epoch: 10/20... Training Step: 4271... Training loss: 1.1045... 0.1284 sec/batch Epoch: 10/20... Training Step: 4272... Training loss: 1.1656... 0.1158 sec/batch Epoch: 10/20... Training Step: 4273... Training loss: 1.3334... 0.1295 sec/batch Epoch: 10/20... Training Step: 4274... Training loss: 1.1610... 0.1208 sec/batch Epoch: 10/20... Training Step: 4275... 
Training loss: 1.2580... 0.1288 sec/batch Epoch: 10/20... Training Step: 4276... Training loss: 1.1842... 0.1222 sec/batch Epoch: 10/20... Training Step: 4277... Training loss: 1.1798... 0.1227 sec/batch Epoch: 10/20... Training Step: 4278... Training loss: 1.3106... 0.1227 sec/batch Epoch: 10/20... Training Step: 4279... Training loss: 1.2596... 0.1286 sec/batch Epoch: 10/20... Training Step: 4280... Training loss: 1.0992... 0.1283 sec/batch Epoch: 10/20... Training Step: 4281... Training loss: 1.2577... 0.1271 sec/batch Epoch: 10/20... Training Step: 4282... Training loss: 1.2658... 0.1233 sec/batch Epoch: 10/20... Training Step: 4283... Training loss: 1.2340... 0.1272 sec/batch Epoch: 10/20... Training Step: 4284... Training loss: 1.1958... 0.1293 sec/batch Epoch: 10/20... Training Step: 4285... Training loss: 1.3253... 0.1268 sec/batch Epoch: 10/20... Training Step: 4286... Training loss: 1.1065... 0.1288 sec/batch Epoch: 10/20... Training Step: 4287... Training loss: 1.1880... 0.1249 sec/batch Epoch: 10/20... Training Step: 4288... Training loss: 1.0821... 0.1213 sec/batch Epoch: 10/20... Training Step: 4289... Training loss: 1.1503... 0.1309 sec/batch Epoch: 10/20... Training Step: 4290... Training loss: 1.2567... 0.1269 sec/batch Epoch: 10/20... Training Step: 4291... Training loss: 1.1389... 0.1217 sec/batch Epoch: 10/20... Training Step: 4292... Training loss: 1.1063... 0.1250 sec/batch Epoch: 10/20... Training Step: 4293... Training loss: 1.2297... 0.1275 sec/batch Epoch: 10/20... Training Step: 4294... Training loss: 1.2314... 0.1230 sec/batch Epoch: 10/20... Training Step: 4295... Training loss: 1.1724... 0.1316 sec/batch Epoch: 10/20... Training Step: 4296... Training loss: 1.0651... 0.1264 sec/batch Epoch: 10/20... Training Step: 4297... Training loss: 1.2658... 0.1291 sec/batch Epoch: 10/20... Training Step: 4298... Training loss: 1.1955... 0.1276 sec/batch Epoch: 10/20... Training Step: 4299... Training loss: 1.2097... 
0.1231 sec/batch Epoch: 10/20... Training Step: 4300... Training loss: 1.2730... 0.1253 sec/batch Epoch: 10/20... Training Step: 4301... Training loss: 1.1441... 0.1301 sec/batch Epoch: 10/20... Training Step: 4302... Training loss: 1.1319... 0.1239 sec/batch Epoch: 10/20... Training Step: 4303... Training loss: 1.1309... 0.1341 sec/batch Epoch: 10/20... Training Step: 4304... Training loss: 1.2407... 0.1225 sec/batch Epoch: 10/20... Training Step: 4305... Training loss: 1.2400... 0.1304 sec/batch Epoch: 10/20... Training Step: 4306... Training loss: 1.2216... 0.1274 sec/batch Epoch: 10/20... Training Step: 4307... Training loss: 1.3206... 0.1330 sec/batch Epoch: 10/20... Training Step: 4308... Training loss: 1.0707... 0.1217 sec/batch Epoch: 10/20... Training Step: 4309... Training loss: 1.1349... 0.1300 sec/batch Epoch: 10/20... Training Step: 4310... Training loss: 1.3210... 0.1250 sec/batch Epoch: 10/20... Training Step: 4311... Training loss: 1.0921... 0.1145 sec/batch Epoch: 10/20... Training Step: 4312... Training loss: 0.8720... 0.1180 sec/batch Epoch: 10/20... Training Step: 4313... Training loss: 0.9262... 0.1242 sec/batch Epoch: 10/20... Training Step: 4314... Training loss: 1.1777... 0.1267 sec/batch Epoch: 10/20... Training Step: 4315... Training loss: 0.9608... 0.1284 sec/batch Epoch: 10/20... Training Step: 4316... Training loss: 1.0851... 0.1204 sec/batch Epoch: 10/20... Training Step: 4317... Training loss: 0.9827... 0.1241 sec/batch Epoch: 10/20... Training Step: 4318... Training loss: 1.0255... 0.1271 sec/batch Epoch: 10/20... Training Step: 4319... Training loss: 1.0028... 0.1302 sec/batch Epoch: 10/20... Training Step: 4320... Training loss: 1.0996... 0.1272 sec/batch Epoch: 10/20... Training Step: 4321... Training loss: 1.2163... 0.1225 sec/batch Epoch: 10/20... Training Step: 4322... Training loss: 1.1064... 0.1301 sec/batch Epoch: 10/20... Training Step: 4323... Training loss: 1.2679... 0.1273 sec/batch Epoch: 10/20... Training Step: 4324... 
Training loss: 1.0351... 0.1187 sec/batch Epoch: 10/20... Training Step: 4325... Training loss: 1.0708... 0.1242 sec/batch Epoch: 10/20... Training Step: 4326... Training loss: 1.4002... 0.1195 sec/batch Epoch: 10/20... Training Step: 4327... Training loss: 1.2838... 0.1214 sec/batch Epoch: 10/20... Training Step: 4328... Training loss: 1.2432... 0.1172 sec/batch Epoch: 10/20... Training Step: 4329... Training loss: 1.3235... 0.1148 sec/batch Epoch: 10/20... Training Step: 4330... Training loss: 1.1642... 0.1125 sec/batch Epoch: 10/20... Training Step: 4331... Training loss: 1.0724... 0.1151 sec/batch Epoch: 10/20... Training Step: 4332... Training loss: 1.1600... 0.1123 sec/batch Epoch: 10/20... Training Step: 4333... Training loss: 1.1040... 0.1124 sec/batch Epoch: 10/20... Training Step: 4334... Training loss: 1.0896... 0.1098 sec/batch Epoch: 10/20... Training Step: 4335... Training loss: 0.9887... 0.1123 sec/batch Epoch: 10/20... Training Step: 4336... Training loss: 1.1405... 0.1194 sec/batch Epoch: 10/20... Training Step: 4337... Training loss: 1.2233... 0.1215 sec/batch Epoch: 10/20... Training Step: 4338... Training loss: 1.0593... 0.1391 sec/batch Epoch: 10/20... Training Step: 4339... Training loss: 1.3096... 0.1254 sec/batch Epoch: 10/20... Training Step: 4340... Training loss: 1.0341... 0.1259 sec/batch Epoch: 10/20... Training Step: 4341... Training loss: 1.1713... 0.1252 sec/batch Epoch: 10/20... Training Step: 4342... Training loss: 1.0758... 0.1237 sec/batch Epoch: 10/20... Training Step: 4343... Training loss: 0.9925... 0.1243 sec/batch Epoch: 10/20... Training Step: 4344... Training loss: 1.2749... 0.1308 sec/batch Epoch: 10/20... Training Step: 4345... Training loss: 1.1590... 0.1251 sec/batch Epoch: 10/20... Training Step: 4346... Training loss: 1.2234... 0.1244 sec/batch Epoch: 10/20... Training Step: 4347... Training loss: 1.1414... 0.1237 sec/batch Epoch: 10/20... Training Step: 4348... Training loss: 1.1851... 
0.1341 sec/batch Epoch: 10/20... Training Step: 4349... Training loss: 1.0287... 0.1261 sec/batch Epoch: 10/20... Training Step: 4350... Training loss: 1.0734... 0.1205 sec/batch Epoch: 10/20... Training Step: 4351... Training loss: 1.2144... 0.1226 sec/batch Epoch: 10/20... Training Step: 4352... Training loss: 1.0433... 0.1244 sec/batch Epoch: 10/20... Training Step: 4353... Training loss: 1.0484... 0.1323 sec/batch Epoch: 10/20... Training Step: 4354... Training loss: 1.2620... 0.1285 sec/batch Epoch: 10/20... Training Step: 4355... Training loss: 0.9245... 0.1266 sec/batch Epoch: 10/20... Training Step: 4356... Training loss: 1.1664... 0.1232 sec/batch Epoch: 10/20... Training Step: 4357... Training loss: 0.9381... 0.1257 sec/batch Epoch: 10/20... Training Step: 4358... Training loss: 1.3875... 0.1250 sec/batch Epoch: 10/20... Training Step: 4359... Training loss: 1.1439... 0.1240 sec/batch Epoch: 10/20... Training Step: 4360... Training loss: 1.1823... 0.1252 sec/batch Epoch: 10/20... Training Step: 4361... Training loss: 1.3091... 0.1271 sec/batch Epoch: 10/20... Training Step: 4362... Training loss: 1.1521... 0.1258 sec/batch Epoch: 10/20... Training Step: 4363... Training loss: 1.2272... 0.1230 sec/batch Epoch: 10/20... Training Step: 4364... Training loss: 1.0251... 0.1272 sec/batch Epoch: 10/20... Training Step: 4365... Training loss: 1.0933... 0.1143 sec/batch Epoch: 10/20... Training Step: 4366... Training loss: 1.1254... 0.1266 sec/batch Epoch: 10/20... Training Step: 4367... Training loss: 1.0964... 0.1263 sec/batch Epoch: 10/20... Training Step: 4368... Training loss: 1.2000... 0.1222 sec/batch Epoch: 10/20... Training Step: 4369... Training loss: 1.1610... 0.1242 sec/batch Epoch: 10/20... Training Step: 4370... Training loss: 1.0537... 0.1199 sec/batch Epoch: 10/20... Training Step: 4371... Training loss: 1.1871... 0.1140 sec/batch Epoch: 10/20... Training Step: 4372... Training loss: 1.1271... 0.1151 sec/batch Epoch: 10/20... Training Step: 4373... 
Training loss: 1.1223... 0.1179 sec/batch Epoch: 10/20... Training Step: 4374... Training loss: 1.1731... 0.1112 sec/batch Epoch: 10/20... Training Step: 4375... Training loss: 0.9271... 0.1143 sec/batch Epoch: 10/20... Training Step: 4376... Training loss: 1.2973... 0.1319 sec/batch Epoch: 10/20... Training Step: 4377... Training loss: 1.1086... 0.1283 sec/batch Epoch: 10/20... Training Step: 4378... Training loss: 1.1284... 0.1297 sec/batch Epoch: 10/20... Training Step: 4379... Training loss: 1.0454... 0.1301 sec/batch Epoch: 10/20... Training Step: 4380... Training loss: 1.3970... 0.1333 sec/batch Epoch: 10/20... Training Step: 4381... Training loss: 0.9962... 0.1295 sec/batch Epoch: 10/20... Training Step: 4382... Training loss: 1.1138... 0.1370 sec/batch Epoch: 10/20... Training Step: 4383... Training loss: 1.0737... 0.1362 sec/batch Epoch: 10/20... Training Step: 4384... Training loss: 1.1975... 0.1147 sec/batch Epoch: 10/20... Training Step: 4385... Training loss: 1.2926... 0.1251 sec/batch Epoch: 10/20... Training Step: 4386... Training loss: 0.9819... 0.1269 sec/batch Epoch: 10/20... Training Step: 4387... Training loss: 1.0702... 0.1279 sec/batch Epoch: 10/20... Training Step: 4388... Training loss: 1.2559... 0.1254 sec/batch Epoch: 10/20... Training Step: 4389... Training loss: 1.2826... 0.1299 sec/batch Epoch: 10/20... Training Step: 4390... Training loss: 1.1355... 0.1319 sec/batch Epoch: 10/20... Training Step: 4391... Training loss: 1.2754... 0.1469 sec/batch Epoch: 10/20... Training Step: 4392... Training loss: 1.2225... 0.1558 sec/batch Epoch: 10/20... Training Step: 4393... Training loss: 1.1693... 0.1311 sec/batch Epoch: 10/20... Training Step: 4394... Training loss: 1.1344... 0.1245 sec/batch Epoch: 10/20... Training Step: 4395... Training loss: 1.1934... 0.1165 sec/batch Epoch: 10/20... Training Step: 4396... Training loss: 1.1168... 0.1199 sec/batch Epoch: 10/20... Training Step: 4397... Training loss: 1.0950... 
0.1218 sec/batch Epoch: 10/20... Training Step: 4398... Training loss: 1.2786... 0.1203 sec/batch Epoch: 10/20... Training Step: 4399... Training loss: 1.2243... 0.1139 sec/batch Epoch: 10/20... Training Step: 4400... Training loss: 1.3285... 0.1273 sec/batch Epoch: 10/20... Training Step: 4401... Training loss: 1.2273... 0.1207 sec/batch Epoch: 10/20... Training Step: 4402... Training loss: 1.2156... 0.1163 sec/batch Epoch: 10/20... Training Step: 4403... Training loss: 1.3044... 0.1295 sec/batch Epoch: 10/20... Training Step: 4404... Training loss: 1.1220... 0.1304 sec/batch Epoch: 10/20... Training Step: 4405... Training loss: 1.1665... 0.1266 sec/batch Epoch: 10/20... Training Step: 4406... Training loss: 1.1443... 0.1259 sec/batch Epoch: 10/20... Training Step: 4407... Training loss: 1.1990... 0.1161 sec/batch Epoch: 10/20... Training Step: 4408... Training loss: 1.1773... 0.1143 sec/batch Epoch: 10/20... Training Step: 4409... Training loss: 1.5552... 0.1288 sec/batch Epoch: 10/20... Training Step: 4410... Training loss: 1.1643... 0.1225 sec/batch Epoch: 10/20... Training Step: 4411... Training loss: 1.2691... 0.1212 sec/batch Epoch: 10/20... Training Step: 4412... Training loss: 1.1812... 0.1230 sec/batch Epoch: 10/20... Training Step: 4413... Training loss: 1.2593... 0.1209 sec/batch Epoch: 10/20... Training Step: 4414... Training loss: 1.0751... 0.1175 sec/batch Epoch: 10/20... Training Step: 4415... Training loss: 1.2163... 0.1215 sec/batch Epoch: 10/20... Training Step: 4416... Training loss: 1.3049... 0.1153 sec/batch Epoch: 10/20... Training Step: 4417... Training loss: 1.1065... 0.1221 sec/batch Epoch: 10/20... Training Step: 4418... Training loss: 1.0960... 0.1295 sec/batch Epoch: 10/20... Training Step: 4419... Training loss: 1.1576... 0.1255 sec/batch Epoch: 10/20... Training Step: 4420... Training loss: 1.1537... 0.1231 sec/batch Epoch: 10/20... Training Step: 4421... Training loss: 1.1535... 0.1284 sec/batch Epoch: 10/20... Training Step: 4422... 
Training loss: 1.0833... 0.1234 sec/batch Epoch: 10/20... Training Step: 4423... Training loss: 1.1872... 0.1312 sec/batch Epoch: 10/20... Training Step: 4424... Training loss: 1.3153... 0.1357 sec/batch Epoch: 10/20... Training Step: 4425... Training loss: 1.1688... 0.1372 sec/batch Epoch: 10/20... Training Step: 4426... Training loss: 1.1577... 0.1385 sec/batch Epoch: 10/20... Training Step: 4427... Training loss: 1.2140... 0.1339 sec/batch Epoch: 10/20... Training Step: 4428... Training loss: 1.2322... 0.1403 sec/batch Epoch: 10/20... Training Step: 4429... Training loss: 1.0904... 0.1304 sec/batch Epoch: 10/20... Training Step: 4430... Training loss: 1.1745... 0.1270 sec/batch Epoch: 10/20... Training Step: 4431... Training loss: 1.1599... 0.1208 sec/batch Epoch: 10/20... Training Step: 4432... Training loss: 1.0859... 0.1125 sec/batch Epoch: 10/20... Training Step: 4433... Training loss: 1.2790... 0.1152 sec/batch Epoch: 10/20... Training Step: 4434... Training loss: 1.0800... 0.1207 sec/batch Epoch: 10/20... Training Step: 4435... Training loss: 1.1067... 0.1300 sec/batch Epoch: 10/20... Training Step: 4436... Training loss: 1.1353... 0.1255 sec/batch Epoch: 10/20... Training Step: 4437... Training loss: 1.1365... 0.1274 sec/batch Epoch: 10/20... Training Step: 4438... Training loss: 1.1661... 0.1156 sec/batch Epoch: 10/20... Training Step: 4439... Training loss: 1.1877... 0.1297 sec/batch Epoch: 10/20... Training Step: 4440... Training loss: 1.1833... 0.1274 sec/batch Epoch: 10/20... Training Step: 4441... Training loss: 1.2668... 0.1241 sec/batch Epoch: 10/20... Training Step: 4442... Training loss: 1.1587... 0.1218 sec/batch Epoch: 10/20... Training Step: 4443... Training loss: 1.2971... 0.1245 sec/batch Epoch: 10/20... Training Step: 4444... Training loss: 1.3498... 0.1242 sec/batch Epoch: 10/20... Training Step: 4445... Training loss: 1.2474... 0.1250 sec/batch Epoch: 10/20... Training Step: 4446... Training loss: 1.3338... 
0.1268 sec/batch Epoch: 10/20... Training Step: 4447... Training loss: 1.2505... 0.1300 sec/batch Epoch: 10/20... Training Step: 4448... Training loss: 1.3320... 0.1191 sec/batch Epoch: 10/20... Training Step: 4449... Training loss: 1.3439... 0.1233 sec/batch Epoch: 10/20... Training Step: 4450... Training loss: 1.2009... 0.1234 sec/batch Epoch: 10/20... Training Step: 4451... Training loss: 1.2329... 0.1283 sec/batch Epoch: 10/20... Training Step: 4452... Training loss: 1.1888... 0.1180 sec/batch Epoch: 10/20... Training Step: 4453... Training loss: 1.1294... 0.1290 sec/batch Epoch: 10/20... Training Step: 4454... Training loss: 1.4821... 0.1228 sec/batch Epoch: 10/20... Training Step: 4455... Training loss: 1.1302... 0.1146 sec/batch Epoch: 10/20... Training Step: 4456... Training loss: 1.1801... 0.1197 sec/batch Epoch: 10/20... Training Step: 4457... Training loss: 1.1166... 0.1253 sec/batch Epoch: 10/20... Training Step: 4458... Training loss: 1.1651... 0.1258 sec/batch Epoch: 10/20... Training Step: 4459... Training loss: 1.0581... 0.1248 sec/batch Epoch: 10/20... Training Step: 4460... Training loss: 1.1582... 0.1231 sec/batch Epoch: 10/20... Training Step: 4461... Training loss: 0.9890... 0.1224 sec/batch Epoch: 10/20... Training Step: 4462... Training loss: 1.1723... 0.1177 sec/batch Epoch: 10/20... Training Step: 4463... Training loss: 1.1728... 0.1238 sec/batch Epoch: 10/20... Training Step: 4464... Training loss: 1.3096... 0.1273 sec/batch Epoch: 10/20... Training Step: 4465... Training loss: 1.2258... 0.1166 sec/batch Epoch: 10/20... Training Step: 4466... Training loss: 1.2037... 0.1208 sec/batch Epoch: 10/20... Training Step: 4467... Training loss: 1.1569... 0.1206 sec/batch Epoch: 10/20... Training Step: 4468... Training loss: 1.1468... 0.1305 sec/batch Epoch: 10/20... Training Step: 4469... Training loss: 1.0369... 0.1243 sec/batch Epoch: 10/20... Training Step: 4470... Training loss: 1.1994... 0.1239 sec/batch Epoch: 10/20... Training Step: 4471... 
Training loss: 1.2354... 0.1197 sec/batch Epoch: 10/20... Training Step: 4472... Training loss: 1.5016... 0.1252 sec/batch Epoch: 10/20... Training Step: 4473... Training loss: 1.0471... 0.1172 sec/batch Epoch: 10/20... Training Step: 4474... Training loss: 1.0868... 0.1239 sec/batch Epoch: 10/20... Training Step: 4475... Training loss: 1.2115... 0.1340 sec/batch Epoch: 10/20... Training Step: 4476... Training loss: 1.1275... 0.1198 sec/batch Epoch: 10/20... Training Step: 4477... Training loss: 1.1468... 0.1197 sec/batch Epoch: 10/20... Training Step: 4478... Training loss: 1.0887... 0.1194 sec/batch Epoch: 10/20... Training Step: 4479... Training loss: 0.9418... 0.1179 sec/batch Epoch: 10/20... Training Step: 4480... Training loss: 1.2917... 0.1269 sec/batch Epoch: 10/20... Training Step: 4481... Training loss: 0.9377... 0.1306 sec/batch Epoch: 10/20... Training Step: 4482... Training loss: 1.2369... 0.1231 sec/batch Epoch: 10/20... Training Step: 4483... Training loss: 1.2461... 0.1225 sec/batch Epoch: 10/20... Training Step: 4484... Training loss: 1.4924... 0.1250 sec/batch Epoch: 10/20... Training Step: 4485... Training loss: 1.2933... 0.1255 sec/batch Epoch: 10/20... Training Step: 4486... Training loss: 1.2726... 0.1235 sec/batch Epoch: 10/20... Training Step: 4487... Training loss: 1.1304... 0.1151 sec/batch Epoch: 10/20... Training Step: 4488... Training loss: 1.1263... 0.1171 sec/batch Epoch: 10/20... Training Step: 4489... Training loss: 1.0737... 0.1173 sec/batch Epoch: 10/20... Training Step: 4490... Training loss: 1.1072... 0.1154 sec/batch Epoch: 10/20... Training Step: 4491... Training loss: 0.9944... 0.1206 sec/batch Epoch: 10/20... Training Step: 4492... Training loss: 1.1145... 0.1198 sec/batch Epoch: 10/20... Training Step: 4493... Training loss: 0.9994... 0.1198 sec/batch Epoch: 10/20... Training Step: 4494... Training loss: 1.0649... 0.1147 sec/batch Epoch: 10/20... Training Step: 4495... Training loss: 1.0713... 
0.1200 sec/batch Epoch: 10/20... Training Step: 4496... Training loss: 1.0945... 0.1192 sec/batch Epoch: 10/20... Training Step: 4497... Training loss: 1.0212... 0.1200 sec/batch Epoch: 10/20... Training Step: 4498... Training loss: 1.2230... 0.1197 sec/batch Epoch: 10/20... Training Step: 4499... Training loss: 1.1378... 0.1165 sec/batch Epoch: 10/20... Training Step: 4500... Training loss: 0.9861... 0.1162 sec/batch Epoch: 10/20... Training Step: 4501... Training loss: 1.0085... 0.1190 sec/batch Epoch: 10/20... Training Step: 4502... Training loss: 0.9308... 0.1192 sec/batch Epoch: 10/20... Training Step: 4503... Training loss: 1.1057... 0.1144 sec/batch Epoch: 10/20... Training Step: 4504... Training loss: 1.0586... 0.1177 sec/batch Epoch: 10/20... Training Step: 4505... Training loss: 1.1945... 0.1206 sec/batch Epoch: 10/20... Training Step: 4506... Training loss: 1.0820... 0.1199 sec/batch Epoch: 10/20... Training Step: 4507... Training loss: 1.1202... 0.1169 sec/batch Epoch: 10/20... Training Step: 4508... Training loss: 1.1445... 0.1169 sec/batch Epoch: 10/20... Training Step: 4509... Training loss: 1.0296... 0.1202 sec/batch Epoch: 10/20... Training Step: 4510... Training loss: 1.0141... 0.1168 sec/batch Epoch: 10/20... Training Step: 4511... Training loss: 1.1108... 0.1178 sec/batch Epoch: 10/20... Training Step: 4512... Training loss: 1.0957... 0.1168 sec/batch Epoch: 10/20... Training Step: 4513... Training loss: 1.0818... 0.1172 sec/batch Epoch: 10/20... Training Step: 4514... Training loss: 1.0928... 0.1179 sec/batch Epoch: 10/20... Training Step: 4515... Training loss: 1.2028... 0.1150 sec/batch Epoch: 10/20... Training Step: 4516... Training loss: 1.0266... 0.1197 sec/batch Epoch: 10/20... Training Step: 4517... Training loss: 1.0507... 0.1184 sec/batch Epoch: 10/20... Training Step: 4518... Training loss: 1.1874... 0.1169 sec/batch Epoch: 10/20... Training Step: 4519... Training loss: 0.9585... 0.1194 sec/batch Epoch: 10/20... Training Step: 4520... 
Training loss: 1.1214... 0.1168 sec/batch Epoch: 10/20... Training Step: 4521... Training loss: 0.9954... 0.1201 sec/batch Epoch: 10/20... Training Step: 4522... Training loss: 0.9271... 0.1194 sec/batch Epoch: 10/20... Training Step: 4523... Training loss: 1.0071... 0.1180 sec/batch Epoch: 10/20... Training Step: 4524... Training loss: 1.2725... 0.1180 sec/batch Epoch: 10/20... Training Step: 4525... Training loss: 1.0510... 0.1200 sec/batch Epoch: 10/20... Training Step: 4526... Training loss: 1.0897... 0.1184 sec/batch Epoch: 10/20... Training Step: 4527... Training loss: 1.1484... 0.1235 sec/batch Epoch: 10/20... Training Step: 4528... Training loss: 0.9878... 0.1188 sec/batch Epoch: 10/20... Training Step: 4529... Training loss: 0.9875... 0.1239 sec/batch Epoch: 10/20... Training Step: 4530... Training loss: 0.8642... 0.1252 sec/batch Epoch: 10/20... Training Step: 4531... Training loss: 1.1479... 0.1246 sec/batch Epoch: 10/20... Training Step: 4532... Training loss: 1.0864... 0.1302 sec/batch Epoch: 10/20... Training Step: 4533... Training loss: 1.0855... 0.1257 sec/batch Epoch: 10/20... Training Step: 4534... Training loss: 1.0987... 0.1288 sec/batch Epoch: 10/20... Training Step: 4535... Training loss: 1.2618... 0.1253 sec/batch Epoch: 10/20... Training Step: 4536... Training loss: 0.8980... 0.1277 sec/batch Epoch: 10/20... Training Step: 4537... Training loss: 1.3095... 0.1407 sec/batch Epoch: 10/20... Training Step: 4538... Training loss: 1.2216... 0.1280 sec/batch Epoch: 10/20... Training Step: 4539... Training loss: 0.9911... 0.1265 sec/batch Epoch: 10/20... Training Step: 4540... Training loss: 1.1801... 0.1215 sec/batch Epoch: 10/20... Training Step: 4541... Training loss: 1.0355... 0.1281 sec/batch Epoch: 10/20... Training Step: 4542... Training loss: 1.1698... 0.1282 sec/batch Epoch: 10/20... Training Step: 4543... Training loss: 1.1100... 0.1263 sec/batch Epoch: 10/20... Training Step: 4544... Training loss: 1.2247... 
0.1322 sec/batch Epoch: 10/20... Training Step: 4545... Training loss: 1.0733... 0.1172 sec/batch Epoch: 10/20... Training Step: 4546... Training loss: 1.1531... 0.1336 sec/batch Epoch: 10/20... Training Step: 4547... Training loss: 1.0669... 0.1252 sec/batch Epoch: 10/20... Training Step: 4548... Training loss: 1.1865... 0.1177 sec/batch Epoch: 10/20... Training Step: 4549... Training loss: 1.0054... 0.1170 sec/batch Epoch: 10/20... Training Step: 4550... Training loss: 1.2985... 0.1311 sec/batch Epoch: 10/20... Training Step: 4551... Training loss: 1.1856... 0.1204 sec/batch Epoch: 10/20... Training Step: 4552... Training loss: 1.2340... 0.1270 sec/batch Epoch: 10/20... Training Step: 4553... Training loss: 1.2563... 0.1210 sec/batch Epoch: 10/20... Training Step: 4554... Training loss: 1.2695... 0.1263 sec/batch Epoch: 10/20... Training Step: 4555... Training loss: 1.0730... 0.1205 sec/batch Epoch: 10/20... Training Step: 4556... Training loss: 1.1440... 0.1252 sec/batch Epoch: 10/20... Training Step: 4557... Training loss: 0.9799... 0.1297 sec/batch Epoch: 10/20... Training Step: 4558... Training loss: 1.0193... 0.1175 sec/batch Epoch: 10/20... Training Step: 4559... Training loss: 1.1328... 0.1203 sec/batch Epoch: 10/20... Training Step: 4560... Training loss: 1.1403... 0.1213 sec/batch Epoch: 10/20... Training Step: 4561... Training loss: 1.0056... 0.1238 sec/batch Epoch: 10/20... Training Step: 4562... Training loss: 1.2066... 0.1288 sec/batch Epoch: 10/20... Training Step: 4563... Training loss: 1.2016... 0.1253 sec/batch Epoch: 10/20... Training Step: 4564... Training loss: 0.9988... 0.1284 sec/batch Epoch: 10/20... Training Step: 4565... Training loss: 0.9943... 0.1246 sec/batch Epoch: 10/20... Training Step: 4566... Training loss: 1.0810... 0.1304 sec/batch Epoch: 10/20... Training Step: 4567... Training loss: 0.9547... 0.1391 sec/batch Epoch: 10/20... Training Step: 4568... Training loss: 1.0027... 0.1273 sec/batch Epoch: 10/20... Training Step: 4569... 
Training loss: 1.0545... 0.1247 sec/batch Epoch: 10/20... Training Step: 4570... Training loss: 1.1210... 0.1379 sec/batch Epoch: 10/20... Training Step: 4571... Training loss: 1.0725... 0.1288 sec/batch Epoch: 10/20... Training Step: 4572... Training loss: 1.2257... 0.1273 sec/batch Epoch: 10/20... Training Step: 4573... Training loss: 1.0468... 0.1264 sec/batch Epoch: 10/20... Training Step: 4574... Training loss: 1.1249... 0.1246 sec/batch Epoch: 10/20... Training Step: 4575... Training loss: 1.0677... 0.1221 sec/batch Epoch: 10/20... Training Step: 4576... Training loss: 1.0749... 0.1219 sec/batch Epoch: 10/20... Training Step: 4577... Training loss: 1.0591... 0.1293 sec/batch Epoch: 10/20... Training Step: 4578... Training loss: 0.9274... 0.1282 sec/batch Epoch: 10/20... Training Step: 4579... Training loss: 1.0867... 0.1275 sec/batch Epoch: 10/20... Training Step: 4580... Training loss: 1.2035... 0.1283 sec/batch Epoch: 10/20... Training Step: 4581... Training loss: 1.1601... 0.1315 sec/batch Epoch: 10/20... Training Step: 4582... Training loss: 1.2037... 0.1343 sec/batch Epoch: 10/20... Training Step: 4583... Training loss: 1.0257... 0.1341 sec/batch Epoch: 10/20... Training Step: 4584... Training loss: 1.4482... 0.1222 sec/batch Epoch: 10/20... Training Step: 4585... Training loss: 1.0753... 0.1174 sec/batch Epoch: 10/20... Training Step: 4586... Training loss: 1.0926... 0.1246 sec/batch Epoch: 10/20... Training Step: 4587... Training loss: 1.0699... 0.1184 sec/batch Epoch: 10/20... Training Step: 4588... Training loss: 1.1414... 0.1176 sec/batch Epoch: 10/20... Training Step: 4589... Training loss: 1.1855... 0.1237 sec/batch Epoch: 10/20... Training Step: 4590... Training loss: 1.0433... 0.1248 sec/batch Epoch: 10/20... Training Step: 4591... Training loss: 1.2090... 0.1231 sec/batch Epoch: 10/20... Training Step: 4592... Training loss: 1.4427... 0.1291 sec/batch Epoch: 10/20... Training Step: 4593... Training loss: 1.2945... 
0.1301 sec/batch Epoch: 10/20... Training Step: 4594... Training loss: 0.9396... 0.1180 sec/batch Epoch: 10/20... Training Step: 4595... Training loss: 1.0788... 0.1161 sec/batch Epoch: 10/20... Training Step: 4596... Training loss: 0.9580... 0.1257 sec/batch Epoch: 10/20... Training Step: 4597... Training loss: 1.1633... 0.1194 sec/batch Epoch: 10/20... Training Step: 4598... Training loss: 1.1444... 0.1165 sec/batch Epoch: 10/20... Training Step: 4599... Training loss: 1.1302... 0.1147 sec/batch Epoch: 10/20... Training Step: 4600... Training loss: 1.2508... 0.1204 sec/batch Epoch: 10/20... Training Step: 4601... Training loss: 1.0822... 0.1165 sec/batch Epoch: 10/20... Training Step: 4602... Training loss: 1.1038... 0.1170 sec/batch Epoch: 10/20... Training Step: 4603... Training loss: 1.1726... 0.1276 sec/batch Epoch: 10/20... Training Step: 4604... Training loss: 1.1037... 0.1211 sec/batch Epoch: 10/20... Training Step: 4605... Training loss: 1.0177... 0.1192 sec/batch Epoch: 10/20... Training Step: 4606... Training loss: 1.1115... 0.1289 sec/batch Epoch: 10/20... Training Step: 4607... Training loss: 1.3377... 0.1181 sec/batch Epoch: 10/20... Training Step: 4608... Training loss: 1.1337... 0.1225 sec/batch Epoch: 10/20... Training Step: 4609... Training loss: 1.3820... 0.1271 sec/batch Epoch: 10/20... Training Step: 4610... Training loss: 1.2910... 0.1222 sec/batch Epoch: 10/20... Training Step: 4611... Training loss: 0.9744... 0.1175 sec/batch Epoch: 10/20... Training Step: 4612... Training loss: 1.1844... 0.1256 sec/batch Epoch: 10/20... Training Step: 4613... Training loss: 1.0170... 0.1194 sec/batch Epoch: 10/20... Training Step: 4614... Training loss: 1.2857... 0.1103 sec/batch Epoch: 10/20... Training Step: 4615... Training loss: 1.3345... 0.1289 sec/batch Epoch: 10/20... Training Step: 4616... Training loss: 1.2986... 0.1235 sec/batch Epoch: 10/20... Training Step: 4617... Training loss: 0.9613... 0.1251 sec/batch Epoch: 10/20... Training Step: 4618... 
Training loss: 1.1451... 0.1227 sec/batch Epoch: 10/20... Training Step: 4619... Training loss: 1.2347... 0.1267 sec/batch Epoch: 10/20... Training Step: 4620... Training loss: 1.1812... 0.1261 sec/batch Epoch: 10/20... Training Step: 4621... Training loss: 1.2636... 0.1266 sec/batch Epoch: 10/20... Training Step: 4622... Training loss: 1.1318... 0.1233 sec/batch Epoch: 10/20... Training Step: 4623... Training loss: 1.0897... 0.1202 sec/batch Epoch: 10/20... Training Step: 4624... Training loss: 0.9945... 0.1241 sec/batch Epoch: 10/20... Training Step: 4625... Training loss: 1.2001... 0.1261 sec/batch Epoch: 10/20... Training Step: 4626... Training loss: 1.1133... 0.1224 sec/batch Epoch: 10/20... Training Step: 4627... Training loss: 1.0142... 0.1242 sec/batch Epoch: 10/20... Training Step: 4628... Training loss: 1.2936... 0.1266 sec/batch Epoch: 10/20... Training Step: 4629... Training loss: 1.0897... 0.1272 sec/batch Epoch: 10/20... Training Step: 4630... Training loss: 1.4363... 0.1220 sec/batch Epoch: 10/20... Training Step: 4631... Training loss: 1.2970... 0.1225 sec/batch Epoch: 10/20... Training Step: 4632... Training loss: 0.9405... 0.1284 sec/batch Epoch: 10/20... Training Step: 4633... Training loss: 1.0258... 0.1305 sec/batch Epoch: 10/20... Training Step: 4634... Training loss: 0.9095... 0.1160 sec/batch Epoch: 10/20... Training Step: 4635... Training loss: 0.9603... 0.1237 sec/batch Epoch: 10/20... Training Step: 4636... Training loss: 1.1425... 0.1233 sec/batch Epoch: 10/20... Training Step: 4637... Training loss: 1.1764... 0.1144 sec/batch Epoch: 10/20... Training Step: 4638... Training loss: 1.0053... 0.1136 sec/batch Epoch: 10/20... Training Step: 4639... Training loss: 1.1530... 0.1142 sec/batch Epoch: 10/20... Training Step: 4640... Training loss: 1.0521... 0.1248 sec/batch Epoch: 11/20... Training Step: 4641... Training loss: 1.4921... 0.1404 sec/batch Epoch: 11/20... Training Step: 4642... Training loss: 1.1083... 
0.1331 sec/batch Epoch: 11/20... Training Step: 4643... Training loss: 1.0618... 0.1265 sec/batch Epoch: 11/20... Training Step: 4644... Training loss: 1.1195... 0.1216 sec/batch Epoch: 11/20... Training Step: 4645... Training loss: 1.1696... 0.1255 sec/batch Epoch: 11/20... Training Step: 4646... Training loss: 1.0381... 0.1247 sec/batch Epoch: 11/20... Training Step: 4647... Training loss: 1.2149... 0.1256 sec/batch Epoch: 11/20... Training Step: 4648... Training loss: 1.0056... 0.1261 sec/batch Epoch: 11/20... Training Step: 4649... Training loss: 1.0171... 0.1266 sec/batch Epoch: 11/20... Training Step: 4650... Training loss: 1.1183... 0.1316 sec/batch Epoch: 11/20... Training Step: 4651... Training loss: 1.1784... 0.1261 sec/batch Epoch: 11/20... Training Step: 4652... Training loss: 0.9166... 0.1324 sec/batch Epoch: 11/20... Training Step: 4653... Training loss: 1.3499... 0.1275 sec/batch Epoch: 11/20... Training Step: 4654... Training loss: 0.8840... 0.1201 sec/batch Epoch: 11/20... Training Step: 4655... Training loss: 1.2156... 0.1242 sec/batch Epoch: 11/20... Training Step: 4656... Training loss: 1.2437... 0.1250 sec/batch Epoch: 11/20... Training Step: 4657... Training loss: 1.1115... 0.1195 sec/batch Epoch: 11/20... Training Step: 4658... Training loss: 1.2028... 0.1223 sec/batch Epoch: 11/20... Training Step: 4659... Training loss: 1.1225... 0.1240 sec/batch Epoch: 11/20... Training Step: 4660... Training loss: 1.0719... 0.1304 sec/batch Epoch: 11/20... Training Step: 4661... Training loss: 1.2356... 0.1283 sec/batch Epoch: 11/20... Training Step: 4662... Training loss: 1.0856... 0.1282 sec/batch Epoch: 11/20... Training Step: 4663... Training loss: 1.2345... 0.1260 sec/batch Epoch: 11/20... Training Step: 4664... Training loss: 1.0212... 0.1320 sec/batch Epoch: 11/20... Training Step: 4665... Training loss: 1.1290... 0.1410 sec/batch Epoch: 11/20... Training Step: 4666... Training loss: 1.2081... 0.1309 sec/batch Epoch: 11/20... Training Step: 4667... 
Training loss: 1.1725... 0.1348 sec/batch Epoch: 11/20... Training Step: 4668... Training loss: 0.9423... 0.1353 sec/batch Epoch: 11/20... Training Step: 4669... Training loss: 1.1628... 0.1345 sec/batch Epoch: 11/20... Training Step: 4670... Training loss: 1.1500... 0.1296 sec/batch Epoch: 11/20... Training Step: 4671... Training loss: 0.9715... 0.1242 sec/batch Epoch: 11/20... Training Step: 4672... Training loss: 1.0834... 0.1317 sec/batch Epoch: 11/20... Training Step: 4673... Training loss: 0.9153... 0.1307 sec/batch Epoch: 11/20... Training Step: 4674... Training loss: 0.9855... 0.1238 sec/batch Epoch: 11/20... Training Step: 4675... Training loss: 0.9815... 0.1328 sec/batch Epoch: 11/20... Training Step: 4676... Training loss: 1.0538... 0.1295 sec/batch Epoch: 11/20... Training Step: 4677... Training loss: 1.1860... 0.1275 sec/batch Epoch: 11/20... Training Step: 4678... Training loss: 1.0233... 0.1294 sec/batch Epoch: 11/20... Training Step: 4679... Training loss: 1.0704... 0.1239 sec/batch Epoch: 11/20... Training Step: 4680... Training loss: 1.3717... 0.1304 sec/batch Epoch: 11/20... Training Step: 4681... Training loss: 1.1149... 0.1337 sec/batch Epoch: 11/20... Training Step: 4682... Training loss: 1.0458... 0.1337 sec/batch Epoch: 11/20... Training Step: 4683... Training loss: 1.2264... 0.1287 sec/batch Epoch: 11/20... Training Step: 4684... Training loss: 0.8999... 0.1347 sec/batch Epoch: 11/20... Training Step: 4685... Training loss: 1.0690... 0.1288 sec/batch Epoch: 11/20... Training Step: 4686... Training loss: 1.0925... 0.1311 sec/batch Epoch: 11/20... Training Step: 4687... Training loss: 1.1917... 0.1299 sec/batch Epoch: 11/20... Training Step: 4688... Training loss: 1.1074... 0.1171 sec/batch Epoch: 11/20... Training Step: 4689... Training loss: 1.1166... 0.1326 sec/batch Epoch: 11/20... Training Step: 4690... Training loss: 1.0760... 0.1327 sec/batch Epoch: 11/20... Training Step: 4691... Training loss: 1.1052... 
0.1282 sec/batch Epoch: 11/20... Training Step: 4692... Training loss: 1.2709... 0.1327 sec/batch Epoch: 11/20... Training Step: 4693... Training loss: 1.1516... 0.1324 sec/batch Epoch: 11/20... Training Step: 4694... Training loss: 1.0441... 0.1301 sec/batch Epoch: 11/20... Training Step: 4695... Training loss: 1.0230... 0.1282 sec/batch Epoch: 11/20... Training Step: 4696... Training loss: 1.0265... 0.1307 sec/batch Epoch: 11/20... Training Step: 4697... Training loss: 1.0687... 0.1218 sec/batch Epoch: 11/20... Training Step: 4698... Training loss: 1.2254... 0.1244 sec/batch Epoch: 11/20... Training Step: 4699... Training loss: 0.8824... 0.1268 sec/batch Epoch: 11/20... Training Step: 4700... Training loss: 1.0476... 0.1315 sec/batch Epoch: 11/20... Training Step: 4701... Training loss: 1.0092... 0.1349 sec/batch Epoch: 11/20... Training Step: 4702... Training loss: 1.2437... 0.1364 sec/batch Epoch: 11/20... Training Step: 4703... Training loss: 0.9482... 0.1277 sec/batch Epoch: 11/20... Training Step: 4704... Training loss: 1.1660... 0.1343 sec/batch Epoch: 11/20... Training Step: 4705... Training loss: 0.9995... 0.1366 sec/batch Epoch: 11/20... Training Step: 4706... Training loss: 1.1502... 0.1349 sec/batch Epoch: 11/20... Training Step: 4707... Training loss: 1.1054... 0.1373 sec/batch Epoch: 11/20... Training Step: 4708... Training loss: 1.1832... 0.1276 sec/batch Epoch: 11/20... Training Step: 4709... Training loss: 0.9893... 0.1375 sec/batch Epoch: 11/20... Training Step: 4710... Training loss: 1.0998... 0.1383 sec/batch Epoch: 11/20... Training Step: 4711... Training loss: 1.2514... 0.1275 sec/batch Epoch: 11/20... Training Step: 4712... Training loss: 0.9839... 0.1307 sec/batch Epoch: 11/20... Training Step: 4713... Training loss: 1.1467... 0.1353 sec/batch Epoch: 11/20... Training Step: 4714... Training loss: 1.0221... 0.1285 sec/batch Epoch: 11/20... Training Step: 4715... Training loss: 1.2358... 0.1355 sec/batch Epoch: 11/20... Training Step: 4716... 
Training loss: 1.0757... 0.1316 sec/batch Epoch: 11/20... Training Step: 4717... Training loss: 1.0302... 0.1318 sec/batch Epoch: 11/20... Training Step: 4718... Training loss: 1.1453... 0.1320 sec/batch Epoch: 11/20... Training Step: 4719... Training loss: 1.2225... 0.1351 sec/batch Epoch: 11/20... Training Step: 4720... Training loss: 0.9376... 0.1336 sec/batch Epoch: 11/20... Training Step: 4721... Training loss: 1.2626... 0.1297 sec/batch Epoch: 11/20... Training Step: 4722... Training loss: 1.0687... 0.1204 sec/batch Epoch: 11/20... Training Step: 4723... Training loss: 1.0852... 0.1272 sec/batch Epoch: 11/20... Training Step: 4724... Training loss: 1.2302... 0.1272 sec/batch Epoch: 11/20... Training Step: 4725... Training loss: 1.0821... 0.1205 sec/batch Epoch: 11/20... Training Step: 4726... Training loss: 1.2828... 0.1286 sec/batch Epoch: 11/20... Training Step: 4727... Training loss: 0.9707... 0.1257 sec/batch Epoch: 11/20... Training Step: 4728... Training loss: 1.1511... 0.1257 sec/batch Epoch: 11/20... Training Step: 4729... Training loss: 1.1880... 0.1204 sec/batch Epoch: 11/20... Training Step: 4730... Training loss: 1.0725... 0.1249 sec/batch Epoch: 11/20... Training Step: 4731... Training loss: 1.1922... 0.1298 sec/batch Epoch: 11/20... Training Step: 4732... Training loss: 1.1713... 0.1228 sec/batch Epoch: 11/20... Training Step: 4733... Training loss: 0.9878... 0.1242 sec/batch Epoch: 11/20... Training Step: 4734... Training loss: 1.1699... 0.1230 sec/batch Epoch: 11/20... Training Step: 4735... Training loss: 1.1313... 0.1278 sec/batch Epoch: 11/20... Training Step: 4736... Training loss: 1.0617... 0.1284 sec/batch Epoch: 11/20... Training Step: 4737... Training loss: 1.2983... 0.1285 sec/batch Epoch: 11/20... Training Step: 4738... Training loss: 1.1603... 0.1184 sec/batch Epoch: 11/20... Training Step: 4739... Training loss: 1.1196... 0.1170 sec/batch Epoch: 11/20... Training Step: 4740... Training loss: 1.1457... 
0.1281 sec/batch Epoch: 11/20... Training Step: 4741... Training loss: 1.1547... 0.1291 sec/batch Epoch: 11/20... Training Step: 4742... Training loss: 1.3098... 0.1180 sec/batch Epoch: 11/20... Training Step: 4743... Training loss: 1.2495... 0.1270 sec/batch Epoch: 11/20... Training Step: 4744... Training loss: 1.1795... 0.1278 sec/batch Epoch: 11/20... Training Step: 4745... Training loss: 1.2423... 0.1138 sec/batch Epoch: 11/20... Training Step: 4746... Training loss: 1.2896... 0.1226 sec/batch Epoch: 11/20... Training Step: 4747... Training loss: 1.0368... 0.1258 sec/batch Epoch: 11/20... Training Step: 4748... Training loss: 1.2004... 0.1190 sec/batch Epoch: 11/20... Training Step: 4749... Training loss: 1.1259... 0.1195 sec/batch Epoch: 11/20... Training Step: 4750... Training loss: 1.0013... 0.1258 sec/batch Epoch: 11/20... Training Step: 4751... Training loss: 1.1755... 0.1277 sec/batch Epoch: 11/20... Training Step: 4752... Training loss: 0.9733... 0.1249 sec/batch Epoch: 11/20... Training Step: 4753... Training loss: 1.2272... 0.1228 sec/batch Epoch: 11/20... Training Step: 4754... Training loss: 1.3725... 0.1227 sec/batch Epoch: 11/20... Training Step: 4755... Training loss: 1.1601... 0.1187 sec/batch Epoch: 11/20... Training Step: 4756... Training loss: 1.1013... 0.1307 sec/batch Epoch: 11/20... Training Step: 4757... Training loss: 1.2000... 0.1242 sec/batch Epoch: 11/20... Training Step: 4758... Training loss: 1.2297... 0.1234 sec/batch Epoch: 11/20... Training Step: 4759... Training loss: 1.1158... 0.1196 sec/batch Epoch: 11/20... Training Step: 4760... Training loss: 1.0306... 0.1190 sec/batch Epoch: 11/20... Training Step: 4761... Training loss: 1.1655... 0.1194 sec/batch Epoch: 11/20... Training Step: 4762... Training loss: 1.2437... 0.1229 sec/batch Epoch: 11/20... Training Step: 4763... Training loss: 1.0416... 0.1205 sec/batch Epoch: 11/20... Training Step: 4764... Training loss: 1.1350... 0.1180 sec/batch Epoch: 11/20... Training Step: 4765... 
Training loss: 1.2315... 0.1232 sec/batch Epoch: 11/20... Training Step: 4766... Training loss: 0.9244... 0.1179 sec/batch Epoch: 11/20... Training Step: 4767... Training loss: 1.0917... 0.1162 sec/batch Epoch: 11/20... Training Step: 4768... Training loss: 1.2004... 0.1220 sec/batch Epoch: 11/20... Training Step: 4769... Training loss: 1.1902... 0.1283 sec/batch Epoch: 11/20... Training Step: 4770... Training loss: 1.0264... 0.1217 sec/batch Epoch: 11/20... Training Step: 4771... Training loss: 1.3077... 0.1200 sec/batch Epoch: 11/20... Training Step: 4772... Training loss: 1.1000... 0.1209 sec/batch Epoch: 11/20... Training Step: 4773... Training loss: 1.0438... 0.1192 sec/batch Epoch: 11/20... Training Step: 4774... Training loss: 1.2187... 0.1253 sec/batch Epoch: 11/20... Training Step: 4775... Training loss: 0.9529... 0.1157 sec/batch Epoch: 11/20... Training Step: 4776... Training loss: 0.8896... 0.1170 sec/batch Epoch: 11/20... Training Step: 4777... Training loss: 0.9323... 0.1278 sec/batch Epoch: 11/20... Training Step: 4778... Training loss: 1.0310... 0.1322 sec/batch Epoch: 11/20... Training Step: 4779... Training loss: 1.0382... 0.1279 sec/batch Epoch: 11/20... Training Step: 4780... Training loss: 1.2112... 0.1289 sec/batch Epoch: 11/20... Training Step: 4781... Training loss: 0.9467... 0.1230 sec/batch Epoch: 11/20... Training Step: 4782... Training loss: 1.0605... 0.1251 sec/batch Epoch: 11/20... Training Step: 4783... Training loss: 0.9958... 0.1199 sec/batch Epoch: 11/20... Training Step: 4784... Training loss: 1.1139... 0.1181 sec/batch Epoch: 11/20... Training Step: 4785... Training loss: 1.1536... 0.1255 sec/batch Epoch: 11/20... Training Step: 4786... Training loss: 1.0271... 0.1270 sec/batch Epoch: 11/20... Training Step: 4787... Training loss: 1.1275... 0.1238 sec/batch Epoch: 11/20... Training Step: 4788... Training loss: 1.0454... 0.1307 sec/batch Epoch: 11/20... Training Step: 4789... Training loss: 1.0988... 
0.1279 sec/batch Epoch: 11/20... Training Step: 4790... Training loss: 1.2477... 0.1334 sec/batch Epoch: 11/20... Training Step: 4791... Training loss: 1.3062... 0.1250 sec/batch Epoch: 11/20... Training Step: 4792... Training loss: 1.1342... 0.1270 sec/batch Epoch: 11/20... Training Step: 4793... Training loss: 1.2682... 0.1228 sec/batch Epoch: 11/20... Training Step: 4794... Training loss: 1.1047... 0.1213 sec/batch Epoch: 11/20... Training Step: 4795... Training loss: 1.1321... 0.1207 sec/batch Epoch: 11/20... Training Step: 4796... Training loss: 1.0380... 0.1226 sec/batch Epoch: 11/20... Training Step: 4797... Training loss: 1.0676... 0.1265 sec/batch Epoch: 11/20... Training Step: 4798... Training loss: 1.1071... 0.1245 sec/batch Epoch: 11/20... Training Step: 4799... Training loss: 1.0611... 0.1301 sec/batch Epoch: 11/20... Training Step: 4800... Training loss: 1.0948... 0.1302 sec/batch Epoch: 11/20... Training Step: 4801... Training loss: 1.0266... 0.1236 sec/batch Epoch: 11/20... Training Step: 4802... Training loss: 1.0968... 0.1261 sec/batch Epoch: 11/20... Training Step: 4803... Training loss: 1.3681... 0.1304 sec/batch Epoch: 11/20... Training Step: 4804... Training loss: 1.0007... 0.1185 sec/batch Epoch: 11/20... Training Step: 4805... Training loss: 1.1843... 0.1248 sec/batch Epoch: 11/20... Training Step: 4806... Training loss: 1.0318... 0.1206 sec/batch Epoch: 11/20... Training Step: 4807... Training loss: 0.9896... 0.1269 sec/batch Epoch: 11/20... Training Step: 4808... Training loss: 1.2661... 0.1266 sec/batch Epoch: 11/20... Training Step: 4809... Training loss: 0.9427... 0.1264 sec/batch Epoch: 11/20... Training Step: 4810... Training loss: 1.1186... 0.1323 sec/batch Epoch: 11/20... Training Step: 4811... Training loss: 1.1462... 0.1232 sec/batch Epoch: 11/20... Training Step: 4812... Training loss: 1.3441... 0.1241 sec/batch Epoch: 11/20... Training Step: 4813... Training loss: 1.0161... 0.1240 sec/batch Epoch: 11/20... Training Step: 4814... 
Training loss: 1.1153... 0.1221 sec/batch Epoch: 11/20... Training Step: 4815... Training loss: 1.2902... 0.1254 sec/batch Epoch: 11/20... Training Step: 4816... Training loss: 0.9128... 0.1263 sec/batch Epoch: 11/20... Training Step: 4817... Training loss: 1.0423... 0.1315 sec/batch Epoch: 11/20... Training Step: 4818... Training loss: 1.2092... 0.1275 sec/batch Epoch: 11/20... Training Step: 4819... Training loss: 0.8835... 0.1276 sec/batch Epoch: 11/20... Training Step: 4820... Training loss: 1.1247... 0.1240 sec/batch Epoch: 11/20... Training Step: 4821... Training loss: 0.9866... 0.1320 sec/batch Epoch: 11/20... Training Step: 4822... Training loss: 1.2689... 0.1262 sec/batch Epoch: 11/20... Training Step: 4823... Training loss: 1.1629... 0.1333 sec/batch Epoch: 11/20... Training Step: 4824... Training loss: 1.1107... 0.1232 sec/batch Epoch: 11/20... Training Step: 4825... Training loss: 1.1944... 0.1270 sec/batch Epoch: 11/20... Training Step: 4826... Training loss: 1.0856... 0.1250 sec/batch Epoch: 11/20... Training Step: 4827... Training loss: 1.2338... 0.1293 sec/batch Epoch: 11/20... Training Step: 4828... Training loss: 0.9642... 0.1335 sec/batch Epoch: 11/20... Training Step: 4829... Training loss: 1.1981... 0.1326 sec/batch Epoch: 11/20... Training Step: 4830... Training loss: 1.0794... 0.1316 sec/batch Epoch: 11/20... Training Step: 4831... Training loss: 1.0407... 0.1341 sec/batch Epoch: 11/20... Training Step: 4832... Training loss: 1.1374... 0.1289 sec/batch Epoch: 11/20... Training Step: 4833... Training loss: 1.1122... 0.1229 sec/batch Epoch: 11/20... Training Step: 4834... Training loss: 1.0800... 0.1253 sec/batch Epoch: 11/20... Training Step: 4835... Training loss: 1.1373... 0.1355 sec/batch Epoch: 11/20... Training Step: 4836... Training loss: 1.1398... 0.1286 sec/batch Epoch: 11/20... Training Step: 4837... Training loss: 1.1156... 0.1211 sec/batch Epoch: 11/20... Training Step: 4838... Training loss: 1.2383... 
0.1221 sec/batch Epoch: 11/20... Training Step: 4839... Training loss: 0.9259... 0.1417 sec/batch Epoch: 11/20... Training Step: 4840... Training loss: 1.1507... 0.1457 sec/batch Epoch: 11/20... Training Step: 4841... Training loss: 1.0600... 0.1292 sec/batch Epoch: 11/20... Training Step: 4842... Training loss: 1.1656... 0.1279 sec/batch Epoch: 11/20... Training Step: 4843... Training loss: 1.0673... 0.1206 sec/batch Epoch: 11/20... Training Step: 4844... Training loss: 1.1729... 0.1153 sec/batch Epoch: 11/20... Training Step: 4845... Training loss: 1.0391... 0.1160 sec/batch Epoch: 11/20... Training Step: 4846... Training loss: 1.0933... 0.1131 sec/batch Epoch: 11/20... Training Step: 4847... Training loss: 1.1472... 0.1207 sec/batch Epoch: 11/20... Training Step: 4848... Training loss: 1.1318... 0.1339 sec/batch Epoch: 11/20... Training Step: 4849... Training loss: 1.1397... 0.1247 sec/batch Epoch: 11/20... Training Step: 4850... Training loss: 0.8784... 0.1283 sec/batch Epoch: 11/20... Training Step: 4851... Training loss: 1.0675... 0.1284 sec/batch Epoch: 11/20... Training Step: 4852... Training loss: 1.1329... 0.1405 sec/batch Epoch: 11/20... Training Step: 4853... Training loss: 1.2140... 0.1228 sec/batch Epoch: 11/20... Tra