import os

from google.colab import drive

# Attach Google Drive to the Colab VM, then make the project's `main`
# directory the working directory so that the relative '../data' and
# '../model' paths used further down resolve.
drive.mount('/content/gdrive')
os.chdir('/content/gdrive/My Drive/finch/tensorflow2/text_classification/clue/main')
Mounted at /content/gdrive
# IPython shell escape (only valid inside a notebook cell, not in a plain
# .py script): installs the Hugging Face `transformers` package into the
# runtime. The version is not pinned; the recorded run resolved to
# transformers 3.4.0.
!pip install transformers
Collecting transformers Downloading https://files.pythonhosted.org/packages/2c/4e/4f1ede0fd7a36278844a277f8d53c21f88f37f3754abf76a5d6224f76d4a/transformers-3.4.0-py3-none-any.whl (1.3MB) |████████████████████████████████| 1.3MB 13.8MB/s Collecting sacremoses Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB) |████████████████████████████████| 890kB 50.9MB/s Requirement already satisfied: protobuf in /usr/local/lib/python3.6/dist-packages (from transformers) (3.12.4) Requirement already satisfied: dataclasses; python_version < "3.7" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7) Collecting sentencepiece!=0.1.92 Downloading https://files.pythonhosted.org/packages/e5/2d/6d4ca4bef9a67070fa1cac508606328329152b1df10bdf31fb6e4e727894/sentencepiece-0.1.94-cp36-cp36m-manylinux2014_x86_64.whl (1.1MB) |████████████████████████████████| 1.1MB 36.7MB/s Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12) Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1) Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5) Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0) Collecting tokenizers==0.9.2 Downloading https://files.pythonhosted.org/packages/7c/a5/78be1a55b2ac8d6a956f0a211d372726e2b1dd2666bb537fea9b03abd62c/tokenizers-0.9.2-cp36-cp36m-manylinux1_x86_64.whl (2.9MB) |████████████████████████████████| 2.9MB 54.7MB/s Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20) Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4) Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from 
sacremoses->transformers) (1.15.0) Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2) Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.17.0) Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf->transformers) (50.3.2) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20) Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4) Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3) Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10) Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7) Building wheels for collected packages: sacremoses Building wheel for sacremoses (setup.py) ... done Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893257 sha256=24cb962f37640e9d039ac2f68b1629c3dd62fbabc7bfab73dfae80af309bdeac Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45 Successfully built sacremoses Installing collected packages: sacremoses, sentencepiece, tokenizers, transformers Successfully installed sacremoses-0.0.43 sentencepiece-0.1.94 tokenizers-0.9.2 transformers-3.4.0
from transformers import BertTokenizer, TFBertLMHeadModel
import os
import json
import time
import logging
import pprint
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import random
# Report the runtime environment. `tf.test.is_gpu_available()` is deprecated
# (TensorFlow's own warning recommends `tf.config.list_physical_devices`),
# so GPU presence is derived from the list of physical GPU devices instead.
print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', bool(tf.config.list_physical_devices('GPU')))
TensorFlow Version 2.3.0 WARNING:tensorflow:From <ipython-input-3-05a3004c9c96>:13: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.config.list_physical_devices('GPU')` instead. GPU Enabled: True
# Hyper-parameters and file locations shared by the data pipeline and the
# training loop below.
params = dict(
    pretrain_path='bert-base-chinese',   # name passed to `from_pretrained`
    train_path='../data/train.txt',      # JSON-lines training file
    test_path='../data/test.txt',        # JSON-lines evaluation file
    batch_size=16,
    max_len=130,                         # longer sequences are cut to head + tail
    buffer_size=31728,                   # shuffle buffer; presumably the training-set size
    init_lr=1e-5,                        # lower bound of the cyclical learning rate
    max_lr=3e-5,                         # upper bound of the cyclical learning rate
    n_epochs=4 * 10,
)
# Vocabulary lookup for the pretrained checkpoint; the data generator only
# uses it to map single characters / special tokens to ids.
# NOTE(review): `lowercase` and `add_special_tokens` do not look like
# BertTokenizer constructor options (the documented flag is `do_lower_case`)
# -- confirm whether these keyword arguments have any effect.
tokenizer = BertTokenizer.from_pretrained(
    params['pretrain_path'],
    lowercase=True,
    add_special_tokens=True,
)
# stream data from text files
def data_generator(f_path, params):
    """Yield masked-language-model examples from a JSON-lines file.

    Every non-blank line of ``f_path`` is parsed as a JSON object. Its
    ``'content'`` string is split into single characters, wrapped in
    ``[CLS]`` / ``[SEP]`` and mapped to ids with the module-level
    ``tokenizer``. Ids equal to 100 are dropped, over-long sequences are cut
    to their head and tail, and then each position that is not one of the
    two boundary tokens is selected with probability 0.15 for corruption:
    80% become id 103, 10% keep their id, 10% become a random id.

    Args:
        f_path: Path of the UTF-8 encoded JSON-lines file to read.
        params: Configuration mapping; only ``params['max_len']`` is read.

    Yields:
        ``((noises, seg), (text, labels_mask))`` where all four items are
        equal-length lists of ints: ``noises`` is the corrupted id sequence,
        ``seg`` is all zeros, ``text`` is the uncorrupted id sequence and
        ``labels_mask`` is 1 at the positions selected for corruption.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``'content'`` key.
    """
    # Ids hard-coded by this script. 101 / 102 are the ids that '[CLS]' and
    # '[SEP]' map to in the recorded run; 100 and 103 are presumably [UNK]
    # and [MASK] of the same vocabulary -- TODO confirm against the tokenizer.
    unk_id, cls_id, sep_id, mask_id = 100, 101, 102, 103
    pad_id = 0            # padding value used by `dataset`
    max_token_id = 21127  # largest id drawn for random replacement

    max_len = params['max_len']
    # Explicit encoding: the content is Chinese text, so do not depend on the
    # platform's default locale.
    with open(f_path, encoding='utf-8') as f:
        print('Reading', f_path)
        for raw_line in f:
            raw_line = raw_line.strip()
            if not raw_line:
                # Tolerate blank lines instead of failing in json.loads.
                continue
            record = json.loads(raw_line)

            tokens = ['[CLS]'] + list(record['content']) + ['[SEP]']
            text = [idx for idx in tokenizer.convert_tokens_to_ids(tokens)
                    if idx != unk_id]

            if len(text) > max_len:
                # Keep the head and the tail. For an even max_len this is the
                # same split as before; unlike `text[-n:]` it also behaves
                # for a zero-length tail and uses the full budget when
                # max_len is odd.
                head = (max_len + 1) // 2
                tail = max_len - head
                text = text[:head] + text[len(text) - tail:]

            seg = [0] * len(text)
            noises = []
            labels_mask = []
            for idx in text:
                # The random draw happens first for every token, boundary
                # tokens included, exactly as in the original sampling order.
                if (random.random() <= 0.15) and idx != cls_id and idx != sep_id:
                    dice = random.random()
                    if dice <= 0.8:
                        noises.append(mask_id)
                    elif dice <= 0.9:
                        noises.append(idx)
                    else:
                        # Never draw the pad id: the callers in this file
                        # build the attention mask with tf.sign(ids), so a
                        # 0 here would hide a position that is scored.
                        noises.append(random.randint(pad_id + 1, max_token_id))
                    labels_mask.append(1)
                else:
                    noises.append(idx)
                    labels_mask.append(0)
            yield (noises, seg), (text, labels_mask)
def dataset(is_training, params):
    """Build the batched ``tf.data`` pipeline over ``data_generator``.

    Args:
        is_training: When true, stream ``params['train_path']`` and shuffle
            with a buffer of ``params['buffer_size']`` examples; otherwise
            stream ``params['test_path']`` in file order.
        params: Configuration mapping; reads the path keys, ``'batch_size'``
            and, for training, ``'buffer_size'``.

    Returns:
        A dataset of ``((noises, seg), (text, labels_mask))`` batches of
        int32 tensors, each padded with 0 to the longest sequence in the
        batch, with prefetching enabled.
    """
    element_shapes = (([None], [None]), ([None], [None]))
    element_types = ((tf.int32, tf.int32), (tf.int32, tf.int32))
    pad_values = ((0, 0), (0, 0))

    # The path is looked up inside the lambda, i.e. each time the generator
    # is (re)started, as in the two original branches.
    path_key = 'train_path' if is_training else 'test_path'
    stream = tf.data.Dataset.from_generator(
        lambda: data_generator(params[path_key], params),
        output_types=element_types,
        output_shapes=element_shapes,
    )
    if is_training:
        stream = stream.shuffle(params['buffer_size'])
    stream = stream.padded_batch(
        params['batch_size'],
        padded_shapes=element_shapes,
        padding_values=pad_values,
    )
    return stream.prefetch(tf.data.experimental.AUTOTUNE)
# input stream ids check
# Pull a single example straight from the generator (no batching or padding)
# and print its four parallel lists, one per line: corrupted ids, segment
# ids, original ids and the mask of corrupted positions.
(text, seg), (labels, labels_mask) = next(data_generator(params['train_path'], params))
print(text, seg, labels, labels_mask, sep='\n')
Reading ../data/train.txt [101, 112, 872, 4761, 6887, 1914, 103, 1914, 7353, 6818, 3300, 784, 720, 1408, 8043, 1506, 1506, 3300, 4788, 103, 5456, 4696, 4638, 741, 677, 1091, 4638, 872, 1420, 1521, 872, 2157, 6929, 1779, 4788, 2357, 3221, 686, 4518, 677, 3297, 1920, 4638, 4788, 2357, 8024, 1506, 1506, 103, 7745, 872, 4638, 1568, 2124, 103, 6432, 2225, 1217, 2861, 10671, 4105, 2357, 3221, 7391, 4518, 677, 3297, 103, 4638, 4105, 2357, 1568, 1506, 1506, 1506, 112, 112, 4268, 4268, 8024, 1961, 4638, 103, 1355, 5456, 8013, 2769, 812, 1920, 2812, 7370, 3488, 2094, 6963, 103, 5436, 677, 3341, 2769, 4692, 1168, 3312, 1928, 5361, 7027, 103, 14509, 1355, 671, 2137, 103, 103, 166, 103, 1184, 1931, 1168, 4638, 103, 872, 6432, 3221, 679, 3221, 8043, 138, 4495, 4567, 140, 102] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [101, 112, 872, 4761, 6887, 1914, 840, 1914, 7353, 6818, 3300, 784, 720, 1408, 8043, 1506, 1506, 3300, 4788, 2357, 5456, 4696, 4638, 741, 677, 1091, 4638, 872, 1420, 1521, 872, 2157, 6929, 1779, 4788, 2357, 3221, 686, 4518, 677, 3297, 1920, 4638, 4788, 2357, 8024, 1506, 1506, 8024, 7745, 872, 4638, 1568, 2124, 3221, 6432, 2225, 1217, 2861, 4478, 4105, 2357, 3221, 686, 4518, 677, 3297, 1920, 4638, 4105, 2357, 1568, 1506, 1506, 1506, 112, 112, 4268, 4268, 8024, 1961, 4638, 1928, 1355, 5456, 8013, 2769, 812, 1920, 2812, 7370, 3488, 2094, 6963, 6206, 5436, 677, 3341, 2769, 4692, 1168, 3312, 1928, 5361, 7027, 3300, 1928, 1355, 671, 2137, 3221, 166, 166, 809, 1184, 1931, 1168, 4638, 8024, 872, 6432, 3221, 679, 3221, 8043, 138, 4495, 4567, 140, 102] [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
# Pretrained BERT with a language-modelling head. `return_dict=True` makes
# the forward pass return an object whose `.logits` attribute is read below.
model = TFBertLMHeadModel.from_pretrained(
    params['pretrain_path'],
    trainable=True,
    return_dict=True,
)
# Continue from the weights saved by an earlier run; this is the same file
# the training loop overwrites whenever accuracy improves.
model.load_weights('../model/bert_further_pretrain.h5')

# Step size handed to the cyclical schedule: four passes' worth of batches,
# assuming `buffer_size` equals the number of training examples -- TODO confirm.
step_size = (4 * params['buffer_size']) // params['batch_size']
decay_lr = tfa.optimizers.Triangular2CyclicalLearningRate(
    initial_learning_rate=params['init_lr'],
    maximal_learning_rate=params['max_lr'],
    step_size=step_size,
)
# The optimizer starts at `init_lr`; the training loop assigns the scheduled
# value to `optim.lr` before every update.
optim = tf.optimizers.Adam(params['init_lr'])

global_step = 0
# Accuracy that an evaluation pass has to exceed before a checkpoint is saved.
best_acc = 0.69
t0 = time.time()

logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)
# Baseline Accuracy
# Accuracy of the loaded weights on the test file before any further
# training, counted only at positions where `labels_mask` is 1.
# NOTE(review): `data_generator` draws the corrupted positions with `random`
# on every pass, so this figure is not exactly repeatable between runs.
m = tf.keras.metrics.Accuracy()
for (text, seg), (labels, labels_mask) in dataset(is_training=False, params=params):
    # tf.sign(text) is 1 for every non-zero id and 0 for the zero padding,
    # and is passed as the second model input.
    outputs = model([text, tf.sign(text), seg], training=False)
    logits = outputs.logits
    m.update_state(labels, tf.argmax(logits, -1), sample_weight=labels_mask)
acc = m.result().numpy()
logger.info("Baseline Accuracy: {:.3f}".format(acc))
# Variables to optimise: everything except the pooler. The set does not
# change during training, so the list is built once instead of on every step.
trainable_vars = [v for v in model.trainable_variables if 'pooler' not in v.name]

for _ in range(params['n_epochs']):
    # Training
    for (text, seg), (labels, labels_mask) in dataset(is_training=True, params=params):
        with tf.GradientTape() as tape:
            # tf.sign(text) is 1 for non-zero ids and 0 for the zero padding.
            logits = model([text, tf.sign(text), seg], training=True).logits
            # Cross-entropy weighted by `labels_mask`, so only the corrupted
            # positions contribute. The one-hot depth is taken from the
            # logits (their last dimension must match it anyway) instead of
            # a hard-coded vocabulary size.
            loss = tf.compat.v1.losses.softmax_cross_entropy(
                onehot_labels=tf.one_hot(labels, logits.shape[-1]),
                logits=logits,
                weights=tf.cast(labels_mask, tf.float32),
                label_smoothing=.2,
            )

        # Apply the cyclical schedule by hand before each update.
        optim.lr.assign(decay_lr(global_step))
        grads = tape.gradient(loss, trainable_vars)
        # Discard the returned norm under its own name so the epoch loop
        # variable `_` is not rebound.
        grads, _grad_norm = tf.clip_by_global_norm(grads, 5.)
        optim.apply_gradients(zip(grads, trainable_vars))

        if global_step % 100 == 0:
            logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
                global_step, loss.numpy().item(), time.time() - t0, optim.lr.numpy().item()))
            # The timer restarts only here, so "Spent" covers the last 100
            # steps (plus the evaluation pass when one fell in between).
            t0 = time.time()
        global_step += 1

    # Evaluation
    # NOTE(review): `data_generator` re-draws the corrupted positions with
    # `random` on every pass, so accuracies from different epochs are
    # measured on different corruptions of the test file.
    m = tf.keras.metrics.Accuracy()
    for (text, seg), (labels, labels_mask) in dataset(is_training=False, params=params):
        logits = model([text, tf.sign(text), seg], training=False).logits
        m.update_state(
            y_true=labels,
            y_pred=tf.argmax(logits, -1),
            sample_weight=labels_mask,
        )
    acc = m.result().numpy()
    logger.info("Evaluation: Testing Accuracy: {:.3f}".format(acc))

    # Checkpoint only on improvement; the best value is logged every epoch.
    if acc > best_acc:
        best_acc = acc
        model.save_weights('../model/bert_further_pretrain.h5', save_format='h5')
    logger.info("Best Accuracy: {:.3f}".format(best_acc))
Reading ../data/test.txt INFO:tensorflow:Baseline Accuracy: 0.692 Reading ../data/train.txt INFO:tensorflow:Step 0 | Loss: 5.6769 | Spent: 52.0 secs | LR: 0.000010 INFO:tensorflow:Step 100 | Loss: 3.8050 | Spent: 53.5 secs | LR: 0.000010 INFO:tensorflow:Step 200 | Loss: 3.5434 | Spent: 54.6 secs | LR: 0.000011 INFO:tensorflow:Step 300 | Loss: 3.6904 | Spent: 55.3 secs | LR: 0.000011 INFO:tensorflow:Step 400 | Loss: 3.6488 | Spent: 55.3 secs | LR: 0.000011 INFO:tensorflow:Step 500 | Loss: 3.8685 | Spent: 55.4 secs | LR: 0.000011 INFO:tensorflow:Step 600 | Loss: 3.5431 | Spent: 54.7 secs | LR: 0.000012 INFO:tensorflow:Step 700 | Loss: 3.7322 | Spent: 55.2 secs | LR: 0.000012 INFO:tensorflow:Step 800 | Loss: 3.9790 | Spent: 54.4 secs | LR: 0.000012 INFO:tensorflow:Step 900 | Loss: 3.4762 | Spent: 55.2 secs | LR: 0.000012 INFO:tensorflow:Step 1000 | Loss: 3.5651 | Spent: 55.4 secs | LR: 0.000013 INFO:tensorflow:Step 1100 | Loss: 3.6787 | Spent: 55.1 secs | LR: 0.000013 INFO:tensorflow:Step 1200 | Loss: 3.5938 | Spent: 54.7 secs | LR: 0.000013 INFO:tensorflow:Step 1300 | Loss: 3.7927 | Spent: 55.2 secs | LR: 0.000013 INFO:tensorflow:Step 1400 | Loss: 3.5362 | Spent: 55.6 secs | LR: 0.000014 INFO:tensorflow:Step 1500 | Loss: 3.5193 | Spent: 55.2 secs | LR: 0.000014 INFO:tensorflow:Step 1600 | Loss: 3.2629 | Spent: 54.5 secs | LR: 0.000014 INFO:tensorflow:Step 1700 | Loss: 3.7281 | Spent: 55.1 secs | LR: 0.000014 INFO:tensorflow:Step 1800 | Loss: 3.3162 | Spent: 55.4 secs | LR: 0.000015 INFO:tensorflow:Step 1900 | Loss: 3.2278 | Spent: 54.9 secs | LR: 0.000015 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.689 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 2000 | Loss: 3.6417 | Spent: 107.2 secs | LR: 0.000015 INFO:tensorflow:Step 2100 | Loss: 3.7316 | Spent: 54.8 secs | LR: 0.000015 INFO:tensorflow:Step 2200 | Loss: 3.6453 | Spent: 55.0 secs | LR: 0.000016 INFO:tensorflow:Step 2300 | Loss: 3.6747 | Spent: 
55.2 secs | LR: 0.000016 INFO:tensorflow:Step 2400 | Loss: 3.7780 | Spent: 55.1 secs | LR: 0.000016 INFO:tensorflow:Step 2500 | Loss: 3.6128 | Spent: 55.3 secs | LR: 0.000016 INFO:tensorflow:Step 2600 | Loss: 3.5500 | Spent: 55.1 secs | LR: 0.000017 INFO:tensorflow:Step 2700 | Loss: 3.4986 | Spent: 54.5 secs | LR: 0.000017 INFO:tensorflow:Step 2800 | Loss: 3.7140 | Spent: 54.4 secs | LR: 0.000017 INFO:tensorflow:Step 2900 | Loss: 3.4243 | Spent: 53.6 secs | LR: 0.000017 INFO:tensorflow:Step 3000 | Loss: 3.4860 | Spent: 54.0 secs | LR: 0.000018 INFO:tensorflow:Step 3100 | Loss: 3.4368 | Spent: 55.8 secs | LR: 0.000018 INFO:tensorflow:Step 3200 | Loss: 3.5442 | Spent: 55.5 secs | LR: 0.000018 INFO:tensorflow:Step 3300 | Loss: 3.8786 | Spent: 53.6 secs | LR: 0.000018 INFO:tensorflow:Step 3400 | Loss: 3.4672 | Spent: 54.9 secs | LR: 0.000019 INFO:tensorflow:Step 3500 | Loss: 3.8257 | Spent: 55.0 secs | LR: 0.000019 INFO:tensorflow:Step 3600 | Loss: 3.6563 | Spent: 54.8 secs | LR: 0.000019 INFO:tensorflow:Step 3700 | Loss: 3.8164 | Spent: 55.3 secs | LR: 0.000019 INFO:tensorflow:Step 3800 | Loss: 3.5402 | Spent: 54.8 secs | LR: 0.000020 INFO:tensorflow:Step 3900 | Loss: 3.6487 | Spent: 55.2 secs | LR: 0.000020 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.683 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 4000 | Loss: 3.5141 | Spent: 106.9 secs | LR: 0.000020 INFO:tensorflow:Step 4100 | Loss: 3.5216 | Spent: 55.1 secs | LR: 0.000020 INFO:tensorflow:Step 4200 | Loss: 3.4435 | Spent: 55.3 secs | LR: 0.000021 INFO:tensorflow:Step 4300 | Loss: 3.3913 | Spent: 54.9 secs | LR: 0.000021 INFO:tensorflow:Step 4400 | Loss: 3.4233 | Spent: 55.8 secs | LR: 0.000021 INFO:tensorflow:Step 4500 | Loss: 3.4909 | Spent: 54.4 secs | LR: 0.000021 INFO:tensorflow:Step 4600 | Loss: 3.3695 | Spent: 55.0 secs | LR: 0.000022 INFO:tensorflow:Step 4700 | Loss: 3.5487 | Spent: 54.1 secs | LR: 0.000022 INFO:tensorflow:Step 4800 | Loss: 
4.2573 | Spent: 55.1 secs | LR: 0.000022 INFO:tensorflow:Step 4900 | Loss: 3.4999 | Spent: 56.2 secs | LR: 0.000022 INFO:tensorflow:Step 5000 | Loss: 3.2891 | Spent: 55.5 secs | LR: 0.000023 INFO:tensorflow:Step 5100 | Loss: 3.2396 | Spent: 55.1 secs | LR: 0.000023 INFO:tensorflow:Step 5200 | Loss: 3.9447 | Spent: 55.4 secs | LR: 0.000023 INFO:tensorflow:Step 5300 | Loss: 3.2533 | Spent: 54.7 secs | LR: 0.000023 INFO:tensorflow:Step 5400 | Loss: 3.2201 | Spent: 55.0 secs | LR: 0.000024 INFO:tensorflow:Step 5500 | Loss: 3.4030 | Spent: 55.0 secs | LR: 0.000024 INFO:tensorflow:Step 5600 | Loss: 3.7845 | Spent: 54.9 secs | LR: 0.000024 INFO:tensorflow:Step 5700 | Loss: 3.5878 | Spent: 54.9 secs | LR: 0.000024 INFO:tensorflow:Step 5800 | Loss: 4.1874 | Spent: 54.7 secs | LR: 0.000025 INFO:tensorflow:Step 5900 | Loss: 3.2908 | Spent: 56.2 secs | LR: 0.000025 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.678 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 6000 | Loss: 4.0138 | Spent: 107.9 secs | LR: 0.000025 INFO:tensorflow:Step 6100 | Loss: 3.9018 | Spent: 55.2 secs | LR: 0.000025 INFO:tensorflow:Step 6200 | Loss: 3.5683 | Spent: 55.0 secs | LR: 0.000026 INFO:tensorflow:Step 6300 | Loss: 3.7102 | Spent: 55.0 secs | LR: 0.000026 INFO:tensorflow:Step 6400 | Loss: 3.5180 | Spent: 55.2 secs | LR: 0.000026 INFO:tensorflow:Step 6500 | Loss: 3.4196 | Spent: 55.9 secs | LR: 0.000026 INFO:tensorflow:Step 6600 | Loss: 3.4177 | Spent: 55.2 secs | LR: 0.000027 INFO:tensorflow:Step 6700 | Loss: 3.8939 | Spent: 55.1 secs | LR: 0.000027 INFO:tensorflow:Step 6800 | Loss: 3.7608 | Spent: 55.2 secs | LR: 0.000027 INFO:tensorflow:Step 6900 | Loss: 3.8222 | Spent: 55.0 secs | LR: 0.000027 INFO:tensorflow:Step 7000 | Loss: 3.7076 | Spent: 55.2 secs | LR: 0.000028 INFO:tensorflow:Step 7100 | Loss: 3.3064 | Spent: 55.5 secs | LR: 0.000028 INFO:tensorflow:Step 7200 | Loss: 3.4243 | Spent: 54.6 secs | LR: 0.000028 
INFO:tensorflow:Step 7300 | Loss: 3.4759 | Spent: 55.4 secs | LR: 0.000028 INFO:tensorflow:Step 7400 | Loss: 3.4724 | Spent: 54.9 secs | LR: 0.000029 INFO:tensorflow:Step 7500 | Loss: 3.8504 | Spent: 54.2 secs | LR: 0.000029 INFO:tensorflow:Step 7600 | Loss: 3.6520 | Spent: 55.6 secs | LR: 0.000029 INFO:tensorflow:Step 7700 | Loss: 3.6649 | Spent: 54.9 secs | LR: 0.000029 INFO:tensorflow:Step 7800 | Loss: 3.5080 | Spent: 55.7 secs | LR: 0.000030 INFO:tensorflow:Step 7900 | Loss: 3.4644 | Spent: 55.1 secs | LR: 0.000030 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.678 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 8000 | Loss: 3.1942 | Spent: 108.9 secs | LR: 0.000030 INFO:tensorflow:Step 8100 | Loss: 3.5159 | Spent: 55.8 secs | LR: 0.000030 INFO:tensorflow:Step 8200 | Loss: 3.8925 | Spent: 55.7 secs | LR: 0.000029 INFO:tensorflow:Step 8300 | Loss: 3.3938 | Spent: 54.2 secs | LR: 0.000029 INFO:tensorflow:Step 8400 | Loss: 3.8378 | Spent: 54.9 secs | LR: 0.000029 INFO:tensorflow:Step 8500 | Loss: 3.6447 | Spent: 55.2 secs | LR: 0.000029 INFO:tensorflow:Step 8600 | Loss: 3.7910 | Spent: 54.4 secs | LR: 0.000028 INFO:tensorflow:Step 8700 | Loss: 3.3949 | Spent: 55.1 secs | LR: 0.000028 INFO:tensorflow:Step 8800 | Loss: 3.5859 | Spent: 55.7 secs | LR: 0.000028 INFO:tensorflow:Step 8900 | Loss: 3.3711 | Spent: 55.1 secs | LR: 0.000028 INFO:tensorflow:Step 9000 | Loss: 3.0112 | Spent: 54.7 secs | LR: 0.000027 INFO:tensorflow:Step 9100 | Loss: 3.2547 | Spent: 55.1 secs | LR: 0.000027 INFO:tensorflow:Step 9200 | Loss: 3.7495 | Spent: 55.6 secs | LR: 0.000027 INFO:tensorflow:Step 9300 | Loss: 3.6319 | Spent: 54.3 secs | LR: 0.000027 INFO:tensorflow:Step 9400 | Loss: 3.3819 | Spent: 55.2 secs | LR: 0.000026 INFO:tensorflow:Step 9500 | Loss: 3.6728 | Spent: 55.4 secs | LR: 0.000026 INFO:tensorflow:Step 9600 | Loss: 3.7229 | Spent: 55.5 secs | LR: 0.000026 INFO:tensorflow:Step 9700 | Loss: 3.3451 | Spent: 55.8 
secs | LR: 0.000026 INFO:tensorflow:Step 9800 | Loss: 3.3644 | Spent: 55.0 secs | LR: 0.000025 INFO:tensorflow:Step 9900 | Loss: 3.6134 | Spent: 54.8 secs | LR: 0.000025 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.675 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 10000 | Loss: 3.2713 | Spent: 109.4 secs | LR: 0.000025 INFO:tensorflow:Step 10100 | Loss: 3.1902 | Spent: 54.8 secs | LR: 0.000025 INFO:tensorflow:Step 10200 | Loss: 3.7453 | Spent: 55.8 secs | LR: 0.000024 INFO:tensorflow:Step 10300 | Loss: 3.7865 | Spent: 55.7 secs | LR: 0.000024 INFO:tensorflow:Step 10400 | Loss: 3.6286 | Spent: 54.6 secs | LR: 0.000024 INFO:tensorflow:Step 10500 | Loss: 3.8200 | Spent: 54.6 secs | LR: 0.000024 INFO:tensorflow:Step 10600 | Loss: 3.4029 | Spent: 56.2 secs | LR: 0.000023 INFO:tensorflow:Step 10700 | Loss: 3.5689 | Spent: 55.6 secs | LR: 0.000023 INFO:tensorflow:Step 10800 | Loss: 3.4493 | Spent: 55.3 secs | LR: 0.000023 INFO:tensorflow:Step 10900 | Loss: 3.7443 | Spent: 55.2 secs | LR: 0.000023 INFO:tensorflow:Step 11000 | Loss: 3.3699 | Spent: 55.4 secs | LR: 0.000022 INFO:tensorflow:Step 11100 | Loss: 3.5517 | Spent: 55.5 secs | LR: 0.000022 INFO:tensorflow:Step 11200 | Loss: 3.3877 | Spent: 54.1 secs | LR: 0.000022 INFO:tensorflow:Step 11300 | Loss: 3.5568 | Spent: 55.3 secs | LR: 0.000022 INFO:tensorflow:Step 11400 | Loss: 3.6565 | Spent: 56.0 secs | LR: 0.000021 INFO:tensorflow:Step 11500 | Loss: 3.6198 | Spent: 54.1 secs | LR: 0.000021 INFO:tensorflow:Step 11600 | Loss: 4.1227 | Spent: 54.4 secs | LR: 0.000021 INFO:tensorflow:Step 11700 | Loss: 3.7193 | Spent: 55.2 secs | LR: 0.000020 INFO:tensorflow:Step 11800 | Loss: 3.7078 | Spent: 55.4 secs | LR: 0.000020 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.678 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 11900 | Loss: 3.2168 | Spent: 108.3 secs | LR: 0.000020 INFO:tensorflow:Step 12000 
| Loss: 3.3429 | Spent: 55.2 secs | LR: 0.000020 INFO:tensorflow:Step 12100 | Loss: 3.6657 | Spent: 56.2 secs | LR: 0.000019 INFO:tensorflow:Step 12200 | Loss: 3.5878 | Spent: 54.7 secs | LR: 0.000019 INFO:tensorflow:Step 12300 | Loss: 3.1195 | Spent: 54.3 secs | LR: 0.000019 INFO:tensorflow:Step 12400 | Loss: 3.3120 | Spent: 55.7 secs | LR: 0.000019 INFO:tensorflow:Step 12500 | Loss: 3.3013 | Spent: 55.6 secs | LR: 0.000018 INFO:tensorflow:Step 12600 | Loss: 3.1425 | Spent: 55.5 secs | LR: 0.000018 INFO:tensorflow:Step 12700 | Loss: 3.6787 | Spent: 55.3 secs | LR: 0.000018 INFO:tensorflow:Step 12800 | Loss: 3.2395 | Spent: 55.1 secs | LR: 0.000018 INFO:tensorflow:Step 12900 | Loss: 3.3843 | Spent: 54.7 secs | LR: 0.000017 INFO:tensorflow:Step 13000 | Loss: 3.3074 | Spent: 55.1 secs | LR: 0.000017 INFO:tensorflow:Step 13100 | Loss: 3.0785 | Spent: 55.8 secs | LR: 0.000017 INFO:tensorflow:Step 13200 | Loss: 3.9031 | Spent: 54.4 secs | LR: 0.000017 INFO:tensorflow:Step 13300 | Loss: 3.4434 | Spent: 55.1 secs | LR: 0.000016 INFO:tensorflow:Step 13400 | Loss: 3.5457 | Spent: 53.9 secs | LR: 0.000016 INFO:tensorflow:Step 13500 | Loss: 3.5615 | Spent: 56.1 secs | LR: 0.000016 INFO:tensorflow:Step 13600 | Loss: 3.5185 | Spent: 56.2 secs | LR: 0.000016 INFO:tensorflow:Step 13700 | Loss: 3.3583 | Spent: 55.0 secs | LR: 0.000015 INFO:tensorflow:Step 13800 | Loss: 3.4007 | Spent: 55.3 secs | LR: 0.000015 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.687 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 13900 | Loss: 3.7554 | Spent: 108.1 secs | LR: 0.000015 INFO:tensorflow:Step 14000 | Loss: 3.3485 | Spent: 55.4 secs | LR: 0.000015 INFO:tensorflow:Step 14100 | Loss: 3.4840 | Spent: 55.8 secs | LR: 0.000014 INFO:tensorflow:Step 14200 | Loss: 3.5770 | Spent: 54.8 secs | LR: 0.000014 INFO:tensorflow:Step 14300 | Loss: 3.2948 | Spent: 55.0 secs | LR: 0.000014 INFO:tensorflow:Step 14400 | Loss: 3.6543 | Spent: 55.7 secs 
| LR: 0.000014 INFO:tensorflow:Step 14500 | Loss: 3.5392 | Spent: 55.7 secs | LR: 0.000013 INFO:tensorflow:Step 14600 | Loss: 3.4148 | Spent: 55.8 secs | LR: 0.000013 INFO:tensorflow:Step 14700 | Loss: 3.6276 | Spent: 54.9 secs | LR: 0.000013 INFO:tensorflow:Step 14800 | Loss: 3.6287 | Spent: 55.2 secs | LR: 0.000013 INFO:tensorflow:Step 14900 | Loss: 3.3177 | Spent: 55.2 secs | LR: 0.000012 INFO:tensorflow:Step 15000 | Loss: 3.9401 | Spent: 55.9 secs | LR: 0.000012 INFO:tensorflow:Step 15100 | Loss: 3.4821 | Spent: 55.1 secs | LR: 0.000012 INFO:tensorflow:Step 15200 | Loss: 3.4535 | Spent: 55.1 secs | LR: 0.000012 INFO:tensorflow:Step 15300 | Loss: 3.3881 | Spent: 54.6 secs | LR: 0.000011 INFO:tensorflow:Step 15400 | Loss: 3.5375 | Spent: 55.1 secs | LR: 0.000011 INFO:tensorflow:Step 15500 | Loss: 3.2492 | Spent: 54.8 secs | LR: 0.000011 INFO:tensorflow:Step 15600 | Loss: 3.5275 | Spent: 55.5 secs | LR: 0.000011 INFO:tensorflow:Step 15700 | Loss: 3.4098 | Spent: 55.8 secs | LR: 0.000010 INFO:tensorflow:Step 15800 | Loss: 3.7177 | Spent: 54.2 secs | LR: 0.000010 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.684 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 15900 | Loss: 3.6681 | Spent: 107.3 secs | LR: 0.000010 INFO:tensorflow:Step 16000 | Loss: 3.5731 | Spent: 56.0 secs | LR: 0.000010 INFO:tensorflow:Step 16100 | Loss: 4.0069 | Spent: 54.8 secs | LR: 0.000010 INFO:tensorflow:Step 16200 | Loss: 3.2370 | Spent: 56.0 secs | LR: 0.000010 INFO:tensorflow:Step 16300 | Loss: 3.3906 | Spent: 54.9 secs | LR: 0.000011 INFO:tensorflow:Step 16400 | Loss: 3.5402 | Spent: 55.2 secs | LR: 0.000011 INFO:tensorflow:Step 16500 | Loss: 3.4992 | Spent: 54.9 secs | LR: 0.000011 INFO:tensorflow:Step 16600 | Loss: 3.5970 | Spent: 56.6 secs | LR: 0.000011 INFO:tensorflow:Step 16700 | Loss: 3.5464 | Spent: 55.6 secs | LR: 0.000011 INFO:tensorflow:Step 16800 | Loss: 3.4909 | Spent: 55.3 secs | LR: 0.000011 
INFO:tensorflow:Step 16900 | Loss: 3.3388 | Spent: 55.2 secs | LR: 0.000011 INFO:tensorflow:Step 17000 | Loss: 3.6430 | Spent: 55.6 secs | LR: 0.000011 INFO:tensorflow:Step 17100 | Loss: 3.3316 | Spent: 56.3 secs | LR: 0.000012 INFO:tensorflow:Step 17200 | Loss: 3.5725 | Spent: 54.9 secs | LR: 0.000012 INFO:tensorflow:Step 17300 | Loss: 3.3967 | Spent: 56.1 secs | LR: 0.000012 INFO:tensorflow:Step 17400 | Loss: 3.3284 | Spent: 54.9 secs | LR: 0.000012 INFO:tensorflow:Step 17500 | Loss: 3.3785 | Spent: 55.6 secs | LR: 0.000012 INFO:tensorflow:Step 17600 | Loss: 3.3781 | Spent: 55.9 secs | LR: 0.000012 INFO:tensorflow:Step 17700 | Loss: 3.3265 | Spent: 55.5 secs | LR: 0.000012 INFO:tensorflow:Step 17800 | Loss: 3.2192 | Spent: 55.2 secs | LR: 0.000012 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.683 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 17900 | Loss: 3.7973 | Spent: 108.7 secs | LR: 0.000013 INFO:tensorflow:Step 18000 | Loss: 3.7477 | Spent: 55.7 secs | LR: 0.000013 INFO:tensorflow:Step 18100 | Loss: 3.7151 | Spent: 55.8 secs | LR: 0.000013 INFO:tensorflow:Step 18200 | Loss: 3.1842 | Spent: 55.8 secs | LR: 0.000013 INFO:tensorflow:Step 18300 | Loss: 3.5950 | Spent: 55.4 secs | LR: 0.000013 INFO:tensorflow:Step 18400 | Loss: 3.5127 | Spent: 55.1 secs | LR: 0.000013 INFO:tensorflow:Step 18500 | Loss: 3.5419 | Spent: 55.8 secs | LR: 0.000013 INFO:tensorflow:Step 18600 | Loss: 3.1175 | Spent: 55.9 secs | LR: 0.000013 INFO:tensorflow:Step 18700 | Loss: 3.2977 | Spent: 55.9 secs | LR: 0.000014 INFO:tensorflow:Step 18800 | Loss: 4.0662 | Spent: 56.7 secs | LR: 0.000014 INFO:tensorflow:Step 18900 | Loss: 3.3256 | Spent: 55.6 secs | LR: 0.000014 INFO:tensorflow:Step 19000 | Loss: 3.3141 | Spent: 56.1 secs | LR: 0.000014 INFO:tensorflow:Step 19100 | Loss: 3.4430 | Spent: 54.9 secs | LR: 0.000014 INFO:tensorflow:Step 19200 | Loss: 3.4444 | Spent: 55.8 secs | LR: 0.000014 INFO:tensorflow:Step 19300 | Loss: 
3.1612 | Spent: 56.0 secs | LR: 0.000014 INFO:tensorflow:Step 19400 | Loss: 3.5657 | Spent: 55.3 secs | LR: 0.000014 INFO:tensorflow:Step 19500 | Loss: 3.6605 | Spent: 56.0 secs | LR: 0.000015 INFO:tensorflow:Step 19600 | Loss: 3.5686 | Spent: 55.6 secs | LR: 0.000015 INFO:tensorflow:Step 19700 | Loss: 3.5282 | Spent: 56.1 secs | LR: 0.000015 INFO:tensorflow:Step 19800 | Loss: 3.3508 | Spent: 55.5 secs | LR: 0.000015 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.687 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 19900 | Loss: 3.2009 | Spent: 108.6 secs | LR: 0.000015 INFO:tensorflow:Step 20000 | Loss: 3.3953 | Spent: 55.6 secs | LR: 0.000015 INFO:tensorflow:Step 20100 | Loss: 3.6633 | Spent: 55.7 secs | LR: 0.000015 INFO:tensorflow:Step 20200 | Loss: 3.3986 | Spent: 55.7 secs | LR: 0.000015 INFO:tensorflow:Step 20300 | Loss: 3.5184 | Spent: 56.9 secs | LR: 0.000016 INFO:tensorflow:Step 20400 | Loss: 3.1824 | Spent: 55.9 secs | LR: 0.000016 INFO:tensorflow:Step 20500 | Loss: 3.5896 | Spent: 55.2 secs | LR: 0.000016 INFO:tensorflow:Step 20600 | Loss: 3.3970 | Spent: 55.5 secs | LR: 0.000016 INFO:tensorflow:Step 20700 | Loss: 3.2737 | Spent: 55.4 secs | LR: 0.000016 INFO:tensorflow:Step 20800 | Loss: 3.8020 | Spent: 55.6 secs | LR: 0.000016 INFO:tensorflow:Step 20900 | Loss: 3.5589 | Spent: 55.5 secs | LR: 0.000016 INFO:tensorflow:Step 21000 | Loss: 3.6861 | Spent: 55.2 secs | LR: 0.000016 INFO:tensorflow:Step 21100 | Loss: 3.4643 | Spent: 54.8 secs | LR: 0.000017 INFO:tensorflow:Step 21200 | Loss: 3.1286 | Spent: 55.1 secs | LR: 0.000017 INFO:tensorflow:Step 21300 | Loss: 3.5893 | Spent: 55.8 secs | LR: 0.000017 INFO:tensorflow:Step 21400 | Loss: 3.7732 | Spent: 55.2 secs | LR: 0.000017 INFO:tensorflow:Step 21500 | Loss: 3.5662 | Spent: 56.3 secs | LR: 0.000017 INFO:tensorflow:Step 21600 | Loss: 3.7830 | Spent: 56.4 secs | LR: 0.000017 INFO:tensorflow:Step 21700 | Loss: 3.3065 | Spent: 56.2 secs | LR: 
0.000017 INFO:tensorflow:Step 21800 | Loss: 3.2869 | Spent: 54.7 secs | LR: 0.000017 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.688 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 21900 | Loss: 3.4965 | Spent: 108.6 secs | LR: 0.000018 INFO:tensorflow:Step 22000 | Loss: 3.5695 | Spent: 55.2 secs | LR: 0.000018 INFO:tensorflow:Step 22100 | Loss: 3.3541 | Spent: 55.7 secs | LR: 0.000018 INFO:tensorflow:Step 22200 | Loss: 3.5576 | Spent: 55.0 secs | LR: 0.000018 INFO:tensorflow:Step 22300 | Loss: 3.5603 | Spent: 55.7 secs | LR: 0.000018 INFO:tensorflow:Step 22400 | Loss: 3.5441 | Spent: 56.0 secs | LR: 0.000018 INFO:tensorflow:Step 22500 | Loss: 3.3596 | Spent: 55.7 secs | LR: 0.000018 INFO:tensorflow:Step 22600 | Loss: 3.4712 | Spent: 55.7 secs | LR: 0.000018 INFO:tensorflow:Step 22700 | Loss: 3.4746 | Spent: 55.9 secs | LR: 0.000019 INFO:tensorflow:Step 22800 | Loss: 3.3712 | Spent: 56.5 secs | LR: 0.000019 INFO:tensorflow:Step 22900 | Loss: 3.8276 | Spent: 55.9 secs | LR: 0.000019 INFO:tensorflow:Step 23000 | Loss: 3.3725 | Spent: 55.2 secs | LR: 0.000019 INFO:tensorflow:Step 23100 | Loss: 3.3543 | Spent: 56.2 secs | LR: 0.000019 INFO:tensorflow:Step 23200 | Loss: 3.3482 | Spent: 55.9 secs | LR: 0.000019 INFO:tensorflow:Step 23300 | Loss: 3.5583 | Spent: 55.7 secs | LR: 0.000019 INFO:tensorflow:Step 23400 | Loss: 3.4913 | Spent: 56.5 secs | LR: 0.000020 INFO:tensorflow:Step 23500 | Loss: 3.5286 | Spent: 55.9 secs | LR: 0.000020 INFO:tensorflow:Step 23600 | Loss: 3.4798 | Spent: 56.2 secs | LR: 0.000020 INFO:tensorflow:Step 23700 | Loss: 3.0739 | Spent: 55.0 secs | LR: 0.000020 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.681 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 23800 | Loss: 3.3896 | Spent: 109.0 secs | LR: 0.000020 INFO:tensorflow:Step 23900 | Loss: 3.7211 | Spent: 56.1 secs | LR: 0.000020 INFO:tensorflow:Step 24000 | Loss: 
3.4311 | Spent: 56.1 secs | LR: 0.000020 INFO:tensorflow:Step 24100 | Loss: 3.6965 | Spent: 55.4 secs | LR: 0.000020 INFO:tensorflow:Step 24200 | Loss: 3.5498 | Spent: 56.5 secs | LR: 0.000019 INFO:tensorflow:Step 24300 | Loss: 3.4646 | Spent: 56.1 secs | LR: 0.000019 INFO:tensorflow:Step 24400 | Loss: 3.1341 | Spent: 54.8 secs | LR: 0.000019 INFO:tensorflow:Step 24500 | Loss: 3.4642 | Spent: 55.8 secs | LR: 0.000019 INFO:tensorflow:Step 24600 | Loss: 3.4722 | Spent: 56.5 secs | LR: 0.000019 INFO:tensorflow:Step 24700 | Loss: 3.3730 | Spent: 56.1 secs | LR: 0.000019 INFO:tensorflow:Step 24800 | Loss: 3.1634 | Spent: 55.6 secs | LR: 0.000019 INFO:tensorflow:Step 24900 | Loss: 3.3980 | Spent: 55.9 secs | LR: 0.000019 INFO:tensorflow:Step 25000 | Loss: 3.7908 | Spent: 54.8 secs | LR: 0.000018 INFO:tensorflow:Step 25100 | Loss: 3.4017 | Spent: 55.5 secs | LR: 0.000018 INFO:tensorflow:Step 25200 | Loss: 3.6200 | Spent: 56.3 secs | LR: 0.000018 INFO:tensorflow:Step 25300 | Loss: 3.4212 | Spent: 55.0 secs | LR: 0.000018 INFO:tensorflow:Step 25400 | Loss: 3.4646 | Spent: 56.5 secs | LR: 0.000018 INFO:tensorflow:Step 25500 | Loss: 3.7782 | Spent: 55.7 secs | LR: 0.000018 INFO:tensorflow:Step 25600 | Loss: 3.4364 | Spent: 55.0 secs | LR: 0.000018 INFO:tensorflow:Step 25700 | Loss: 3.7769 | Spent: 55.7 secs | LR: 0.000018 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.683 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 25800 | Loss: 3.6888 | Spent: 109.1 secs | LR: 0.000017 INFO:tensorflow:Step 25900 | Loss: 3.3691 | Spent: 56.2 secs | LR: 0.000017 INFO:tensorflow:Step 26000 | Loss: 3.2641 | Spent: 55.0 secs | LR: 0.000017 INFO:tensorflow:Step 26100 | Loss: 3.4814 | Spent: 55.0 secs | LR: 0.000017 INFO:tensorflow:Step 26200 | Loss: 3.2663 | Spent: 54.8 secs | LR: 0.000017 INFO:tensorflow:Step 26300 | Loss: 3.3012 | Spent: 56.2 secs | LR: 0.000017 INFO:tensorflow:Step 26400 | Loss: 3.1017 | Spent: 54.7 secs | LR: 
0.000017 INFO:tensorflow:Step 26500 | Loss: 3.0863 | Spent: 56.0 secs | LR: 0.000017 INFO:tensorflow:Step 26600 | Loss: 3.8584 | Spent: 56.0 secs | LR: 0.000016 INFO:tensorflow:Step 26700 | Loss: 3.7103 | Spent: 56.2 secs | LR: 0.000016 INFO:tensorflow:Step 26800 | Loss: 3.1810 | Spent: 56.3 secs | LR: 0.000016 INFO:tensorflow:Step 26900 | Loss: 3.2902 | Spent: 55.8 secs | LR: 0.000016 INFO:tensorflow:Step 27000 | Loss: 3.2728 | Spent: 56.3 secs | LR: 0.000016 INFO:tensorflow:Step 27100 | Loss: 3.5410 | Spent: 55.3 secs | LR: 0.000016 INFO:tensorflow:Step 27200 | Loss: 3.6078 | Spent: 55.1 secs | LR: 0.000016 INFO:tensorflow:Step 27300 | Loss: 3.2122 | Spent: 55.1 secs | LR: 0.000016 INFO:tensorflow:Step 27400 | Loss: 3.1172 | Spent: 55.6 secs | LR: 0.000015 INFO:tensorflow:Step 27500 | Loss: 3.4266 | Spent: 55.8 secs | LR: 0.000015 INFO:tensorflow:Step 27600 | Loss: 3.4485 | Spent: 56.0 secs | LR: 0.000015 INFO:tensorflow:Step 27700 | Loss: 3.1243 | Spent: 55.8 secs | LR: 0.000015 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.684 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 27800 | Loss: 3.2837 | Spent: 109.3 secs | LR: 0.000015 INFO:tensorflow:Step 27900 | Loss: 3.7226 | Spent: 55.8 secs | LR: 0.000015 INFO:tensorflow:Step 28000 | Loss: 3.2673 | Spent: 55.2 secs | LR: 0.000015 INFO:tensorflow:Step 28100 | Loss: 3.3249 | Spent: 56.4 secs | LR: 0.000015 INFO:tensorflow:Step 28200 | Loss: 3.5676 | Spent: 55.7 secs | LR: 0.000014 INFO:tensorflow:Step 28300 | Loss: 3.4144 | Spent: 55.1 secs | LR: 0.000014 INFO:tensorflow:Step 28400 | Loss: 3.4208 | Spent: 55.4 secs | LR: 0.000014 INFO:tensorflow:Step 28500 | Loss: 3.4895 | Spent: 56.2 secs | LR: 0.000014 INFO:tensorflow:Step 28600 | Loss: 3.3893 | Spent: 55.8 secs | LR: 0.000014 INFO:tensorflow:Step 28700 | Loss: 3.4956 | Spent: 54.8 secs | LR: 0.000014 INFO:tensorflow:Step 28800 | Loss: 3.1416 | Spent: 55.2 secs | LR: 0.000014 INFO:tensorflow:Step 
28900 | Loss: 3.2458 | Spent: 55.2 secs | LR: 0.000014 INFO:tensorflow:Step 29000 | Loss: 3.3024 | Spent: 55.7 secs | LR: 0.000013 INFO:tensorflow:Step 29100 | Loss: 3.3627 | Spent: 55.9 secs | LR: 0.000013 INFO:tensorflow:Step 29200 | Loss: 3.4465 | Spent: 54.8 secs | LR: 0.000013 INFO:tensorflow:Step 29300 | Loss: 3.3412 | Spent: 54.8 secs | LR: 0.000013 INFO:tensorflow:Step 29400 | Loss: 3.3679 | Spent: 55.5 secs | LR: 0.000013 INFO:tensorflow:Step 29500 | Loss: 3.5603 | Spent: 56.1 secs | LR: 0.000013 INFO:tensorflow:Step 29600 | Loss: 3.5400 | Spent: 55.4 secs | LR: 0.000013 INFO:tensorflow:Step 29700 | Loss: 3.3857 | Spent: 55.5 secs | LR: 0.000013 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.685 INFO:tensorflow:Best Accuracy: 0.690 Reading ../data/train.txt INFO:tensorflow:Step 29800 | Loss: 3.3477 | Spent: 107.6 secs | LR: 0.000012 INFO:tensorflow:Step 29900 | Loss: 3.3515 | Spent: 54.5 secs | LR: 0.000012 INFO:tensorflow:Step 30000 | Loss: 3.3351 | Spent: 55.6 secs | LR: 0.000012 INFO:tensorflow:Step 30100 | Loss: 3.1330 | Spent: 54.8 secs | LR: 0.000012 INFO:tensorflow:Step 30200 | Loss: 3.7729 | Spent: 55.3 secs | LR: 0.000012 INFO:tensorflow:Step 30300 | Loss: 3.8072 | Spent: 55.9 secs | LR: 0.000012 INFO:tensorflow:Step 30400 | Loss: 3.7825 | Spent: 55.5 secs | LR: 0.000012 INFO:tensorflow:Step 30500 | Loss: 3.3082 | Spent: 55.3 secs | LR: 0.000012 INFO:tensorflow:Step 30600 | Loss: 3.3584 | Spent: 55.2 secs | LR: 0.000011 INFO:tensorflow:Step 30700 | Loss: 3.4730 | Spent: 55.6 secs | LR: 0.000011 INFO:tensorflow:Step 30800 | Loss: 3.2926 | Spent: 55.1 secs | LR: 0.000011 INFO:tensorflow:Step 30900 | Loss: 3.5787 | Spent: 55.6 secs | LR: 0.000011 INFO:tensorflow:Step 31000 | Loss: 3.4806 | Spent: 54.7 secs | LR: 0.000011 INFO:tensorflow:Step 31100 | Loss: 3.2747 | Spent: 55.5 secs | LR: 0.000011 INFO:tensorflow:Step 31200 | Loss: 3.2920 | Spent: 55.7 secs | LR: 0.000011 INFO:tensorflow:Step 31300 | Loss: 3.4519 | Spent: 54.6 
secs | LR: 0.000011 INFO:tensorflow:Step 31400 | Loss: 3.3185 | Spent: 55.6 secs | LR: 0.000010 INFO:tensorflow:Step 31500 | Loss: 3.3589 | Spent: 55.2 secs | LR: 0.000010 INFO:tensorflow:Step 31600 | Loss: 3.0051 | Spent: 54.9 secs | LR: 0.000010 INFO:tensorflow:Step 31700 | Loss: 3.6349 | Spent: 54.5 secs | LR: 0.000010 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.693 INFO:tensorflow:Best Accuracy: 0.693 Reading ../data/train.txt INFO:tensorflow:Step 31800 | Loss: 3.2938 | Spent: 110.5 secs | LR: 0.000010 INFO:tensorflow:Step 31900 | Loss: 3.6376 | Spent: 56.0 secs | LR: 0.000010 INFO:tensorflow:Step 32000 | Loss: 3.7436 | Spent: 54.6 secs | LR: 0.000010 INFO:tensorflow:Step 32100 | Loss: 3.6514 | Spent: 56.6 secs | LR: 0.000010 INFO:tensorflow:Step 32200 | Loss: 3.3633 | Spent: 55.5 secs | LR: 0.000010 INFO:tensorflow:Step 32300 | Loss: 3.3325 | Spent: 56.0 secs | LR: 0.000010 INFO:tensorflow:Step 32400 | Loss: 3.4948 | Spent: 55.2 secs | LR: 0.000010 INFO:tensorflow:Step 32500 | Loss: 3.3165 | Spent: 55.3 secs | LR: 0.000010 INFO:tensorflow:Step 32600 | Loss: 3.0973 | Spent: 56.2 secs | LR: 0.000011 INFO:tensorflow:Step 32700 | Loss: 3.3240 | Spent: 54.6 secs | LR: 0.000011 INFO:tensorflow:Step 32800 | Loss: 3.4633 | Spent: 57.0 secs | LR: 0.000011 INFO:tensorflow:Step 32900 | Loss: 3.4034 | Spent: 55.3 secs | LR: 0.000011 INFO:tensorflow:Step 33000 | Loss: 3.1864 | Spent: 55.1 secs | LR: 0.000011 INFO:tensorflow:Step 33100 | Loss: 3.3376 | Spent: 56.2 secs | LR: 0.000011 INFO:tensorflow:Step 33200 | Loss: 3.3107 | Spent: 55.4 secs | LR: 0.000011 INFO:tensorflow:Step 33300 | Loss: 3.1326 | Spent: 55.8 secs | LR: 0.000011 INFO:tensorflow:Step 33400 | Loss: 3.9004 | Spent: 56.3 secs | LR: 0.000011 INFO:tensorflow:Step 33500 | Loss: 3.0202 | Spent: 55.0 secs | LR: 0.000011 INFO:tensorflow:Step 33600 | Loss: 3.3646 | Spent: 55.6 secs | LR: 0.000011 INFO:tensorflow:Step 33700 | Loss: 3.3256 | Spent: 55.0 secs | LR: 0.000011 Reading 
../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.685 INFO:tensorflow:Best Accuracy: 0.693 Reading ../data/train.txt INFO:tensorflow:Step 33800 | Loss: 3.4914 | Spent: 108.8 secs | LR: 0.000011 INFO:tensorflow:Step 33900 | Loss: 3.6445 | Spent: 55.3 secs | LR: 0.000011 INFO:tensorflow:Step 34000 | Loss: 3.0491 | Spent: 54.5 secs | LR: 0.000011 INFO:tensorflow:Step 34100 | Loss: 3.5138 | Spent: 55.9 secs | LR: 0.000011 INFO:tensorflow:Step 34200 | Loss: 3.2992 | Spent: 56.1 secs | LR: 0.000012 INFO:tensorflow:Step 34300 | Loss: 3.3099 | Spent: 56.4 secs | LR: 0.000012 INFO:tensorflow:Step 34400 | Loss: 3.4717 | Spent: 56.2 secs | LR: 0.000012 INFO:tensorflow:Step 34500 | Loss: 3.7465 | Spent: 56.6 secs | LR: 0.000012 INFO:tensorflow:Step 34600 | Loss: 3.1300 | Spent: 56.3 secs | LR: 0.000012 INFO:tensorflow:Step 34700 | Loss: 3.5147 | Spent: 56.4 secs | LR: 0.000012 INFO:tensorflow:Step 34800 | Loss: 3.2946 | Spent: 56.2 secs | LR: 0.000012 INFO:tensorflow:Step 34900 | Loss: 3.6527 | Spent: 56.0 secs | LR: 0.000012 INFO:tensorflow:Step 35000 | Loss: 3.4655 | Spent: 56.2 secs | LR: 0.000012 INFO:tensorflow:Step 35100 | Loss: 3.5019 | Spent: 56.8 secs | LR: 0.000012 INFO:tensorflow:Step 35200 | Loss: 3.3319 | Spent: 54.7 secs | LR: 0.000012 INFO:tensorflow:Step 35300 | Loss: 3.2019 | Spent: 56.6 secs | LR: 0.000012 INFO:tensorflow:Step 35400 | Loss: 3.4453 | Spent: 55.2 secs | LR: 0.000012 INFO:tensorflow:Step 35500 | Loss: 3.6620 | Spent: 55.6 secs | LR: 0.000012 INFO:tensorflow:Step 35600 | Loss: 3.7082 | Spent: 55.6 secs | LR: 0.000012 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.690 INFO:tensorflow:Best Accuracy: 0.693 Reading ../data/train.txt INFO:tensorflow:Step 35700 | Loss: 3.3247 | Spent: 108.3 secs | LR: 0.000013 INFO:tensorflow:Step 35800 | Loss: 3.3083 | Spent: 55.5 secs | LR: 0.000013 INFO:tensorflow:Step 35900 | Loss: 3.1719 | Spent: 56.7 secs | LR: 0.000013 INFO:tensorflow:Step 36000 | Loss: 3.4384 | Spent: 
56.1 secs | LR: 0.000013 INFO:tensorflow:Step 36100 | Loss: 3.5294 | Spent: 56.2 secs | LR: 0.000013 INFO:tensorflow:Step 36200 | Loss: 3.3919 | Spent: 56.3 secs | LR: 0.000013 INFO:tensorflow:Step 36300 | Loss: 3.6008 | Spent: 55.5 secs | LR: 0.000013 INFO:tensorflow:Step 36400 | Loss: 3.4085 | Spent: 56.5 secs | LR: 0.000013 INFO:tensorflow:Step 36500 | Loss: 3.5704 | Spent: 56.9 secs | LR: 0.000013 INFO:tensorflow:Step 36600 | Loss: 3.3474 | Spent: 56.6 secs | LR: 0.000013 INFO:tensorflow:Step 36700 | Loss: 3.3920 | Spent: 55.8 secs | LR: 0.000013 INFO:tensorflow:Step 36800 | Loss: 3.0878 | Spent: 56.5 secs | LR: 0.000013 INFO:tensorflow:Step 36900 | Loss: 2.8937 | Spent: 56.7 secs | LR: 0.000013 INFO:tensorflow:Step 37000 | Loss: 3.3712 | Spent: 56.8 secs | LR: 0.000013 INFO:tensorflow:Step 37100 | Loss: 3.5150 | Spent: 56.3 secs | LR: 0.000013 INFO:tensorflow:Step 37200 | Loss: 3.3100 | Spent: 56.4 secs | LR: 0.000013 INFO:tensorflow:Step 37300 | Loss: 2.9511 | Spent: 56.4 secs | LR: 0.000014 INFO:tensorflow:Step 37400 | Loss: 3.7832 | Spent: 56.2 secs | LR: 0.000014 INFO:tensorflow:Step 37500 | Loss: 3.5332 | Spent: 55.9 secs | LR: 0.000014 INFO:tensorflow:Step 37600 | Loss: 3.3874 | Spent: 56.6 secs | LR: 0.000014 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.686 INFO:tensorflow:Best Accuracy: 0.693 Reading ../data/train.txt INFO:tensorflow:Step 37700 | Loss: 3.1126 | Spent: 108.7 secs | LR: 0.000014 INFO:tensorflow:Step 37800 | Loss: 3.5773 | Spent: 56.5 secs | LR: 0.000014 INFO:tensorflow:Step 37900 | Loss: 3.3865 | Spent: 55.5 secs | LR: 0.000014 INFO:tensorflow:Step 38000 | Loss: 3.7670 | Spent: 55.8 secs | LR: 0.000014 INFO:tensorflow:Step 38100 | Loss: 3.4764 | Spent: 56.4 secs | LR: 0.000014 INFO:tensorflow:Step 38200 | Loss: 3.3747 | Spent: 56.5 secs | LR: 0.000014 INFO:tensorflow:Step 38300 | Loss: 3.4750 | Spent: 56.5 secs | LR: 0.000014 INFO:tensorflow:Step 38400 | Loss: 4.0073 | Spent: 56.3 secs | LR: 0.000014 
INFO:tensorflow:Step 38500 | Loss: 3.0804 | Spent: 55.9 secs | LR: 0.000014 INFO:tensorflow:Step 38600 | Loss: 3.3201 | Spent: 55.9 secs | LR: 0.000014 INFO:tensorflow:Step 38700 | Loss: 3.4264 | Spent: 56.4 secs | LR: 0.000014 INFO:tensorflow:Step 38800 | Loss: 3.5607 | Spent: 56.8 secs | LR: 0.000014 INFO:tensorflow:Step 38900 | Loss: 3.3944 | Spent: 56.0 secs | LR: 0.000015 INFO:tensorflow:Step 39000 | Loss: 3.3954 | Spent: 56.2 secs | LR: 0.000015 INFO:tensorflow:Step 39100 | Loss: 3.0821 | Spent: 57.1 secs | LR: 0.000015 INFO:tensorflow:Step 39200 | Loss: 3.2155 | Spent: 56.7 secs | LR: 0.000015 INFO:tensorflow:Step 39300 | Loss: 3.4314 | Spent: 54.8 secs | LR: 0.000015 INFO:tensorflow:Step 39400 | Loss: 3.3212 | Spent: 55.9 secs | LR: 0.000015 INFO:tensorflow:Step 39500 | Loss: 3.5627 | Spent: 56.5 secs | LR: 0.000015 INFO:tensorflow:Step 39600 | Loss: 3.1086 | Spent: 56.3 secs | LR: 0.000015 Reading ../data/test.txt INFO:tensorflow:Evaluation: Testing Accuracy: 0.681 INFO:tensorflow:Best Accuracy: 0.693 Reading ../data/train.txt
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-8-4c23c1b6df0f> in <module>() 27 for ((text, seg), (labels, labels_mask)) in dataset(is_training=True, params=params): 28 with tf.GradientTape() as tape: ---> 29 logits = model([text, tf.sign(text), seg], training=True).logits 30 loss = tf.compat.v1.losses.softmax_cross_entropy( 31 onehot_labels = tf.one_hot(labels, 21128), /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs) 983 984 with ops.enable_auto_cast_variables(self._compute_dtype_object): --> 985 outputs = call_fn(inputs, *args, **kwargs) 986 987 if self._activity_regularizer: /usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, inputs, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict, labels, training) 1000 output_hidden_states=output_hidden_states, 1001 return_dict=return_dict, -> 1002 training=training, 1003 ) 1004 /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs) 983 984 with ops.enable_auto_cast_variables(self._compute_dtype_object): --> 985 outputs = call_fn(inputs, *args, **kwargs) 986 987 if self._activity_regularizer: /usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, inputs, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict, training) 627 output_hidden_states, 628 return_dict, --> 629 training=training, 630 ) 631 /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs) 983 984 with ops.enable_auto_cast_variables(self._compute_dtype_object): --> 985 outputs = call_fn(inputs, *args, **kwargs) 986 987 if self._activity_regularizer: 
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, hidden_states, attention_mask, head_mask, output_attentions, output_hidden_states, return_dict, training) 393 394 layer_outputs = layer_module( --> 395 hidden_states, attention_mask, head_mask[i], output_attentions, training=training 396 ) 397 hidden_states = layer_outputs[0] /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs) 983 984 with ops.enable_auto_cast_variables(self._compute_dtype_object): --> 985 outputs = call_fn(inputs, *args, **kwargs) 986 987 if self._activity_regularizer: /usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, hidden_states, attention_mask, head_mask, output_attentions, training) 363 attention_output = attention_outputs[0] 364 intermediate_output = self.intermediate(attention_output) --> 365 layer_output = self.bert_output(intermediate_output, attention_output, training=training) 366 outputs = (layer_output,) + attention_outputs[1:] # add attentions if we output them 367 /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs) 983 984 with ops.enable_auto_cast_variables(self._compute_dtype_object): --> 985 outputs = call_fn(inputs, *args, **kwargs) 986 987 if self._activity_regularizer: /usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, hidden_states, input_tensor, training) 344 hidden_states = self.dense(hidden_states) 345 hidden_states = self.dropout(hidden_states, training=training) --> 346 hidden_states = self.LayerNorm(hidden_states + input_tensor) 347 348 return hidden_states /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs) 983 984 with ops.enable_auto_cast_variables(self._compute_dtype_object): --> 985 outputs = call_fn(inputs, *args, **kwargs) 986 987 if self._activity_regularizer: 
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/normalization.py in call(self, inputs) 1201 1202 # Calculate the moments on the last axis (layer activations). -> 1203 mean, variance = nn.moments(inputs, self.axis, keep_dims=True) 1204 1205 scale, offset = _broadcast(self.gamma), _broadcast(self.beta) /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs) 199 """Call target, and fall back on dispatchers if there is a TypeError.""" 200 try: --> 201 return target(*args, **kwargs) 202 except (TypeError, ValueError): 203 # Note: convert_to_eager_tensor currently raises a ValueError, not a /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in moments(x, axes, shift, name, keep_dims, keepdims) 1305 if keep_dims is None: 1306 keep_dims = False -> 1307 with ops.name_scope(name, "moments", [x, axes]): 1308 # The dynamic range of fp16 is too limited to support the collection of 1309 # sufficient statistics. As a workaround we simply perform the operations /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in name_scope(name, default_name, values, skip_on_eager) 6409 """ 6410 ctx = context.context() -> 6411 in_eager_mode = ctx.executing_eagerly() 6412 if not in_eager_mode: 6413 return internal_name_scope_v1(name, default_name, values) KeyboardInterrupt:
# Print one weight tensor as a reference snapshot, so that after these weights
# are loaded into another task the values can be compared to confirm that the
# transfer worked. In this run, index 5 is the layer-0 self-attention query
# kernel, shape (768, 768), as shown in the output below.
# NOTE(review): the position of a variable in `model.weights` presumably
# depends on the model architecture / library version -- confirm the index
# (or compare by variable name) when checking against the other task.
print(model.weights[5])
<tf.Variable 'tf_bert_lm_head_model/bert/encoder/layer_._0/attention/self/query/kernel:0' shape=(768, 768) dtype=float32, numpy= array([[ 0.11825976, 0.01059594, 0.00478886, ..., -0.04938788, 0.01636124, 0.01673737], [-0.00918018, -0.00905642, -0.00512347, ..., 0.02321002, -0.09011449, -0.03552252], [ 0.0065572 , -0.00295785, 0.03182212, ..., 0.00119713, -0.04689885, 0.00050256], ..., [ 0.00973585, -0.00485693, 0.10374205, ..., 0.06686458, -0.03649763, 0.09566212], [ 0.00776388, 0.04061504, -0.03333197, ..., 0.00960802, 0.04296997, -0.02524005], [-0.08363243, 0.01113833, -0.00531893, ..., 0.07489353, -0.01407121, 0.01147269]], dtype=float32)>