This notebook shows how to fine-tune a pre-trained BERT model on the SQuAD dataset.
*Note:* To run this notebook you will need access to a GPU. The Reader was fine-tuned on an AWS EC2 p3.2xlarge machine (Tesla V100 16GB GPU), which took about 2 hours to complete (2 epochs on the SQuAD 1.1 train set were enough to achieve SOTA results on the SQuAD 1.1 dev set).
import os
import torch
import joblib
from cdqa.reader import BertProcessor, BertQA
from cdqa.utils.download import download_squad
/home/supercalculateur/source/andre/cdqa-dev/env-cdqa/lib/python3.6/site-packages/tqdm/autonotebook/__init__.py:18: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console) " (e.g. in jupyter console)", TqdmExperimentalWarning) I1120 11:43:47.615704 140657575868224 file_utils.py:39] PyTorch version 1.2.0 available.
# Download the SQuAD v1.1 and v2.0 train/dev JSON files into ./data.
# Per the cell output, files that are already present are reported as
# "already downloaded".
download_squad(dir='./data')
Downloading SQuAD v1.1 data... train-v1.1.json already downloaded dev-v1.1.json already downloaded Downloading SQuAD v2.0 data... train-v2.0.json already downloaded dev-v2.0.json already downloaded
# Turn the SQuAD 1.1 training file into the (examples, features) pair that is
# later handed to the reader's fit() call.
train_processor = BertProcessor(is_training=True, do_lower_case=True)
train_examples, train_features = train_processor.fit_transform(
    X='./data/SQuAD_1.1/train-v1.1.json'
)
I1120 11:43:48.194295 140657575868224 tokenization_utils.py:375] loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/supercalculateur/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
# Configure the BERT reader; the fine-tuned model is written under ./models.
reader = BertQA(
    output_dir='models',
    do_lower_case=True,  # same casing option as the BertProcessor above
    num_train_epochs=2,
    learning_rate=3e-5,
    train_batch_size=12,
)

# Fine-tune on the pre-processed SQuAD examples/features. Kept as the last
# expression so the notebook displays the fitted estimator's parameters.
reader.fit(X=(train_examples, train_features))
I1120 11:43:53.164162 140657575868224 configuration_utils.py:152] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/supercalculateur/.cache/torch/transformers/distributed_-1/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c I1120 11:43:53.165523 140657575868224 configuration_utils.py:169] Model config { "attention_probs_dropout_prob": 0.1, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "layer_norm_eps": 1e-12, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "num_labels": 2, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pruned_heads": {}, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 30522 } I1120 11:43:53.591548 140657575868224 modeling_utils.py:383] loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/supercalculateur/.cache/torch/transformers/distributed_-1/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157 I1120 11:43:55.430284 140657575868224 modeling_utils.py:453] Weights of BertForQuestionAnswering not initialized from pretrained model: ['qa_outputs.weight', 'qa_outputs.bias'] I1120 11:43:55.431005 140657575868224 modeling_utils.py:456] Weights from pretrained model not used in BertForQuestionAnswering: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
HBox(children=(IntProgress(value=0, description='Epoch', max=2, style=ProgressStyle(description_width='initial…
HBox(children=(IntProgress(value=0, description='Iteration', max=4, style=ProgressStyle(description_width='ini…
HBox(children=(IntProgress(value=0, description='Iteration', max=4, style=ProgressStyle(description_width='ini…
BertQA(adam_epsilon=1e-08, bert_model='bert-base-uncased', do_lower_case=True, fp16=False, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=-1, loss_scale=0, max_answer_length=30, n_best_size=20, no_cuda=False, null_score_diff_threshold=0.0, num_train_epochs=2, output_dir='models', predict_batch_size=8, seed=42, server_ip='', server_port='', train_batch_size=12, verbose_logging=False, version_2_with_negative=False, warmup_proportion=0.1, warmup_steps=0)
# Move the fine-tuned model to CPU and point the reader at the CPU device
# before serializing (presumably so the saved reader can be loaded on a
# machine without a GPU -- TODO confirm).
reader.model.to('cpu')
reader.device = torch.device('cpu')

# joblib.dump opens the target path directly and does not create missing
# parent directories, so make sure the output directory exists first.
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs(reader.output_dir, exist_ok=True)

# Persist the whole reader object next to the other training artifacts.
joblib.dump(reader, os.path.join(reader.output_dir, 'bert_qa.joblib'))