# !pip install datasets wandb
# utils
import os
import torch
import tqdm
# data
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
from transformers import AutoConfig, AutoModel, AutoTokenizer
# model
import torch.nn as nn
# training and evaluation
import wandb
import torch.nn.functional as F
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import accuracy_score, f1_score, classification_report
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
cuda
# custom dataset class
class ReviewDataset(Dataset):
    def __init__(self, tokenizer, data, text_field='text', label_field='label', max_len=512):
        self.tokenizer = tokenizer
        self.data = data
        self.text_field = text_field
        self.label_field = label_field
        self.max_len = max_len

    def __len__(self):
        return len(self.data[self.text_field])

    def __getitem__(self, idx):
        text = self.data[self.text_field][idx]
        target = self.data[self.label_field][idx]
        # encode the text into input ids and an attention mask, padded/truncated to max_len
        encoding = self.tokenizer.encode_plus(
            text=text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True,
            padding='max_length'
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
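# quick smoke test for ReviewDataset (a minimal sketch: the sample reviews below are
# made up, and it assumes the 'xlnet-base-cased' tokenizer that is used later on)
tok = AutoTokenizer.from_pretrained('xlnet-base-cased')
sample = {'review_text': ['great book', 'terrible book'], 'sentiment': [1, 0]}
ds = ReviewDataset(tokenizer=tok, data=sample, text_field='review_text',
                   label_field='sentiment', max_len=32)
item = ds[0]
print(item['input_ids'].shape, item['attention_mask'].shape, item['targets'])
# expected: torch.Size([32]) torch.Size([32]) tensor(1)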
class Classifier(nn.Module):
    def __init__(self, model_name, num_classes=2):
        super(Classifier, self).__init__()
        # load the pretrained config and base transformer
        self.config = AutoConfig.from_pretrained(pretrained_model_name_or_path=model_name)
        self.base = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name)
        # classification head on top of the pooled representation
        self.head = nn.Sequential(
            nn.Linear(in_features=self.config.hidden_size, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=num_classes)
        )

    def forward(self, input_ids, attention_mask=None):
        # the base model's first output is the last-layer hidden states;
        # the hidden state of the first token serves as the pooled sequence representation
        top_layer = self.base(input_ids, attention_mask)[0]
        pooled = top_layer[:, 0]
        logits = self.head(pooled)
        return logits, pooled, top_layer
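# shape sanity check for the Classifier (a sketch; the weights download on first call,
# and the fake batch below is random token ids, not real text)
clf = Classifier(model_name='xlnet-base-cased', num_classes=2)
ids = torch.randint(0, clf.config.vocab_size, (2, 16))  # 2 sequences of 16 tokens
mask = torch.ones_like(ids)
with torch.no_grad():
    logits, pooled, top_layer = clf(ids, mask)
print(logits.shape, pooled.shape, top_layer.shape)
# expected: torch.Size([2, 2]) torch.Size([2, 768]) torch.Size([2, 16, 768])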
class Finetuner(pl.LightningModule):
    def __init__(self, config):
        super(Finetuner, self).__init__()
        # initialize the pretrained model and its tokenizer
        self.config = config
        self.model = Classifier(model_name=self.config['model_name'], num_classes=self.config['num_classes'])
        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=self.config['model_name'])

    def forward(self, input_ids, attention_mask=None):
        logits, _, _ = self.model(input_ids, attention_mask)
        return logits

    def configure_optimizers(self):
        return torch.optim.Adam(params=self.parameters(), lr=self.config['lr'])

    def train_dataloader(self):
        # the first 10% of the data is reserved for validation; train on the rest
        data = load_dataset("csv", data_files=self.config['root_dir'] + self.config['source'], split='train[10%:]')
        dataset = ReviewDataset(tokenizer=self.tokenizer, data=data, text_field=self.config['text_field'], label_field=self.config['label_field'], max_len=self.config['max_len'])
        loader = DataLoader(dataset=dataset, batch_size=self.config['batch_size'], shuffle=True)
        return loader

    def training_step(self, batch, batch_idx):
        input_ids, attention_mask, targets = batch['input_ids'], batch['attention_mask'], batch['targets']
        logits = self(input_ids, attention_mask)
        loss = F.cross_entropy(logits, targets)
        acc = accuracy_score(targets.cpu(), logits.argmax(dim=1).cpu())
        f1 = f1_score(targets.cpu(), logits.argmax(dim=1).cpu())
        wandb.log({"Loss": loss, "Accuracy": torch.tensor([acc]), "F1": torch.tensor([f1])})
        return {"loss": loss, "accuracy": torch.tensor([acc]), "f1": torch.tensor([f1])}

    def val_dataloader(self):
        # the first 10% of the data is the validation split
        data = load_dataset("csv", data_files=self.config['root_dir'] + self.config['source'], split='train[:10%]')
        dataset = ReviewDataset(tokenizer=self.tokenizer, data=data, text_field=self.config['text_field'], label_field=self.config['label_field'], max_len=self.config['max_len'])
        loader = DataLoader(dataset=dataset, batch_size=self.config['batch_size'], shuffle=False)
        return loader

    def validation_step(self, batch, batch_idx):
        input_ids, attention_mask, targets = batch['input_ids'], batch['attention_mask'], batch['targets']
        logits = self(input_ids, attention_mask)
        loss = F.cross_entropy(logits, targets)
        acc = accuracy_score(targets.cpu(), logits.argmax(dim=1).cpu())
        f1 = f1_score(targets.cpu(), logits.argmax(dim=1).cpu())
        wandb.log({"Val_loss": loss, "Val_accuracy": torch.tensor([acc]), "Val_f1": torch.tensor([f1])})
        return {"val_loss": loss, "val_accuracy": torch.tensor([acc]), "val_f1": torch.tensor([f1])}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_accuracy'] for x in outputs]).mean()
        avg_f_score = torch.stack([x['val_f1'] for x in outputs]).mean()
        wandb.log({"Val_loss": avg_loss, "Val_accuracy": avg_acc, "Val_f1": avg_f_score})
        return {'val_loss': avg_loss, 'val_accuracy': avg_acc, "val_f1": avg_f_score}

    def test_dataloader(self):
        # the test data is the same as the validation split
        data = load_dataset("csv", data_files=self.config['root_dir'] + self.config['source'], split='train[:10%]')
        dataset = ReviewDataset(tokenizer=self.tokenizer, data=data, text_field=self.config['text_field'], label_field=self.config['label_field'], max_len=self.config['max_len'])
        loader = DataLoader(dataset=dataset, batch_size=self.config['batch_size'], shuffle=False)
        return loader

    def test_step(self, batch, batch_idx):
        input_ids, attention_mask, targets = batch['input_ids'], batch['attention_mask'], batch['targets']
        logits = self(input_ids, attention_mask)
        loss = F.cross_entropy(logits, targets)
        acc = accuracy_score(targets.cpu(), logits.argmax(dim=1).cpu())
        f1 = f1_score(targets.cpu(), logits.argmax(dim=1).cpu())
        return {"test_loss": loss, "test_accuracy": torch.tensor([acc]), "test_f1": torch.tensor([f1])}

    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['test_accuracy'] for x in outputs]).mean()
        avg_f1 = torch.stack([x['test_f1'] for x in outputs]).mean()
        return {"test_loss": avg_loss, "test_accuracy": avg_acc, "test_f1": avg_f1}
config = {
    # data
    "root_dir": "../input/amazonproductsreview/amazon-review/",
    "source": 'books.csv',
    "targets": ["dvd.csv", "electronics.csv", "kitchen_housewares.csv"],
    "max_len": 512,
    "batch_size": 8,
    "num_classes": 2,
    "text_field": "review_text",
    "label_field": "sentiment",
    # model
    "model_name": 'xlnet-base-cased',
    # training
    "lr": 1e-5,
    "epochs": 20,
    # logger and checkpoints
    "project": "pretrained-model-robustness",
    "run_name": "xlnet",
    "monitor": "val_accuracy",
    "min_delta": 0.001,
    "filepath": "../working/{epoch}-{val_accuracy:4f}",
    "save_dir": "../working/",
}
logger = WandbLogger(
    name=config["run_name"],
    save_dir=config["save_dir"],
    project=config["project"],
    log_model=True,
)
early_stopping = EarlyStopping(
    monitor=config["monitor"],
    min_delta=config["min_delta"],
    patience=5,
)
checkpoints = ModelCheckpoint(
    filepath=config["filepath"],
    monitor=config["monitor"],
    save_top_k=1
)
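# note: `filepath` is the argument taken by the older pytorch_lightning release this
# notebook runs on; in releases >= 1.0 the equivalent (shown only as a sketch here)
# splits it into `dirpath` and `filename`:
# checkpoints = ModelCheckpoint(
#     dirpath="../working/",
#     filename="{epoch}-{val_accuracy:4f}",
#     monitor=config["monitor"],
#     save_top_k=1,
# )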
trainer = pl.Trainer(
    logger=logger,
    gpus=[0],
    checkpoint_callback=checkpoints,
    default_root_dir="../working/",
    max_epochs=config["epochs"],
    callbacks=[early_stopping]
)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
model = Finetuner(config)
trainer.fit(model)
  | Name  | Type       | Params
-------------------------------------
0 | model | Classifier | 116 M
1
trainer.test(model)
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': tensor(0.8800, dtype=torch.float64),
 'test_f1': tensor(0.8675, dtype=torch.float64),
 'test_loss': tensor(0.4570, device='cuda:0')}
--------------------------------------------------------------------------------
[{'test_loss': 0.45704737305641174, 'test_accuracy': 0.88, 'test_f1': 0.8674862914862916}]
l = torch.load(f="../working/epoch=6-val_accuracy=0.915000.ckpt")
model.load_state_dict(l['state_dict'])
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-17-5e6a8c1e3fe5> in <module>
----> 1 l = torch.load(f="../working/epoch=10-val_accuracy=0.895000.ckpt")
      2 model.load_state_dict(l['state_dict'])
FileNotFoundError: [Errno 2] No such file or directory: '../working/epoch=10-val_accuracy=0.895000.ckpt'
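# the FileNotFoundError above comes from hard-coding a checkpoint name that changes
# every run (the epoch number and metric value are baked into the filename); a more
# robust sketch (hypothetical, assumes checkpoints live in ../working/) globs for it:
import glob
ckpts = sorted(glob.glob("../working/*.ckpt"))
if ckpts:
    state = torch.load(f=ckpts[-1], map_location=device)  # lexicographically last checkpoint
    model.load_state_dict(state['state_dict'])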
trainer.test(model)
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': tensor(0.8950, dtype=torch.float64),
 'test_f1': tensor(0.8734, dtype=torch.float64),
 'test_loss': tensor(0.4835, device='cuda:0')}
--------------------------------------------------------------------------------
[{'test_loss': 0.4835154712200165, 'test_accuracy': 0.895, 'test_f1': 0.8734305694305696}]
def load_data(file, tokenizer):
    data = load_dataset("csv", data_files=config['root_dir'] + file)
    dataset = ReviewDataset(tokenizer=tokenizer, data=data['train'], text_field=config['text_field'], label_field=config['label_field'])
    data_loader = DataLoader(dataset=dataset, batch_size=config['batch_size'], shuffle=False)
    return data_loader

def test_fn(model, loader):
    y_true = []
    y_pred = []
    model.eval()
    with torch.no_grad():  # no gradients needed for evaluation
        for batch in tqdm.tqdm(loader):
            input_ids, attention_mask, targets = batch['input_ids'], batch['attention_mask'], batch['targets']
            logits = model(input_ids.to(device), attention_mask.to(device))
            y_true += targets.tolist()
            y_pred += logits.argmax(dim=1).cpu().tolist()
    return classification_report(y_true, y_pred)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=config['model_name'])
for target in config['targets']:
    loader = load_data(file=target, tokenizer=tokenizer)
    report = test_fn(model.to(device), loader)
    print(f'Target Domain Name: {target}')
    print(report)
    del loader
Target Domain Name: dvd.csv
              precision    recall  f1-score   support

           0       0.87      0.88      0.87       991
           1       0.88      0.86      0.87       987

    accuracy                           0.87      1978
   macro avg       0.87      0.87      0.87      1978
weighted avg       0.87      0.87      0.87      1978
Target Domain Name: electronics.csv
              precision    recall  f1-score   support

           0       0.79      0.93      0.86       996
           1       0.92      0.76      0.83       999

    accuracy                           0.85      1995
   macro avg       0.86      0.85      0.84      1995
weighted avg       0.86      0.85      0.84      1995
Target Domain Name: kitchen_housewares.csv
              precision    recall  f1-score   support

           0       0.88      0.89      0.88      1000
           1       0.89      0.88      0.88       998

    accuracy                           0.88      1998
   macro avg       0.88      0.88      0.88      1998
weighted avg       0.88      0.88      0.88      1998
# test data is same as validation data
data = load_dataset("csv", data_files=config['root_dir']+config['source'], split='train[:10%]')
dataset = ReviewDataset(tokenizer=tokenizer, data=data, text_field=config['text_field'], label_field=config['label_field'], max_len=config['max_len'])
loader = DataLoader(dataset=dataset, batch_size=config['batch_size'], shuffle=False)
report = test_fn(model.to(device), loader)
print(f'Source Domain Name: {config["source"]}')
print(report)
Source Domain Name: books.csv
              precision    recall  f1-score   support

           0       0.89      0.90      0.89        97
           1       0.90      0.89      0.90       102

    accuracy                           0.89       199
   macro avg       0.89      0.89      0.89       199
weighted avg       0.89      0.89      0.89       199
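# hypothetical follow-up sketch: collect a single accuracy number per target domain
# (reusing load_data, the global tokenizer/model/device, and the accuracy_score import
# from the top of the notebook) for an at-a-glance source-vs-target comparison
def domain_accuracy(model, loader):
    y_true, y_pred = [], []
    model.eval()
    with torch.no_grad():
        for batch in loader:
            logits = model(batch['input_ids'].to(device), batch['attention_mask'].to(device))
            y_true += batch['targets'].tolist()
            y_pred += logits.argmax(dim=1).cpu().tolist()
    return accuracy_score(y_true, y_pred)

scores = {t: domain_accuracy(model.to(device), load_data(file=t, tokenizer=tokenizer))
          for t in config['targets']}
print(scores)  # roughly {'dvd.csv': 0.87, 'electronics.csv': 0.85, 'kitchen_housewares.csv': 0.88}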