import os
import sys
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
import torch.nn.init as init
from torch import autograd
from torch.autograd import Variable
from common.params_lstm import *
from common.utils import *
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("PyTorch: ", torch.__version__)
print("Numpy: ", np.__version__)
print("GPU: ", get_gpu_name())
OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
PyTorch:  0.2.0_4
Numpy:  1.13.3
GPU:  ['Tesla K80']
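The hyperparameters (MAXFEATURES, MAXLEN, EMBEDSIZE, NUMHIDDEN, BATCHSIZE, LR, BETA_1, BETA_2, EPS, EPOCHS) are imported from common.params_lstm. To run the cells standalone without that module, a sketch of stand-in values is below: MAXFEATURES, MAXLEN and EPOCHS match the outputs later in this notebook, while the remaining values are placeholder assumptions rather than the repo's actual settings.

# Stand-in hyperparameters if common.params_lstm is unavailable.
# Only MAXFEATURES, MAXLEN and EPOCHS are confirmed by the outputs below;
# the rest are placeholder assumptions.
MAXFEATURES = 30000   # vocabulary size (matches "Trimming to 30000 max-features")
MAXLEN = 150          # sequence length (matches "Padding to length 150")
EPOCHS = 3            # matches the three epoch indices printed during training
EMBEDSIZE = 125       # assumed embedding width
NUMHIDDEN = 100       # assumed GRU hidden size
BATCHSIZE = 64        # assumed minibatch size
LR, BETA_1, BETA_2, EPS = 0.001, 0.9, 0.999, 1e-08  # assumed Adam settings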
class SymbolModule(nn.Module):
    def __init__(self):
        super(SymbolModule, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=MAXFEATURES,
                                      embedding_dim=EMBEDSIZE)
        # With batch_first=True, input and output
        # are provided as (batch, seq, features)
        # cuDNN is used by default if possible
        self.gru = nn.GRU(input_size=EMBEDSIZE,
                          hidden_size=NUMHIDDEN,
                          num_layers=1,
                          batch_first=True,
                          bidirectional=False)
        self.l_out = nn.Linear(in_features=NUMHIDDEN*1,
                               out_features=2)

    def forward(self, x):
        x = self.embedding(x)
        h0 = Variable(torch.zeros(1, BATCHSIZE, NUMHIDDEN)).cuda()
        x, h = self.gru(x, h0)  # outputs, states
        # Keep only the last output time-step for classification
        x = x[:, -1, :].squeeze()
        x = self.l_out(x)
        return x
def init_model(m):
    opt = optim.Adam(m.parameters(), lr=LR, betas=(BETA_1, BETA_2), eps=EPS)
    criterion = nn.CrossEntropyLoss()
    return opt, criterion
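As a quick sanity check of the batch-first convention noted in the class above (a standalone sketch with toy sizes, not part of the benchmark): with batch_first=True the GRU takes (batch, seq, features) and returns (batch, seq, hidden), so x[:, -1, :] in forward() picks out the final time-step.

# Shape sketch (toy sizes, runs on CPU; not the benchmark settings)
emb = nn.Embedding(num_embeddings=1000, embedding_dim=8)
gru = nn.GRU(input_size=8, hidden_size=16, num_layers=1, batch_first=True)
tokens = Variable(torch.zeros(4, 10).long())                # (batch=4, seq=10)
out, h = gru(emb(tokens), Variable(torch.zeros(1, 4, 16)))  # h0: (layers, batch, hidden)
print(out.size())            # (4, 10, 16) -> an output for every time-step
print(out[:, -1, :].size())  # (4, 16)     -> last time-step only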
%%time
# Data into format for library
x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)
# Torch-specific
x_train = x_train.astype(np.int64)
x_test = x_test.astype(np.int64)
y_train = y_train.astype(np.int64)
y_test = y_test.astype(np.int64)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)
Preparing train set...
Preparing test set...
Trimming to 30000 max-features
Padding to length 150
(25000, 150) (25000, 150) (25000,) (25000,)
int64 int64 int64 int64
CPU times: user 5.66 s, sys: 309 ms, total: 5.97 s
Wall time: 5.98 s
%%time
sym = SymbolModule()
sym.cuda() # CUDA!
CPU times: user 2.12 s, sys: 481 ms, total: 2.6 s
Wall time: 2.74 s
%%time
optimizer, criterion = init_model(sym)
CPU times: user 107 µs, sys: 15 µs, total: 122 µs
Wall time: 125 µs
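yield_mb comes from common.utils; the training and test loops below rely on it yielding fixed-size minibatches (dropping any remainder), optionally shuffled. A minimal sketch of that assumed behaviour, for reference only:

# Sketch of the assumed yield_mb behaviour (the real helper lives in common.utils)
def yield_mb_sketch(X, y, batchsize, shuffle=False):
    if shuffle:
        idx = np.random.permutation(len(X))
        X, y = X[idx], y[idx]
    for i in range(len(X) // batchsize):  # drop the final partial batch
        yield X[i*batchsize:(i+1)*batchsize], y[i*batchsize:(i+1)*batchsize]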
%%time
# 31s
# Sets training = True
sym.train()
for j in range(EPOCHS):
    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Get samples
        data = Variable(torch.LongTensor(data).cuda())
        target = Variable(torch.LongTensor(target).cuda())
        # Init
        optimizer.zero_grad()
        # Forwards
        output = sym(data)
        # Loss
        loss = criterion(output, target)
        # Back-prop
        loss.backward()
        optimizer.step()
    # Log epoch index
    print(j)
0
1
2
CPU times: user 26.8 s, sys: 4.16 s, total: 31 s
Wall time: 31.2 s
%%time
# Test model
# Sets training = False
sym.eval()
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
y_guess = np.zeros(n_samples, dtype=np.int)
y_truth = y_test[:n_samples]
c = 0
for data, target in yield_mb(x_test, y_test, BATCHSIZE):
    # Get samples
    data = Variable(torch.LongTensor(data).cuda())
    # Forwards
    output = sym(data)
    pred = output.data.max(1)[1].cpu().numpy().squeeze()
    # Collect results
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred
    c += 1
CPU times: user 2.73 s, sys: 312 ms, total: 3.05 s
Wall time: 3.05 s
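For reference, output.data.max(1)[1] in the loop above returns, for each row, the index of the larger of the two class logits, i.e. the predicted class. A tiny standalone check (a sketch, not part of the benchmark):

# Argmax over the class dimension
logits = torch.Tensor([[0.2, 1.5],
                       [3.0, -1.0]])
values, indices = logits.max(1)
print(indices)  # predicted classes 1 and 0 (older PyTorch may keep a size-1 dim, hence the squeeze above)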
print("Accuracy: ", sum(y_guess == y_truth)/len(y_guess))
Accuracy: 0.862940705128