import os
import sys
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
import torch.nn.init as init
from torch import autograd
from torch.autograd import Variable
from common.params_lstm import *
from common.utils import *
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("PyTorch: ", torch.__version__)
print("Numpy: ", np.__version__)
print("GPU: ", get_gpu_name())
OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
PyTorch:  0.2.0_4
Numpy:  1.13.3
GPU:  ['Tesla K80']
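The hyperparameters (MAXFEATURES, MAXLEN, EMBEDSIZE, NUMHIDDEN, BATCHSIZE, LR, BETA_1, BETA_2, EPS, EPOCHS) are imported from common.params_lstm. To run the cells standalone without that module, a sketch of stand-in values is below: MAXFEATURES, MAXLEN and EPOCHS match the outputs later in this notebook, while the remaining values are placeholder assumptions rather than the repo's actual settings.

# Stand-in hyperparameters if common.params_lstm is unavailable.
# Only MAXFEATURES, MAXLEN and EPOCHS are confirmed by the outputs below;
# the rest are placeholder assumptions.
MAXFEATURES = 30000   # vocabulary size (matches "Trimming to 30000 max-features")
MAXLEN = 150          # sequence length (matches "Padding to length 150")
EPOCHS = 3            # matches the three epoch indices printed during training
EMBEDSIZE = 125       # assumed embedding width
NUMHIDDEN = 100       # assumed GRU hidden size
BATCHSIZE = 64        # assumed minibatch size
LR, BETA_1, BETA_2, EPS = 0.001, 0.9, 0.999, 1e-08  # assumed Adam settings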
class SymbolModule(nn.Module):
    def __init__(self):
        super(SymbolModule, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=MAXFEATURES,
                                      embedding_dim=EMBEDSIZE)
        # With batch_first=True, input and output
        # are provided as (batch, seq, features)
        # cuDNN is used by default if possible
        self.gru = nn.GRU(input_size=EMBEDSIZE,
                          hidden_size=NUMHIDDEN,
                          num_layers=1,
                          batch_first=True,
                          bidirectional=False)
        self.l_out = nn.Linear(in_features=NUMHIDDEN*1,
                               out_features=2)

    def forward(self, x):
        x = self.embedding(x)
        h0 = Variable(torch.zeros(1, BATCHSIZE, NUMHIDDEN)).cuda()
        x, h = self.gru(x, h0)  # outputs, states
        # Keep only the last output time-step for classification
        x = x[:, -1, :].squeeze()
        x = self.l_out(x)
        return x
def init_model(m):
    opt = optim.Adam(m.parameters(), lr=LR, betas=(BETA_1, BETA_2), eps=EPS)
    criterion = nn.CrossEntropyLoss()
    return opt, criterion
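As a quick sanity check of the batch-first convention noted in the class above (a standalone sketch with toy sizes, not part of the benchmark): with batch_first=True the GRU takes (batch, seq, features) and returns (batch, seq, hidden), so x[:, -1, :] in forward() picks out the final time-step.

# Shape sketch (toy sizes, runs on CPU; not the benchmark settings)
emb = nn.Embedding(num_embeddings=1000, embedding_dim=8)
gru = nn.GRU(input_size=8, hidden_size=16, num_layers=1, batch_first=True)
tokens = Variable(torch.zeros(4, 10).long())                # (batch=4, seq=10)
out, h = gru(emb(tokens), Variable(torch.zeros(1, 4, 16)))  # h0: (layers, batch, hidden)
print(out.size())            # (4, 10, 16) -> an output for every time-step
print(out[:, -1, :].size())  # (4, 16)     -> last time-step only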
%%time
# Data into format for library
x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)
# Torch-specific
x_train = x_train.astype(np.int64)
x_test = x_test.astype(np.int64)
y_train = y_train.astype(np.int64)
y_test = y_test.astype(np.int64)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)
Preparing train set...
Preparing test set...
Trimming to 30000 max-features
Padding to length 150
(25000, 150) (25000, 150) (25000,) (25000,)
int64 int64 int64 int64
CPU times: user 5.66 s, sys: 309 ms, total: 5.97 s
Wall time: 5.98 s
%%time
sym = SymbolModule()
sym.cuda() # CUDA!
CPU times: user 2.12 s, sys: 481 ms, total: 2.6 s
Wall time: 2.74 s
%%time
optimizer, criterion = init_model(sym)
CPU times: user 107 µs, sys: 15 µs, total: 122 µs
Wall time: 125 µs
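yield_mb comes from common.utils; the training and test loops below rely on it yielding fixed-size minibatches (dropping any remainder), optionally shuffled. A minimal sketch of that assumed behaviour, for reference only:

# Sketch of the assumed yield_mb behaviour (the real helper lives in common.utils)
def yield_mb_sketch(X, y, batchsize, shuffle=False):
    if shuffle:
        idx = np.random.permutation(len(X))
        X, y = X[idx], y[idx]
    for i in range(len(X) // batchsize):  # drop the final partial batch
        yield X[i*batchsize:(i+1)*batchsize], y[i*batchsize:(i+1)*batchsize]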
%%time
# 31s
# Sets training = True
sym.train()
for j in range(EPOCHS):
    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Get samples
        data = Variable(torch.LongTensor(data).cuda())
        target = Variable(torch.LongTensor(target).cuda())
        # Init
        optimizer.zero_grad()
        # Forwards
        output = sym(data)
        # Loss
        loss = criterion(output, target)
        # Back-prop
        loss.backward()
        optimizer.step()
    # Log epoch index
    print(j)
0
1
2
CPU times: user 26.8 s, sys: 4.16 s, total: 31 s
Wall time: 31.2 s
%%time
# Test model
# Sets training = False
sym.eval()
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
y_guess = np.zeros(n_samples, dtype=np.int)
y_truth = y_test[:n_samples]
c = 0
for data, target in yield_mb(x_test, y_test, BATCHSIZE):
    # Get samples
    data = Variable(torch.LongTensor(data).cuda())
    # Forwards
    output = sym(data)
    pred = output.data.max(1)[1].cpu().numpy().squeeze()
    # Collect results
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred
    c += 1
CPU times: user 2.73 s, sys: 312 ms, total: 3.05 s
Wall time: 3.05 s
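For reference, output.data.max(1)[1] in the loop above returns, for each row, the index of the larger of the two class logits, i.e. the predicted class. A tiny standalone check (a sketch, not part of the benchmark):

# Argmax over the class dimension
logits = torch.Tensor([[0.2, 1.5],
                       [3.0, -1.0]])
values, indices = logits.max(1)
print(indices)  # predicted classes 1 and 0 (older PyTorch may keep a size-1 dim, hence the squeeze above)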
print("Accuracy: ", sum(y_guess == y_truth)/len(y_guess))
Accuracy: 0.862940705128