!pip install fastai --upgrade
from fastai.text.all import *
import torch
import torch.nn.functional as F
path = untar_data(URLs.HUMAN_NUMBERS)
lines = L()
with open(path/'train.txt') as f: lines += L(*f.readlines())
with open(path/'valid.txt') as f: lines += L(*f.readlines())
lines
(#9998) ['one \n','two \n','three \n','four \n','five \n','six \n','seven \n','eight \n','nine \n','ten \n'...]
text = ' . '.join([l.strip() for l in lines])
text[:100]
'one . two . three . four . five . six . seven . eight . nine . ten . eleven . twelve . thirteen . fo'
tokens = text.split(' ')
tokens[:10]
['one', '.', 'two', '.', 'three', '.', 'four', '.', 'five', '.']
vocab = L(*tokens).unique()
vocab
(#30) ['one','.','two','three','four','five','six','seven','eight','nine'...]
word2idx = {w:i for i,w in enumerate(vocab)}
nums = L(word2idx[i] for i in tokens)
nums
(#63095) [0,1,2,1,3,1,4,1,5,1...]
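# Quick round-trip check (illustrative, not in the original notebook): mapping the
# indices in nums back through vocab recovers the token stream.
' '.join(vocab[i] for i in nums[:10])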
L((tokens[i:i+3], tokens[i+3]) for i in range(0,len(tokens)-4,3))
(#21031) [(['one', '.', 'two'], '.'),(['.', 'three', '.'], 'four'),(['four', '.', 'five'], '.'),(['.', 'six', '.'], 'seven'),(['seven', '.', 'eight'], '.'),(['.', 'nine', '.'], 'ten'),(['ten', '.', 'eleven'], '.'),(['.', 'twelve', '.'], 'thirteen'),(['thirteen', '.', 'fourteen'], '.'),(['.', 'fifteen', '.'], 'sixteen')...]
seqs = L((tensor(nums[i:i+3]), nums[i+3]) for i in range(0,len(nums)-4,3))
seqs
(#21031) [(tensor([0, 1, 2]), 1),(tensor([1, 3, 1]), 4),(tensor([4, 1, 5]), 1),(tensor([1, 6, 1]), 7),(tensor([7, 1, 8]), 1),(tensor([1, 9, 1]), 10),(tensor([10, 1, 11]), 1),(tensor([ 1, 12, 1]), 13),(tensor([13, 1, 14]), 1),(tensor([ 1, 15, 1]), 16)...]
bs = 32
cut = int(len(seqs) * 0.8)
dls = DataLoaders.from_dsets(seqs[:cut], seqs[cut:], bs=bs, shuffle=False)
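# Sanity check (illustrative): each batch holds bs inputs of three token indices
# and one target index per input.
xb, yb = dls.one_batch()
xb.shape, yb.shape  # expected: (torch.Size([32, 3]), torch.Size([32]))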
class LMModel1(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  # input to hidden
        self.h_h = nn.Linear(n_hidden, n_hidden)     # hidden to hidden
        self.h_o = nn.Linear(n_hidden, vocab_sz)     # hidden to output
    def forward(self, x):
        h = F.relu(self.h_h(self.i_h(x[:, 0])))
        h = h + self.i_h(x[:, 1])
        h = F.relu(self.h_h(h))
        h = h + self.i_h(x[:, 2])
        h = F.relu(self.h_h(h))
        return self.h_o(h)
LMModel1(len(vocab), 3)(seqs[0][0].unsqueeze(0))
tensor([[-0.1084, -0.2783, -0.1223, -0.3544, -0.4660, -0.1827, -0.3859, 0.2552, -0.0022, -0.3058, 0.0698, -0.3552, 0.5132, -0.1932, 0.4968, 0.1891, 0.4565, -0.5759, -0.0737, -0.3763, 0.5565, 0.1311, 0.2966, 0.3392, -0.1113, 0.2586, 0.0560, -0.1836, 0.5182, -0.2767]], grad_fn=<AddmmBackward>)
learn = Learner(dls, LMModel1(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 1.789906 | 2.101264 | 0.443071 | 00:02 |
1 | 1.393594 | 1.828861 | 0.468029 | 00:02 |
2 | 1.410961 | 1.673940 | 0.492512 | 00:02 |
3 | 1.377922 | 1.698437 | 0.482529 | 00:02 |
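# Baseline check (this is the check the fastai book uses): what accuracy would we
# get by always predicting the most common token in the validation set? The model
# above only counts as having learned something if it beats this.
n, counts = 0, torch.zeros(len(vocab))
for x, y in dls.valid:
    n += y.shape[0]
    for i in range_of(vocab): counts[i] += (y == i).long().sum()
idx = counts.argmax()
idx, vocab[idx.item()], counts[idx].item()/n  # 'thousand' is the most common token, at roughly 15%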
class LMModel2(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.h_h = nn.Linear(n_hidden, n_hidden)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
    def forward(self, x):
        h = 0
        for i in range(3):
            h = h + self.i_h(x[:, i])
            h = F.relu(self.h_h(h))
        return self.h_o(h)
LMModel2(len(vocab), 3)(seqs[0][0].unsqueeze(0))
tensor([[ 0.0557, -0.3203, -0.8181, -0.6636, 0.1325, 0.0072, -0.0247, 0.6777, 0.1840, -0.4547, -0.0569, 0.2728, -0.2493, -0.0508, 0.1517, 0.5898, 0.2106, -0.7529, 0.9692, 0.6834, -0.4625, -0.0441, -0.2372, 0.4435, -0.0727, 0.1358, -0.0039, 0.1496, -0.6447, 0.3787]], grad_fn=<AddmmBackward>)
learn = Learner(dls, LMModel2(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 1.909757 | 1.930985 | 0.478013 | 00:02 |
1 | 1.469091 | 1.714454 | 0.479677 | 00:02 |
2 | 1.429843 | 1.672472 | 0.492988 | 00:02 |
3 | 1.390096 | 1.681135 | 0.465890 | 00:02 |
class LMModel3(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.h_h = nn.Linear(n_hidden, n_hidden)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h = 0  # hidden state, maintained across batches
    def forward(self, x):
        for i in range(3):
            self.h = self.h + self.i_h(x[:, i])
            self.h = F.relu(self.h_h(self.h))
        out = self.h_o(self.h)
        self.h = self.h.detach()  # keep the state's values but drop its gradient history
        return out
    def reset(self): self.h = 0
LMModel3(len(vocab), 3)(seqs[0][0].unsqueeze(0))
tensor([[-0.3256, -0.2493, -0.0404, 0.2820, 0.3655, 0.1891, -0.1301, -0.1874, -0.4694, -0.3767, -0.0546, -0.0693, -0.4469, 0.0875, 0.0070, 0.0787, 0.0223, -0.0287, 0.5465, -0.0721, -0.4811, -0.3768, 0.5216, -0.4914, 0.0082, 0.3935, -0.5356, -0.4153, 0.2180, 0.1427]], grad_fn=<AddmmBackward>)
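# Illustrative: the hidden state persists (detached) across calls until reset() is
# called; the ModelResetter callback used below calls reset() at the start of
# training and validation so one sequence's state doesn't leak into another.
m3 = LMModel3(len(vocab), 64)
m3(seqs[0][0].unsqueeze(0))
m3.h.shape  # state kept for the next batch: torch.Size([1, 64])
m3.reset()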
m = len(seqs)//bs
m, bs, len(seqs)
(657, 32, 21031)
def group_chunks(ds, bs):
    m = len(ds) // bs
    new_ds = L()
    # interleave so that sample j of batch i is continued by sample j of batch i+1
    for i in range(m): new_ds += L(ds[i + m*j] for j in range(bs))
    return new_ds
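# Illustrative check of the reordering: the first sample of one batch is continued
# by the first sample of the next batch, which is what a stateful model needs.
g = group_chunks(seqs, bs)
g[0][0], g[bs][0]  # consecutive 3-token chunks of the corpus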
cut = int(len(seqs) * 0.8)
dls = DataLoaders.from_dsets(group_chunks(seqs[:cut], bs),
                             group_chunks(seqs[cut:], bs),
                             bs=bs, drop_last=True, shuffle=False)
learn = Learner(dls, LMModel3(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(4, 1e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 1.664295 | 1.806631 | 0.486058 | 00:02 |
1 | 1.342835 | 1.807223 | 0.417308 | 00:02 |
2 | 1.218265 | 1.683006 | 0.458654 | 00:02 |
3 | 1.168428 | 1.693239 | 0.459615 | 00:02 |
sl = 16
seqs = L((tensor(nums[i: i+sl]), tensor(nums[i+1: i+sl+1])) for i in range(0, len(nums)-sl-1, sl))
cut = int(len(seqs)*0.8)
dls = DataLoaders.from_dsets(group_chunks(seqs[:cut], bs),
                             group_chunks(seqs[cut:], bs),
                             bs=bs, drop_last=True, shuffle=False)
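# Shape check (illustrative): inputs and targets are both bs x sl, with the
# targets offset by one token.
xb, yb = dls.one_batch()
xb.shape, yb.shape  # expected: (torch.Size([32, 16]), torch.Size([32, 16]))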
class LMModel4(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.h_h = nn.Linear(n_hidden, n_hidden)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h = 0
    def forward(self, x):
        outs = []
        for i in range(sl):
            self.h = self.h + self.i_h(x[:, i])
            self.h = F.relu(self.h_h(self.h))
            outs.append(self.h_o(self.h))  # a prediction after every token, not just the last
        self.h = self.h.detach()  # truncate backprop between batches
        return torch.stack(outs, dim=1)
    def reset(self): self.h = 0
LMModel4(len(vocab), 64)(seqs[0][0].unsqueeze(0))
def loss(inp, target): return F.cross_entropy(inp.view(-1, len(vocab)), target.view(-1))
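# The flattening above merges the batch and sequence dimensions so F.cross_entropy
# sees one prediction per token; fastai's CrossEntropyLossFlat (used for LMModel5
# below) applies the same flattening. Quick check with random data (illustrative):
loss(torch.randn(bs, sl, len(vocab)), torch.zeros(bs, sl, dtype=torch.long))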
learn = Learner(dls, LMModel4(len(vocab), 64), loss_func=loss, metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(4, 1e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.511702 | 2.013124 | 0.420410 | 00:01 |
1 | 1.723667 | 2.002416 | 0.376790 | 00:01 |
2 | 1.575574 | 2.009366 | 0.364746 | 00:01 |
3 | 1.538547 | 2.018201 | 0.364909 | 00:01 |
class LMModel5(Module):
    def __init__(self, vocab_sz, n_hidden, n_layers):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.RNN(n_hidden, n_hidden, n_layers, batch_first=True)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h = torch.zeros(n_layers, bs, n_hidden)
    def forward(self, x):
        res, h = self.rnn(self.i_h(x), self.h)  # nn.RNN runs the whole loop for us
        self.h = h.detach()
        return self.h_o(res)
    def reset(self): self.h.zero_()
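# What nn.RNN does for us (illustrative, toy sizes): with batch_first=True it takes
# (batch, seq, input_size) and returns all per-step outputs plus the final hidden
# state of shape (n_layers, batch, hidden_size).
out, h = nn.RNN(4, 5, 2, batch_first=True)(torch.randn(3, 6, 4))
out.shape, h.shape  # (torch.Size([3, 6, 5]), torch.Size([2, 3, 5]))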
nn.Embedding(13, 4)(torch.tensor([0, 1, 4]))
tensor([[ 0.1739, 2.3553, -0.0054, 0.5570], [ 1.3818, 0.1635, -0.3897, -0.7589], [-1.4636, 0.6106, -1.7279, 0.9655]], grad_fn=<EmbeddingBackward>)
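# An embedding layer is just an optimized lookup: equivalent to one-hot encoding
# the indices and multiplying by the weight matrix (illustrative check):
emb = nn.Embedding(13, 4)
idx = torch.tensor([0, 1, 4])
torch.allclose(emb(idx), F.one_hot(idx, 13).float() @ emb.weight)  # True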
learn = Learner(dls, LMModel5(len(vocab), 64, 3),
loss_func=CrossEntropyLossFlat(),
metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(4, 3e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.847264 | 2.796035 | 0.151855 | 00:14 |
1 | 2.757411 | 2.792833 | 0.151855 | 00:14 |
2 | 2.745373 | 2.805568 | 0.151855 | 00:14 |
3 | 2.743357 | 2.806942 | 0.151855 | 00:14 |