# Language modeling from scratch on the fastai HUMAN_NUMBERS corpus
# ("one . two . three . ..."), building up from an unrolled 3-token
# predictor (LMModel1) to a stateful multilayer nn.RNN (LMModel5).
#
# NOTE(review): this was a notebook; the original first line was the shell
# magic `!pip install fastai --upgrade`, which is not valid Python.
# Install fastai separately before running this as a script.
from fastai.text.all import *
import torch
import torch.nn.functional as F

# ---------------------------------------------------------------------------
# Data: download corpus, tokenize on spaces, numericalize with a small vocab.
# ---------------------------------------------------------------------------
path = untar_data(URLs.HUMAN_NUMBERS)

lines = L()
with open(path/'train.txt') as f:
    lines += L(*f.readlines())
with open(path/'valid.txt') as f:
    lines += L(*f.readlines())

text = ' . '.join([l.strip() for l in lines])
tokens = text.split(' ')
vocab = L(*tokens).unique()
word2idx = {w: i for i, w in enumerate(vocab)}
nums = L(word2idx[t] for t in tokens)

# Dataset of (three input tokens, fourth token to predict), non-overlapping.
seqs = L((tensor(nums[i:i+3]), nums[i+3]) for i in range(0, len(nums)-4, 3))

# One batch size used consistently everywhere.  The original mixed bs=32 and
# bs=64 (and paired group_chunks(..., 64) with bs=32), which breaks the
# batch-to-batch hidden-state continuity the stateful models rely on.
bs = 64
cut = int(len(seqs) * 0.8)  # 80/20 sequential train/valid split
dls = DataLoaders.from_dsets(seqs[:cut], seqs[cut:], bs=bs, shuffle=False)


class LMModel1(Module):
    "Predict token 4 from tokens 1-3 with one explicitly unrolled, weight-tied step per token."
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  # input -> hidden
        self.h_h = nn.Linear(n_hidden, n_hidden)     # hidden -> hidden (shared across steps)
        self.h_o = nn.Linear(n_hidden, vocab_sz)     # hidden -> output logits

    def forward(self, x):
        h = F.relu(self.h_h(self.i_h(x[:, 0])))
        h = h + self.i_h(x[:, 1])
        h = F.relu(self.h_h(h))
        h = h + self.i_h(x[:, 2])
        h = F.relu(self.h_h(h))
        return self.h_o(h)


LMModel1(len(vocab), 3)(seqs[0][0].unsqueeze(0))  # smoke check: one forward pass
learn = Learner(dls, LMModel1(len(vocab), 64),
                loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)


class LMModel2(Module):
    "Same as LMModel1 but with the three steps folded into a loop."
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.h_h = nn.Linear(n_hidden, n_hidden)
        self.h_o = nn.Linear(n_hidden, vocab_sz)

    def forward(self, x):
        h = 0  # broadcasts against the first embedding
        for i in range(3):
            h = h + self.i_h(x[:, i])
            h = F.relu(self.h_h(h))
        return self.h_o(h)


LMModel2(len(vocab), 3)(seqs[0][0].unsqueeze(0))  # smoke check
learn = Learner(dls, LMModel2(len(vocab), 64),
                loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)


class LMModel3(Module):
    "Stateful RNN: the hidden state persists across batches (truncated BPTT via detach)."
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.h_h = nn.Linear(n_hidden, n_hidden)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h = 0  # hidden state carried between forward calls

    def forward(self, x):
        for i in range(3):
            self.h = self.h + self.i_h(x[:, i])
            self.h = F.relu(self.h_h(self.h))
        out = self.h_o(self.h)
        self.h = self.h.detach()  # keep the value, truncate the gradient history
        return out

    def reset(self):
        # Called by ModelResetter at the start of each epoch / validation run.
        self.h = 0


LMModel3(len(vocab), 3)(seqs[0][0].unsqueeze(0))  # smoke check


def group_chunks(ds, bs):
    "Reorder `ds` so that item `i` of batch `b` is the continuation of item `i` of batch `b-1`."
    m = len(ds) // bs  # number of batches; trailing remainder is dropped
    new_ds = L()
    for i in range(m):
        new_ds += L(ds[i + m*j] for j in range(bs))
    return new_ds


cut = int(len(seqs) * 0.8)
dls = DataLoaders.from_dsets(group_chunks(seqs[:cut], bs),
                             group_chunks(seqs[cut:], bs),
                             bs=bs, shuffle=False)
learn = Learner(dls, LMModel3(len(vocab), 64),
                loss_func=F.cross_entropy, metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(4, 1e-3)

# ---------------------------------------------------------------------------
# Predict every next token: inputs are sl-token windows, targets the same
# window shifted by one.
# ---------------------------------------------------------------------------
sl = 16
seqs = L((tensor(nums[i:i+sl]), tensor(nums[i+1:i+sl+1]))
         for i in range(0, len(nums), sl))
cut = int(len(seqs) * 0.8)
dls = DataLoaders.from_dsets(group_chunks(seqs[:cut], bs),
                             group_chunks(seqs[cut:], bs),
                             bs=bs, drop_last=True, shuffle=False)


class LMModel4(Module):
    "Stateful RNN emitting a prediction after every one of the `sl` input tokens."
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.h_h = nn.Linear(n_hidden, n_hidden)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h = 0

    def forward(self, x):
        outs = []
        for i in range(sl):  # relies on the module-level sequence length
            self.h = self.h + self.i_h(x[:, i])
            self.h = F.relu(self.h_h(self.h))
            outs.append(self.h_o(self.h))
        self.h = self.h.detach()  # truncated BPTT at the batch boundary
        return torch.stack(outs, dim=1)  # (bs, sl, vocab_sz)

    def reset(self):
        self.h = 0


LMModel4(len(vocab), 64)(seqs[0][0].unsqueeze(0))  # smoke check


def loss_func(inp, targ):
    "Flatten (bs, sl, vocab) predictions and (bs, sl) targets for cross-entropy."
    return F.cross_entropy(inp.view(-1, len(vocab)), targ.view(-1))


learn = Learner(dls, LMModel4(len(vocab), 64),
                loss_func=loss_func, metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(4, 1e-3)


class LMModel5(Module):
    """Multilayer stateful RNN built on nn.RNN.

    The original hard-coded a hidden width of 32 (rnn output, h_o input,
    and an initial state of shape (n_layers, 32, 32)) while being
    instantiated as LMModel5(len(vocab), 64, 3) — i.e. a 64-layer RNN with
    3-dim embeddings.  Here the hidden width is `n_hidden` throughout and
    the initial state is shaped (n_layers, bs, n_hidden).
    """
    def __init__(self, vocab_sz, n_layers, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.RNN(n_hidden, n_hidden, n_layers, batch_first=True)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h = torch.zeros(n_layers, bs, n_hidden)  # (layers, batch, hidden)

    def forward(self, x):
        res, h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()  # carry state forward without gradient history
        return self.h_o(res)

    def reset(self):
        self.h.zero_()


# 2 layers, 64 hidden units (the original `LMModel5(len(vocab), 64, 3)` had
# the layer/hidden arguments transposed).
learn = Learner(dls, LMModel5(len(vocab), 2, 64),
                loss_func=CrossEntropyLossFlat(), metrics=accuracy,
                cbs=ModelResetter)
learn.fit_one_cycle(4, 3e-3)