# Lesson 7: Human numbers¶

In [1]:
from fastai.text import *

In [2]:
bs = 64


## Data¶

In [3]:
path = untar_data(URLs.HUMAN_NUMBERS)
path.ls()

Out[3]:
[PosixPath('/home/ubuntu/.fastai/data/human_numbers/valid.txt'),
PosixPath('/home/ubuntu/.fastai/data/human_numbers/train.txt')]
In [5]:
def readnums(d):
    """Read text file `d` from the dataset `path` and return a one-element
    list containing all its lines joined into a single comma-separated string.

    Fix: the original opened the file without ever closing it (leaked file
    handle); a `with` block guarantees closure. Iterating the file object
    directly also avoids materialising `readlines()`.
    """
    with open(path / d) as f:
        return [', '.join(line.strip() for line in f)]

In [6]:
train_txt = readnums('train.txt')
train_txt[0][:80]

Out[6]:
'one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve, thirt'
In [10]:
valid_txt = readnums('valid.txt')
valid_txt[0][-80:]

Out[10]:
' nine thousand nine hundred ninety eight, nine thousand nine hundred ninety nine'
In [12]:
train = TextList(train_txt, path=path)
valid = TextList(valid_txt, path=path)

src = ItemLists(path=path, train=train, valid=valid).label_for_lm()
data = src.databunch(bs=bs)

In [13]:
train[0].text[:80]

Out[13]:
'xxbos one , two , three , four , five , six , seven , eight , nine , ten , eleve'
In [14]:
len(data.valid_ds[0][0].data)

Out[14]:
13017
In [24]:
data.bptt, len(data.valid_dl)

Out[24]:
(70, 3)
In [26]:
13017/70/bs

Out[26]:
2.905580357142857
In [27]:
it = iter(data.valid_dl)
x1, y1 = next(it)
x2, y2 = next(it)
x3, y3 = next(it)
it.close()

In [28]:
x1.numel() + x2.numel() + x3.numel()

Out[28]:
12928
In [29]:
x1.shape, y1.shape

Out[29]:
(torch.Size([64, 95]), torch.Size([64, 95]))
In [30]:
x2.shape, y2.shape

Out[30]:
(torch.Size([64, 76]), torch.Size([64, 76]))
In [39]:
x1[:, 0]

Out[39]:
tensor([ 2,  8, 10, 10, 23, 18, 10, 22, 18,  8, 21, 18,  9, 20, 18,  9, 18, 18,
9, 15, 18,  9,  8, 18,  9,  8,  8,  9,  8,  9,  9,  8, 19, 19, 26, 10,
9,  8,  8, 22, 19, 13, 21, 19,  9, 20, 19,  9, 31, 19,  9, 16, 19,  9,
8, 19,  9,  8,  9,  9,  8, 10,  9,  8], device='cuda:0')
In [34]:
y1[:, 0]

Out[34]:
tensor([18, 18, 26, 11, 12, 10, 12, 13, 10, 18, 14, 10, 27, 15, 10, 26, 10, 10,
25,  8, 10, 24, 18, 10, 23, 18, 18, 22, 18, 18, 21, 18,  9, 10, 14, 11,
23, 19, 19, 11, 10,  9, 12, 10, 27, 13, 10, 26,  8, 10, 25,  9, 10, 24,
19, 10, 23, 19, 34, 22, 19, 19, 21, 19], device='cuda:0')
In [35]:
v = data.valid_ds.vocab

In [37]:
x1[0].shape

Out[37]:
torch.Size([95])
In [36]:
v.textify(x1[0])

Out[36]:
'xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand eighteen , eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three'
In [40]:
v.textify(y1[0])

Out[40]:
'eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand eighteen , eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three ,'
In [41]:
v.textify(x2[0])

Out[41]:
', eight thousand twenty four , eight thousand twenty five , eight thousand twenty six , eight thousand twenty seven , eight thousand twenty eight , eight thousand twenty nine , eight thousand thirty , eight thousand thirty one , eight thousand thirty two , eight thousand thirty three , eight thousand thirty four , eight thousand thirty five , eight thousand thirty six , eight thousand thirty seven , eight thousand thirty eight , eight'
In [42]:
v.textify(x3[0])

Out[42]:
'thousand thirty nine , eight thousand forty , eight thousand forty one , eight thousand forty two , eight thousand forty three , eight thousand forty four , eight thousand forty'
In [43]:
v.textify(x1[1])

Out[43]:
', eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine , eight thousand sixty , eight thousand sixty one , eight thousand sixty two , eight thousand sixty three , eight thousand sixty four , eight'
In [44]:
v.textify(x2[1])

Out[44]:
'thousand sixty five , eight thousand sixty six , eight thousand sixty seven , eight thousand sixty eight , eight thousand sixty nine , eight thousand seventy , eight thousand seventy one , eight thousand seventy two , eight thousand seventy three , eight thousand seventy four , eight thousand seventy five , eight thousand seventy six , eight thousand seventy seven , eight thousand seventy eight , eight thousand seventy nine , eight thousand eighty'
In [45]:
v.textify(x3[1])

Out[45]:
', eight thousand eighty one , eight thousand eighty two , eight thousand eighty three , eight thousand eighty four , eight thousand eighty five , eight thousand eighty six ,'
In [46]:
v.textify(x3[-1])

Out[46]:
'one , nine thousand nine hundred ninety two , nine thousand nine hundred ninety three , nine thousand nine hundred ninety four , nine thousand nine hundred ninety five , nine'
In [47]:
data.show_batch(ds_type=DatasetType.Valid)

idx text
0 xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand
1 , eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand
2 thousand eighty seven , eight thousand eighty eight , eight thousand eighty nine , eight thousand ninety , eight thousand ninety one , eight thousand ninety two , eight thousand ninety three , eight thousand ninety four , eight thousand ninety five , eight thousand ninety six , eight thousand ninety seven , eight thousand ninety eight , eight thousand ninety nine , eight thousand one hundred
3 thousand one hundred twenty three , eight thousand one hundred twenty four , eight thousand one hundred twenty five , eight thousand one hundred twenty six , eight thousand one hundred twenty seven , eight thousand one hundred twenty eight , eight thousand one hundred twenty nine , eight thousand one hundred thirty , eight thousand one hundred thirty one , eight thousand one hundred thirty two
4 fifty two , eight thousand one hundred fifty three , eight thousand one hundred fifty four , eight thousand one hundred fifty five , eight thousand one hundred fifty six , eight thousand one hundred fifty seven , eight thousand one hundred fifty eight , eight thousand one hundred fifty nine , eight thousand one hundred sixty , eight thousand one hundred sixty one , eight thousand

## Single fully connected model¶

In [48]:
data = src.databunch(bs=bs, bptt=3, max_len=0, p_bptt=1.)

In [49]:
x, y = data.one_batch()
x.shape, y.shape

Out[49]:
(torch.Size([64, 3]), torch.Size([64, 3]))
In [50]:
nv = len(v.itos)
nv

Out[50]:
38
In [51]:
nh = 64

In [52]:
def loss4(input, target):
    """Cross-entropy loss scored only against the final token of each
    target sequence (the model predicts just the last word)."""
    final_targets = target[:, -1]
    return F.cross_entropy(input, final_targets)


def acc4(input, target):
    """Accuracy measured only on the final token of each target sequence."""
    final_targets = target[:, -1]
    return accuracy(input, final_targets)

In [53]:
class Model0(nn.Module):
    """Single fully connected model: one hard-coded step per input token.

    Embeds each token, adds it into a running hidden state, and applies the
    same Linear + ReLU + BatchNorm at every position. Handles sequences of
    up to three tokens (the bptt=3 batches used here).
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)  # input  -> hidden  (green arrow)
        self.h_h = nn.Linear(nh, nh)     # hidden -> hidden  (brown arrow)
        self.h_o = nn.Linear(nh, nv)     # hidden -> output  (blue arrow)
        self.bn = nn.BatchNorm1d(nh)

    def forward(self, x):
        # First token initialises the hidden state (no h_h applied yet).
        hidden = self.bn(F.relu(self.i_h(x[:, 0])))
        # Fold in the remaining tokens (at most two more) one at a time,
        # reusing the same weights at each step.
        for t in range(1, min(x.shape[1], 3)):
            hidden = hidden + self.i_h(x[:, t])
            hidden = self.bn(F.relu(self.h_h(hidden)))
        return self.h_o(hidden)

In [54]:
learn = Learner(data, Model0(), loss_func=loss4, metrics=acc4)

In [55]:
learn.fit_one_cycle(6, 1e-4)

Total time: 00:12

epoch train_loss valid_loss acc4
1 3.609755 3.622831 0.046186
2 3.103226 3.217704 0.425781
3 2.523298 2.733683 0.449908
4 2.192890 2.447449 0.452665
5 2.065991 2.342171 0.454274
6 2.039980 2.326865 0.454274

## Same thing with a loop¶

In [56]:
class Model1(nn.Module):
    """Model0 rewritten as a loop over however many tokens the batch holds —
    the same weights are applied at every step (an unrolled RNN)."""

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)  # input  -> hidden  (green arrow)
        self.h_h = nn.Linear(nh, nh)     # hidden -> hidden  (brown arrow)
        self.h_o = nn.Linear(nh, nv)     # hidden -> output  (blue arrow)
        self.bn = nn.BatchNorm1d(nh)

    def forward(self, x):
        batch_size, seq_len = x.shape
        # Fresh zero hidden state for every batch, on the same device as x.
        state = torch.zeros(batch_size, nh).to(device=x.device)
        for t in range(seq_len):
            state = state + self.i_h(x[:, t])
            state = self.bn(F.relu(self.h_h(state)))
        # Predict only after the final token.
        return self.h_o(state)

In [57]:
learn = Learner(data, Model1(), loss_func=loss4, metrics=acc4)

In [58]:
learn.fit_one_cycle(6, 1e-4)

Total time: 00:13

epoch train_loss valid_loss acc4
1 3.550785 3.565774 0.039062
2 2.994696 3.056980 0.434283
3 2.444730 2.576163 0.462546
4 2.147489 2.336781 0.463925
5 2.030240 2.252541 0.465533
6 2.005649 2.240313 0.465763

## Multi fully connected model¶

In [59]:
data = src.databunch(bs=bs, bptt=20)

In [60]:
x, y = data.one_batch()
x.shape, y.shape

Out[60]:
(torch.Size([64, 45]), torch.Size([64, 45]))
In [61]:
class Model2(nn.Module):
    """Like Model1, but emits a prediction after *every* token instead of
    only after the last one, so the loss covers the whole sequence."""

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)  # input  -> hidden  (green arrow)
        self.h_h = nn.Linear(nh, nh)     # hidden -> hidden  (brown arrow)
        self.h_o = nn.Linear(nh, nv)     # hidden -> output  (blue arrow)
        self.bn = nn.BatchNorm1d(nh)

    def forward(self, x):
        h = torch.zeros(x.shape[0], nh).to(device=x.device)
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:, i])
            h = F.relu(self.h_h(h))
            res.append(self.h_o(self.bn(h)))
        # BUG FIX: the original forward never returned anything (implicit
        # None), which would crash the loss computation. Stack the
        # per-timestep predictions into shape (bs, seq_len, nv) and return.
        return torch.stack(res, dim=1)

In [62]:
learn = Learner(data, Model2(), metrics=accuracy)

In [63]:
learn.fit_one_cycle(10, 1e-4, pct_start=0.1)

Total time: 00:08

epoch train_loss valid_loss accuracy
1 3.613750 3.468091 0.094572
2 3.507750 3.366867 0.201829
3 3.378657 3.263256 0.307511
4 3.248644 3.172283 0.352421
5 3.129175 3.094844 0.377534
6 3.031686 3.033968 0.382643
7 2.954897 2.999621 0.390039
8 2.903233 2.998493 0.382097
9 2.870547 2.957848 0.398152
10 2.856550 2.963168 0.395220

## Maintain state¶

In [64]:
class Model3(nn.Module):
    """Model2 plus a hidden state that persists *between* batches
    (truncated backpropagation through time): `self.h` carries over,
    detached so gradients never flow across batch boundaries.

    NOTE(review): the state is allocated once for a fixed batch size `bs`
    on the GPU, so every batch is assumed to have exactly `bs` rows and
    CUDA must be available — confirm this holds for the dataloader used.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)  # input  -> hidden  (green arrow)
        self.h_h = nn.Linear(nh, nh)     # hidden -> hidden  (brown arrow)
        self.h_o = nn.Linear(nh, nv)     # hidden -> output  (blue arrow)
        self.bn = nn.BatchNorm1d(nh)
        self.h = torch.zeros(bs, nh).cuda()  # persistent hidden state

    def forward(self, x):
        outputs = []
        state = self.h
        for t in range(x.shape[1]):
            state = F.relu(self.h_h(state + self.i_h(x[:, t])))
            outputs.append(self.bn(state))
        # Carry the state into the next batch, cut from the autograd graph.
        self.h = state.detach()
        stacked = torch.stack(outputs, dim=1)  # (bs, seq_len, nh)
        return self.h_o(stacked)

In [65]:
learn = Learner(data, Model3(), metrics=accuracy)

In [66]:
learn.fit_one_cycle(20, 3e-3)

Total time: 00:15

epoch train_loss valid_loss accuracy
1 3.498832 3.457415 0.145312
2 3.161868 2.874971 0.449906
3 2.515988 2.037722 0.467106
4 1.967317 2.110590 0.316073
5 1.672088 2.135250 0.337800
6 1.493022 2.155137 0.344380
7 1.335257 2.116041 0.394331
8 1.201654 2.299078 0.408730
9 1.090030 2.624311 0.427448
10 1.008497 2.462456 0.422197
11 0.971175 2.352604 0.437458
12 0.906372 2.458878 0.453475
13 0.843947 2.455768 0.461694
14 0.794068 2.469155 0.458606
15 0.754808 2.490652 0.453702
16 0.722530 2.597134 0.453636
17 0.689590 2.633052 0.452083
18 0.670493 2.525233 0.467502
19 0.656720 2.663035 0.460008
20 0.657596 2.545538 0.464172

## PyTorch nn.RNN¶

In [67]:
class Model4(nn.Module):
    """Model3 with the hand-written recurrence replaced by PyTorch's
    built-in nn.RNN; hidden state still persists across batches (detached).

    NOTE(review): assumes a fixed batch size `bs` and CUDA availability,
    same as Model3.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)
        self.rnn = nn.RNN(nh, nh, batch_first=True)
        self.h_o = nn.Linear(nh, nv)
        self.bn = BatchNorm1dFlat(nh)
        # Persistent hidden state: (num_layers=1, bs, nh) on the GPU.
        self.h = torch.zeros(1, bs, nh).cuda()

    def forward(self, x):
        embedded = self.i_h(x)
        outputs, new_state = self.rnn(embedded, self.h)
        self.h = new_state.detach()  # truncated BPTT across batches
        return self.h_o(self.bn(outputs))

In [68]:
learn = Learner(data, Model4(), metrics=accuracy)

In [69]:
learn.fit_one_cycle(20, 3e-3)

Total time: 00:09

epoch train_loss valid_loss accuracy
1 3.488441 3.392691 0.156386
2 3.016769 2.516589 0.461222
3 2.346680 1.942615 0.467149
4 1.879993 2.002077 0.312014
5 1.631300 1.904240 0.432754
6 1.462227 1.904621 0.482044
7 1.300646 1.851880 0.492365
8 1.153744 1.653138 0.492104
9 1.008373 1.549363 0.494715
10 0.878932 1.600824 0.500036
11 0.777283 1.508291 0.520914
12 0.712406 1.532992 0.570639
13 0.626053 1.426348 0.569779
14 0.555844 1.715479 0.545101
15 0.499043 1.626162 0.542316
16 0.458131 1.536722 0.548794
17 0.438000 1.548677 0.545291
18 0.409515 1.462034 0.552396
19 0.394507 1.477735 0.554738
20 0.390583 1.518102 0.549247

## 2-layer GRU¶

In [70]:
class Model5(nn.Module):
    """Model4 upgraded to a 2-layer GRU; the persistent hidden state now
    has one slice per layer.

    NOTE(review): assumes a fixed batch size `bs` and CUDA availability,
    same as Model3/Model4.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)
        self.rnn = nn.GRU(nh, nh, 2, batch_first=True)
        self.h_o = nn.Linear(nh, nv)
        self.bn = BatchNorm1dFlat(nh)
        # Persistent hidden state: (num_layers=2, bs, nh) on the GPU.
        self.h = torch.zeros(2, bs, nh).cuda()

    def forward(self, x):
        embedded = self.i_h(x)
        outputs, new_state = self.rnn(embedded, self.h)
        self.h = new_state.detach()  # truncated BPTT across batches
        return self.h_o(self.bn(outputs))

In [71]:
learn = Learner(data, Model5(), metrics=accuracy)

In [72]:
learn.fit_one_cycle(10, 1e-2)

Total time: 00:05

epoch train_loss valid_loss accuracy
1 2.983626 2.321548 0.444593
2 1.880445 1.621104 0.546462
3 1.015526 1.040678 0.796203
4 0.525928 0.822173 0.829538
5 0.268591 1.000392 0.813538
6 0.140787 0.820461 0.842801
7 0.079602 0.882789 0.833222
8 0.047994 0.795663 0.843396
9 0.037621 0.872451 0.833012
10 0.030443 0.875417 0.833148