#hide
from fastai.gen_doc.nbdoc import *
from utils import *
from fastai.collab import *
path = untar_data(URLs.ML_100k)
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None,
                      names=['user','movie','rating','timestamp'])
dls = CollabDataLoaders.from_df(ratings, item_name='movie', bs=64).cpu()
n_users,n_movies = len(dls.classes['user']),len(dls.classes['movie'])
n_factors = 5
import torch.nn as nn
import torch.nn.functional as F
user_embedding = (torch.randn(n_users, n_factors) * 0.1).requires_grad_()
movie_embedding = (torch.randn(n_movies, n_factors) * 0.1).requires_grad_()
user_bias = torch.zeros(n_users).requires_grad_()
movie_bias = torch.zeros(n_movies).requires_grad_()
x,y = dls.one_batch()
x.shape,y.shape
(torch.Size([64, 2]), torch.Size([64, 1]))
users,movies = x.T
users.shape,movies.shape
(torch.Size([64]), torch.Size([64]))
usr_emb = user_embedding[users]
usr_b = user_bias[users]
mov_emb = movie_embedding[movies]
mov_b = movie_bias[movies]
usr_emb.size()
torch.Size([64, 5])
activation = (usr_emb * mov_emb).sum(dim=1) + usr_b + mov_b
output = 5 * torch.sigmoid(activation)
def model(x):
    users,movies = x.T
    usr_emb,usr_b = user_embedding[users],user_bias[users]
    mov_emb,mov_b = movie_embedding[movies],movie_bias[movies]
    activation = (usr_emb * mov_emb).sum(dim=1) + usr_b + mov_b
    return 5 * torch.sigmoid(activation)
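# Quick sanity check: the handwritten model returns one prediction per row of the batch.
model(x).shape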
# flatten both tensors so a (64,) prediction doesn't broadcast against a (64,1) target
def mse_loss(output, target): return (output.view(-1)-target.view(-1)).pow(2).mean()
loss = mse_loss(output,y)
loss.backward()
user_embedding.grad
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])
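# Only the embedding rows for users that appear in this batch receive a gradient;
# as a quick check, all of the gradient mass sits in those rows:
user_embedding.grad[users.unique()].abs().sum(), user_embedding.grad.abs().sum()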
lr = 1e-2
user_embedding.data -= lr * user_embedding.grad
def fit_one_epoch(model, dls, lr):
    for x,y in dls.train:
        output = model(x)
        loss = mse_loss(output,y)
        loss.backward()
        for param in [user_embedding, movie_embedding, user_bias, movie_bias]:
            param.data -= lr * param.grad
            param.grad.zero_()
def validate(model, dls):
    n_elem,loss = 0,0.
    with torch.no_grad():
        for x,y in dls.valid:
            output = model(x)
            loss += mse_loss(output,y) * y.size(0)
            n_elem += y.size(0)
    return loss/n_elem
validate(model, dls)
tensor(2.3391)
def fit(model, dls, n_epoch, lr):
    for i in progress_bar(range(n_epoch)):
        fit_one_epoch(model, dls, lr)
        val_loss = validate(model, dls)
        print(f'Epoch {i+1}, validation loss: {val_loss:.6f}')
fit(model, dls, 5, 3e-2)
Epoch 1, validation loss: 1.759184
Epoch 2, validation loss: 1.535529
Epoch 3, validation loss: 1.434513
Epoch 4, validation loss: 1.382121
Epoch 5, validation loss: 1.351903
class DotProductBias(Module):
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range

    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        res = (users * movies).sum(dim=1, keepdim=True)
        res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1])
        return sigmoid_range(res, *self.y_range)
model = DotProductBias(n_users, n_movies, n_factors)
out = model(x)
loss = F.mse_loss(out, y.float())
loss_func = nn.MSELoss()
loss = loss_func(out, y.float())
loss_func = MSELossFlat()
loss = loss_func(out, y)
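# MSELossFlat flattens its inputs (and casts the targets to float) before computing
# the MSE, so no manual casting or shape juggling is needed; a quick check (sketch)
# that it agrees with plain F.mse_loss on flattened tensors:
torch.allclose(loss, F.mse_loss(out.view(-1), y.float().view(-1)))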
def fit_one_epoch(model, dls, lr):
    for x,y in dls.train:
        output = model(x)
        loss = loss_func(output,y)
        loss.backward()
        for param in model.parameters(): param.data -= lr * param.grad
        model.zero_grad()
out = out.cuda()
model = model.cuda()
out = out.to(default_device())
model = model.to(default_device())
default_device()
device(type='cuda', index=5)
dls = CollabDataLoaders.from_df(ratings, item_name='movie', bs=64)
fit_one_epoch(model, dls, 1e-3)
import torch.optim as optim
opt = optim.SGD(model.parameters(), lr=1e-3)
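# For vanilla SGD, opt.step() performs essentially the same in-place update written
# by hand above, and opt.zero_grad() resets every .grad for the next batch.
# Roughly (a sketch, ignoring momentum, weight decay, and parameter groups):
def sgd_step_sketch(params, lr):
    with torch.no_grad():
        for p in params: p -= lr * p.grad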
def fit_one_epoch(model, opt, dls):
    for x,y in dls.train:
        output = model(x)
        loss = loss_func(output,y)
        loss.backward()
        opt.step()
        opt.zero_grad()
loss_func = MSELossFlat(reduction='sum')
def validate(model, dls):
    n_elem,loss = 0,0.
    with torch.no_grad():
        for x,y in dls.valid:
            output = model(x)
            loss += loss_func(output,y)
            n_elem += y.size(0)
    return loss/n_elem
def fit(model, dls, n_epoch, lr, opt_func=optim.SGD):
    opt = opt_func(model.parameters(), lr=lr)
    for i in progress_bar(range(n_epoch)):
        fit_one_epoch(model, opt, dls)
        val_loss = validate(model, dls)
        print(f'Epoch {i+1}, validation loss: {val_loss:.6f}')
model = DotProductBias(n_users, n_movies, n_factors).cuda()
fit(model, dls, 4, 1e-3, opt_func=optim.Adam)
Epoch 1, validation loss: 1.034514
Epoch 2, validation loss: 0.909997
Epoch 3, validation loss: 0.883697
Epoch 4, validation loss: 0.869609
from fastai.vision.all import *
path = untar_data(URLs.MNIST_SAMPLE)
dls = ImageDataLoaders.from_folder(path)
dls.show_batch(nrows=3, figsize=(6,6))
def fit_one_epoch(model, opt, dls, loss_func):
    for x,y in dls.train:
        output = model(x)
        loss = loss_func(output,y)
        loss.backward()
        opt.step()
        opt.zero_grad()
def validate(model, dls, loss_func):
    n_elem,loss,corrects = 0,0.,0.
    with torch.no_grad():
        for x,y in dls.valid:
            output = model(x)
            loss += loss_func(output, y, reduction='sum')
            corrects += (output.argmax(dim=1) == y).float().sum()
            n_elem += y.size(0)
    return loss/n_elem,corrects/n_elem
def fit(model, dls, n_epoch, lr, loss_func, opt_func=optim.SGD):
    opt = opt_func(model.parameters(), lr=lr)
    for i in progress_bar(range(n_epoch)):
        fit_one_epoch(model, opt, dls, loss_func)
        val_loss, acc = validate(model, dls, loss_func)
        print(f'Epoch {i+1}, validation loss: {val_loss:.6f}, accuracy: {acc * 100:.2f}%')
class FirstNeuralNet(Module):
    def __init__(self, n_in, n_hidden, n_out):
        self.linear1 = nn.Linear(n_in, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_out)

    def forward(self, x):
        # the images load as three identical channels; keep one and flatten it to a vector
        x = x[:,0].view(x.size(0), -1)
        activation = F.relu(self.linear1(x))
        return self.linear2(activation)
model = FirstNeuralNet(28*28, 1000, 2).cuda()
fit(model, dls, 2, 1e-3, F.cross_entropy, opt_func=optim.Adam)
Epoch 1, validation loss: 0.035692, accuracy: 98.82%
Epoch 2, validation loss: 0.020682, accuracy: 99.17%
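# The same architecture expressed with nn.Sequential (a sketch; the small
# SelectAndFlatten helper is hypothetical, added only to mirror the x[:,0]
# channel selection done in FirstNeuralNet.forward). It can be trained with
# the exact same fit call as above.
class SelectAndFlatten(Module):
    def forward(self, x): return x[:,0].view(x.size(0), -1)

seq_model = nn.Sequential(SelectAndFlatten(), nn.Linear(28*28, 1000), nn.ReLU(), nn.Linear(1000, 2))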
def log_softmax(x): return (x.exp()/(x.exp().sum(-1,keepdim=True))).log()
x,y = dls.one_batch()
pred = model(x)
sm_pred = log_softmax(pred)
sm_pred[0,0].item(), sm_pred[1,0].item(), sm_pred[2,1].item()
(-12.337810516357422, -3.4572341442108154, -0.00045327682164497674)
sm_pred[[0,1,2], [0,0,1]]
tensor([-1.2338e+01, -3.4572e+00, -4.5328e-04], device='cuda:5', grad_fn=<IndexBackward>)
def nll(input, target): return -input[range(target.shape[0]), target].mean()
loss = nll(sm_pred, y)
loss
tensor(0.0014, device='cuda:5', grad_fn=<NegBackward>)
def log_softmax(x): return x - x.exp().sum(-1,keepdim=True).log()
def logsumexp(x):
    m = x.max(-1)[0]
    return m + (x-m[:,None]).exp().sum(-1).log()
logsumexp(pred)[0], pred.logsumexp(-1)[0]
(tensor(5.9457, device='cuda:5', grad_fn=<SelectBackward>), tensor(5.9457, device='cuda:5', grad_fn=<SelectBackward>))
def log_softmax(x): return x - x.logsumexp(-1,keepdim=True)
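# Quick check (sketch): the logsumexp-based log_softmax should match PyTorch's
# built-in version up to float32 precision.
(log_softmax(pred) - F.log_softmax(pred, dim=-1)).abs().max()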
F.nll_loss(F.log_softmax(pred, -1), y.cuda())
tensor(0.0014, device='cuda:5', grad_fn=<NllLossBackward>)
F.cross_entropy(pred, y.cuda())
tensor(0.0014, device='cuda:5', grad_fn=<NllLossBackward>)
class DropoutLayer(Module):
    def __init__(self, p): self.p = p

    def forward(self, x):
        if self.training:
            mask = x.new(*x.size()).bernoulli_(1-self.p).div_(1-self.p)
            return mask * x
        return x  # dropout is the identity at inference time
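# Quick check (sketch): in training mode roughly p of the activations are zeroed
# and the survivors are scaled by 1/(1-p), so the expected activation is unchanged;
# in eval mode the input passes through untouched.
tst_dp = DropoutLayer(0.5)
inp = torch.ones(4,10)
tst_dp.train()(inp), tst_dp.eval()(inp)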
class BatchNormLayer(Module):
    def __init__(self, nf, mom=0.1, eps=1e-5):
        # NB: pytorch bn mom is opposite of what you'd expect!
        self.mom,self.eps = mom,eps
        self.mults = nn.Parameter(torch.ones (nf,))
        self.adds  = nn.Parameter(torch.zeros(nf,))
        self.register_buffer('vars',  torch.ones(1,nf))
        self.register_buffer('means', torch.zeros(1,nf))

    def update_stats(self, x):
        m = x.mean(0, keepdim=True)
        v = x.var (0, keepdim=True)
        self.means.lerp_(m, self.mom)
        self.vars.lerp_ (v, self.mom)
        return m,v

    def forward(self, x):
        if self.training:
            with torch.no_grad(): m,v = self.update_stats(x)
        else: m,v = self.means,self.vars
        x = (x-m) / (v+self.eps).sqrt()
        return x*self.mults + self.adds
tst = BatchNormLayer(100)
tst.state_dict().keys()
odict_keys(['mults', 'adds', 'vars', 'means'])
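# Quick check (sketch): in training mode the layer normalizes each feature of the
# batch to roughly zero mean and unit variance before applying mults and adds
# (which start at 1 and 0, so they change nothing yet).
x_tst = torch.randn(64,100) * 3 + 5
out_tst = tst(x_tst)
out_tst.mean(), out_tst.std()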