In [ ]:

!pip install fastai --upgrade

In [25]:

import fastai.collab as c
import fastai.tabular.all as t
import torch

import pandas as pd
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [26]:

path = c.untar_data(c.URLs.ML_100k)
path.ls()

Out[26]:

(#23) [Path('/root/.fastai/data/ml-100k/ub.base'),Path('/root/.fastai/data/ml-100k/u5.base'),Path('/root/.fastai/data/ml-100k/u5.test'),Path('/root/.fastai/data/ml-100k/u.info'),Path('/root/.fastai/data/ml-100k/u2.base'),Path('/root/.fastai/data/ml-100k/u3.base'),Path('/root/.fastai/data/ml-100k/ua.test'),Path('/root/.fastai/data/ml-100k/u4.test'),Path('/root/.fastai/data/ml-100k/mku.sh'),Path('/root/.fastai/data/ml-100k/u.genre')...]

In [27]:

df = pd.read_csv(path/'u.data', delimiter='\t', names=['user', 'movie', 'rating', 'timestamp'])
df.head()

Out[27]:

	user	movie	rating	timestamp
0	196	242	3	881250949
1	186	302	3	891717742
2	22	377	1	878887116
3	244	51	2	880606923
4	166	346	1	886397596

In [28]:

df.info(), df.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype
---  ------     --------------   -----
 0   user       100000 non-null  int64
 1   movie      100000 non-null  int64
 2   rating     100000 non-null  int64
 3   timestamp  100000 non-null  int64
dtypes: int64(4)
memory usage: 3.1 MB

Out[28]:

(None, (100000, 4))

In [29]:

movies = pd.read_csv(path/'u.item', delimiter='|', names=['movie', 'title'], encoding='latin-1', usecols=(0, 1))
movies.head()

Out[29]:

	movie	title
0	1	Toy Story (1995)
1	2	GoldenEye (1995)
2	3	Four Rooms (1995)
3	4	Get Shorty (1995)
4	5	Copycat (1995)

In [30]:

df = df.merge(movies)
df.head()

Out[30]:

	user	movie	rating	timestamp	title
0	196	242	3	881250949	Kolya (1996)
1	63	242	3	875747190	Kolya (1996)
2	226	242	5	883888671	Kolya (1996)
3	154	242	3	879138235	Kolya (1996)
4	306	242	5	876503793	Kolya (1996)

In [31]:

dls = c.CollabDataLoaders.from_df(
    df,
    user_name='user',
    item_name='title',
    rating_name='rating',
    bs=32
)

dls.show_batch()

	user	title	rating
0	303	Shall We Dance? (1937)	4
1	880	Get Shorty (1995)	4
2	355	Kolya (1996)	4
3	334	Sound of Music, The (1965)	2
4	826	Striking Distance (1993)	3
5	942	It Happened One Night (1934)	4
6	289	Time to Kill, A (1996)	3
7	405	My Left Foot (1989)	1
8	452	Fantasia (1940)	2
9	747	Thirty-Two Short Films About Glenn Gould (1993)	3

In [32]:

dls.classes

Out[32]:

{'title': (#1665) ['#na#',"'Til There Was You (1997)",'1-900 (1994)','101 Dalmatians (1996)','12 Angry Men (1957)','187 (1997)','2 Days in the Valley (1996)','20,000 Leagues Under the Sea (1954)','2001: A Space Odyssey (1968)','3 Ninjas: High Noon At Mega Mountain (1998)'...],
 'user': (#944) ['#na#',1,2,3,4,5,6,7,8,9...]}

In [33]:

n_users = len(dls.classes['user'])
n_mov = len(dls.classes['title'])
n_factors = 5

user_factors = torch.randn(n_users, n_factors)
mov_factors = torch.randn(n_mov, n_factors)

In [34]:

user_factors.shape, mov_factors.shape

Out[34]:

(torch.Size([944, 5]), torch.Size([1665, 5]))

In [35]:

user_factors.t()@t.one_hot(3, n_users).float() 

Out[35]:

tensor([-0.6902,  0.9260, -1.2527,  0.5396,  0.1056])

In [36]:

user_factors[3]

Out[36]:

tensor([-0.6902,  0.9260, -1.2527,  0.5396,  0.1056])

Base line model¶

In [37]:

class DotProduct(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors):
    super().__init__()
    self.user_factors = c.Embedding(n_users, n_factors)
    self.movie_factors = c.Embedding(n_movies, n_factors)

  def forward(self, x):
    users = self.user_factors(x[:, 0])
    movies = self.movie_factors(x[:, 1])

    return (users*movies).sum(dim=1)

In [38]:

dls.one_batch()[0].shape, dls.one_batch()[1].shape

Out[38]:

(torch.Size([32, 2]), torch.Size([32, 1]))

In [39]:

model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())

In [40]:

learn.fit_one_cycle(5, 5e-3)

epoch	train_loss	valid_loss	time
0	1.280856	1.324047	00:16
1	1.139197	1.145741	00:16
2	0.951527	1.011723	00:16
3	0.780049	0.897130	00:16
4	0.747144	0.874317	00:16

Adding sigmoid range¶

In [41]:

class DotProduct(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
    super().__init__()
    self.user_factors = c.Embedding(n_users, n_factors)
    self.movie_factors = c.Embedding(n_movies, n_factors)
    self.y_range = y_range

  def forward(self, x):
    users = self.user_factors(x[:, 0])
    movies = self.movie_factors(x[:, 1])

    return c.sigmoid_range((users*movies).sum(dim=1), *self.y_range)

In [42]:

model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch	train_loss	valid_loss	time
0	1.038298	1.009943	00:16
1	0.880899	0.922624	00:16
2	0.685187	0.892175	00:16
3	0.453028	0.901179	00:16
4	0.308590	0.908446	00:16

Adding bias parameter¶

In [43]:

class DotProduct(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
    super().__init__()
    self.user_factors = c.Embedding(n_users, n_factors)
    self.user_bias = c.Embedding(n_users, 1)

    self.movie_factors = c.Embedding(n_movies, n_factors)
    self.movie_bias = c.Embedding(n_movies, 1)

    self.y_range = y_range

  def forward(self, x):
    users = self.user_factors(x[:, 0])
    movies = self.movie_factors(x[:, 1])

    out = (users*movies).sum(dim=1, keepdim=True)
    out += self.user_bias(x[:, 0]) + self.movie_bias(x[:, 1])

    return c.sigmoid_range(out, *self.y_range)

In [44]:

model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch	train_loss	valid_loss	time
0	0.981148	0.945925	00:18
1	0.842618	0.882195	00:18
2	0.604646	0.913617	00:18
3	0.407241	0.941966	00:18
4	0.237349	0.950696	00:18

In [45]:

model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch	train_loss	valid_loss	time
0	0.979508	0.941654	00:18
1	0.858030	0.898452	00:18
2	0.798919	0.857021	00:18
3	0.672152	0.827143	00:18
4	0.545060	0.824844	00:18

Embedding layer from scratch¶

In [46]:

def create_params(size): return torch.nn.Parameter(torch.zeros(*size).normal_(0, 0.01))

In [47]:

create_params((3, 4))

Out[47]:

Parameter containing:
tensor([[ 0.0043, -0.0059, -0.0025, -0.0032],
        [ 0.0016, -0.0125,  0.0049,  0.0115],
        [ 0.0234,  0.0013, -0.0026, -0.0069]], requires_grad=True)

In [48]:

class DotProduct(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
    super().__init__()
    self.user_factors = create_params((n_users, n_factors))
    self.user_bias = create_params((n_users, 1))

    self.movie_factors = create_params((n_movies, n_factors))
    self.movie_bias = create_params((n_movies, 1))

    self.y_range = y_range

  def forward(self, x):
    users = self.user_factors[x[:, 0]]
    movies = self.movie_factors[x[:, 1]]

    out = (users*movies).sum(dim=1, keepdim=True)
    out += self.user_bias[x[:, 0]] + self.movie_bias[x[:, 1]]

    return c.sigmoid_range(out, *self.y_range)

In [49]:

for p in DotProduct(3, 4, 5).parameters():
  print(p.shape)

torch.Size([3, 5])
torch.Size([3, 1])
torch.Size([4, 5])
torch.Size([4, 1])

In [50]:

model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch	train_loss	valid_loss	time
0	0.934559	0.959005	00:19
1	0.847594	0.889536	00:19
2	0.797565	0.851402	00:19
3	0.709136	0.822152	00:19
4	0.585161	0.821672	00:19

In [51]:

b = learn.model.movie_bias.squeeze()
idx = b.argsort()[:5]

[dls.classes['title'][i] for i in idx]

Out[51]:

['Children of the Corn: The Gathering (1996)',
 'Mortal Kombat: Annihilation (1997)',
 'Crow: City of Angels, The (1996)',
 'Island of Dr. Moreau, The (1996)',
 'Lawnmower Man 2: Beyond Cyberspace (1996)']

In [52]:

b = learn.model.movie_bias.squeeze()
idx = b.argsort(descending=True)[:5]

[dls.classes['title'][i] for i in idx]

Out[52]:

['Titanic (1997)',
 "Schindler's List (1993)",
 'Star Wars (1977)',
 'Silence of the Lambs, The (1991)',
 'L.A. Confidential (1997)']

Training using fastai collab_learner¶

In [53]:

learn = c.collab_learner(dls, n_factors=50, y_range=(0, 5.5))

In [54]:

learn.model, learn.loss_func

Out[54]:

(EmbeddingDotBias(
   (u_weight): Embedding(944, 50)
   (i_weight): Embedding(1665, 50)
   (u_bias): Embedding(944, 1)
   (i_bias): Embedding(1665, 1)
 ), FlattenedLoss of MSELoss())

In [55]:

learn.fit_one_cycle(5, 5e-5, wd=0.1)

epoch	train_loss	valid_loss	time
0	1.894737	1.829872	00:18
1	1.807373	1.755659	00:18
2	1.684563	1.698134	00:18
3	1.673523	1.668491	00:18
4	1.664241	1.663463	00:18

In [56]:

dls.classes['title'].o2i['2 Days in the Valley (1996)']

Out[56]:

In [57]:

(torch.nn.CosineSimilarity(dim=1)(learn.model.i_weight.weight[6][None], learn.model.i_weight.weight)).argsort(descending=True)[1]

Out[57]:

tensor(37, device='cuda:0')

In [58]:

dls.classes['title'][686]

Out[58]:

'Highlander (1986)'

Using deep learning¶

In [59]:

class CollabNN(c.Module):
  def __init__(self, user_sz, item_sz, n_act=100, y_range=(0, 5.5)):
    super().__init__()
    self.user_factors = c.Embedding(*user_sz)
    self.movie_factors = c.Embedding(*item_sz)
    self.layers = torch.nn.Sequential(
        torch.nn.Linear(user_sz[1]+item_sz[1], n_act),
        torch.nn.ReLU(),
        torch.nn.Linear(n_act, 1)
    )

    self.y_range = y_range

  def forward(self, x):
    embs = self.user_factors(x[:, 0]), self.movie_factors(x[:, 1])
    x = self.layers(torch.cat(embs, dim=1))

    return c.sigmoid_range(x, *self.y_range)

In [60]:

sz = c.get_emb_sz(dls)
sz

Out[60]:

[(944, 74), (1665, 102)]

In [61]:

model = CollabNN(*sz)

In [62]:

learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.01)

epoch	train_loss	valid_loss	time
0	0.942369	0.967605	00:20
1	0.902668	0.912845	00:20
2	0.868656	0.884016	00:20
3	0.841839	0.872575	00:20
4	0.764020	0.873379	00:20

In [63]:

learn = c.collab_learner(dls, use_nn=True, layers=[10, 50], y_range=(0, 5.5))
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch	train_loss	valid_loss	time
0	0.978604	0.979922	00:23
1	0.958372	0.930927	00:24
2	0.900048	0.904019	00:23
3	0.877367	0.876002	00:23
4	0.816585	0.871966	00:24

In [64]:

learn.model

Out[64]:

EmbeddingNN(
  (embeds): ModuleList(
    (0): Embedding(944, 74)
    (1): Embedding(1665, 102)
  )
  (emb_drop): Dropout(p=0.0, inplace=False)
  (bn_cont): BatchNorm1d(0, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): LinBnDrop(
      (0): BatchNorm1d(176, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=176, out_features=10, bias=False)
      (2): ReLU(inplace=True)
    )
    (1): LinBnDrop(
      (0): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=10, out_features=50, bias=False)
      (2): ReLU(inplace=True)
    )
    (2): LinBnDrop(
      (0): Linear(in_features=50, out_features=1, bias=True)
    )
    (3): SigmoidRange(low=0, high=5.5)
  )
)

In [65]:

CollabNN((944, 74), (1665, 102))

Out[65]:

CollabNN(
  (user_factors): Embedding(944, 74)
  (movie_factors): Embedding(1665, 102)
  (layers): Sequential(
    (0): Linear(in_features=176, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=1, bias=True)
  )
)

In [66]:

t.delegates

Out[66]:

<function fastcore.meta.delegates>

In [67]:

@t.delegates(t.TabularModel)
class EmbeddingNN(t.TabularModel):
  def __init__(self, emb_sz, layers, **kwargs):
    super().__init__(emb_sz, layers=layers, n_cont=0, out_sz=1, **kwargs)

kwargs && args¶

kwargs

In [68]:

def s(a, **kwargs): 
  return kwargs

In [69]:

s(2, s='dhdj', j=3)

Out[69]:

{'j': 3, 's': 'dhdj'}

args

In [70]:

def a(b, *size): return b, size

In [71]:

a(2, 3, 'klk', 5)

Out[71]:

(2, (3, 'klk', 5))

In [72]:

learn.show_results()

	user	title	rating	rating_pred
0	621	1366	2	4.168081
1	577	1498	4	3.707471
2	846	920	4	4.199018
3	793	785	2	2.995941
4	336	407	3	3.256781
5	394	1544	5	4.228620
6	345	1336	4	4.130409
7	815	1581	4	4.287453
8	99	552	4	4.037339

Using cross entropy loss¶

In [73]:

df1 = df.copy()
df1['rating'] = df1['rating']-1

In [74]:

dls = c.CollabDataLoaders.from_df(
    df1,
    user_name='user',
    item_name='title',
    rating_name='rating',
    bs=32
)

dls.show_batch()

	user	title	rating
0	891	Courage Under Fire (1996)	4
1	806	Raiders of the Lost Ark (1981)	4
2	790	Romeo Is Bleeding (1993)	1
3	311	Last of the Mohicans, The (1992)	2
4	790	Emma (1996)	1
5	151	Crimson Tide (1995)	2
6	489	Devil's Advocate, The (1997)	3
7	314	Pallbearer, The (1996)	1
8	411	Rear Window (1954)	4
9	483	Powder (1995)	1

In [75]:

class CollabNN(c.Module):
  def __init__(self, user_sz, item_sz, n_act=100):
    super().__init__()
    self.user_factors = c.Embedding(*user_sz)
    self.movie_factors = c.Embedding(*item_sz)
    self.layers = torch.nn.Sequential(
        torch.nn.Linear(user_sz[1]+item_sz[1], n_act),
        torch.nn.ReLU(),
        torch.nn.Linear(n_act, 5)
    )

  def forward(self, x):
    embs = self.user_factors(x[:, 0]), self.movie_factors(x[:, 1])
    x = self.layers(torch.cat(embs, dim=1))

    return x

In [76]:

model = CollabNN(*sz)
learn = c.Learner(dls, model, loss_func=c.CrossEntropyLossFlat(), metrics=c.accuracy)

In [77]:

learn.fit_one_cycle(5, 5e-3, wd=0.01)

epoch	train_loss	valid_loss	accuracy	time
0	1.284872	1.289579	0.418500	00:20
1	1.248881	1.264122	0.440350	00:20
2	1.215094	1.232578	0.447150	00:20
3	1.162452	1.226760	0.456100	00:20
4	1.091269	1.234871	0.454250	00:20