#hide ! [ -e /content ] && pip install -Uqq fastbook import fastbook fastbook.setup_book() #hide from fastbook import * [[chapter_collab]] from fastai.collab import * from fastai.tabular.all import * path = untar_data(URLs.ML_100k) ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None, names=['user','movie','rating','timestamp']) ratings.head() last_skywalker = np.array([0.98,0.9,-0.9]) user1 = np.array([0.9,0.8,-0.6]) (user1*last_skywalker).sum() casablanca = np.array([-0.99,-0.3,0.8]) (user1*casablanca).sum() movies = pd.read_csv(path/'u.item', delimiter='|', encoding='latin-1', usecols=(0,1), names=('movie','title'), header=None) movies.head() ratings = ratings.merge(movies) ratings.head() dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64) dls.show_batch() dls.classes n_users = len(dls.classes['user']) n_movies = len(dls.classes['title']) n_factors = 5 user_factors = torch.randn(n_users, n_factors) movie_factors = torch.randn(n_movies, n_factors) one_hot_3 = one_hot(3, n_users).float() user_factors.t() @ one_hot_3 user_factors[3] class Example: def __init__(self, a): self.a = a def say(self,x): return f'Hello {self.a}, {x}.' 
ex = Example('Sylvain')
ex.say('nice to meet you')

class DotProduct(Module):
    "Collaborative-filtering model: prediction = dot(user factors, movie factors)."
    # NOTE(review): fastai's Module registers submodules without an explicit
    # super().__init__() call — do not convert this to plain nn.Module as-is.
    def __init__(self, n_users, n_movies, n_factors):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)

    def forward(self, x):
        # x holds index pairs: column 0 = user index, column 1 = movie index
        # (see dls.one_batch() below for the batch layout).
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        return (users * movies).sum(dim=1)

x,y = dls.one_batch()
x.shape

model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

class DotProduct(Module):
    "Second version: squash predictions into y_range with sigmoid_range."
    # Deliberate redefinition of the class above (book narrative): forcing the
    # output into (0, 5.5) means the model need not learn the rating scale.
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.y_range = y_range

    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        return sigmoid_range((users * movies).sum(dim=1), *self.y_range)

model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

class DotProductBias(Module):
    "Third version: add per-user and per-movie bias terms to the dot product."
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range

    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        # keepdim=True so res is (batch, 1) and broadcasts with the bias
        # embeddings, which are also (batch, 1).
        res = (users * movies).sum(dim=1, keepdim=True)
        res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1])
        return sigmoid_range(res, *self.y_range)

model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

#hide_input
#id parabolas
# Illustration for weight decay: larger `a` makes a*x^2 steeper, i.e. a larger
# penalty coefficient pulls weights harder toward zero.
x = np.linspace(-2,2,100)
a_s = [1,2,5,10,50]
ys = [a * x**2 for a in a_s]
_,ax = plt.subplots(figsize=(8,6))
for a,y in zip(a_s,ys): ax.plot(x,y, label=f'a={a}')
ax.set_ylim([0,5])
ax.legend();

model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)  # wd = weight-decay (L2) coefficient

# What counts as a trainable parameter? A plain tensor attribute is NOT
# registered by Module…
class T(Module):
    def __init__(self): self.a = torch.ones(3)

L(T().parameters())

# …wrapping it in nn.Parameter registers it…
class T(Module):
    def __init__(self): self.a = nn.Parameter(torch.ones(3))

L(T().parameters())

# …and nn.Linear's own weight is already an nn.Parameter.
class T(Module):
    def __init__(self): self.a = nn.Linear(1, 3, bias=False)

t = T()
L(t.parameters())
type(t.a.weight)

def create_params(size):
    "Create a trainable tensor of shape `size`, initialised in place from N(0, 0.01)."
    return nn.Parameter(torch.zeros(*size).normal_(0, 0.01))

class DotProductBias(Module):
    "Same model as the Embedding version, built from raw parameter tensors."
    # Deliberate redefinition (book narrative): factors/biases are indexed
    # directly (self.user_factors[idx]) instead of called as Embedding layers.
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = create_params([n_users, n_factors])
        self.user_bias = create_params([n_users])
        self.movie_factors = create_params([n_movies, n_factors])
        self.movie_bias = create_params([n_movies])
        self.y_range = y_range

    def forward(self, x):
        users = self.user_factors[x[:,0]]
        movies = self.movie_factors[x[:,1]]
        res = (users*movies).sum(dim=1)
        res += self.user_bias[x[:,0]] + self.movie_bias[x[:,1]]
        return sigmoid_range(res, *self.y_range)

model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

# Interpretation: movies with the most negative learned bias (disliked even by
# users who like similar movies)…
movie_bias = learn.model.movie_bias.squeeze()
idxs = movie_bias.argsort()[:5]
[dls.classes['title'][i] for i in idxs]

# …and the most positive bias (liked beyond what their factors predict).
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes['title'][i] for i in idxs]

#hide_input
#id img_pca_movie
#caption Representation of movies based on two strongest PCA components
#alt Representation of movies based on two strongest PCA components
# Project the factor vectors of the 1000 most-rated movies onto their first
# three principal components and scatter-plot 50 of them.
g = ratings.groupby('title')['rating'].count()
top_movies = g.sort_values(ascending=False).index.values[:1000]
top_idxs = tensor([learn.dls.classes['title'].o2i[m] for m in top_movies])
movie_w = learn.model.movie_factors[top_idxs].cpu().detach()
movie_pca = movie_w.pca(3)  # NOTE(review): .pca is a fastai tensor extension
fac0,fac1,fac2 = movie_pca.t()
idxs = list(range(50))
X = fac0[idxs]
Y = fac2[idxs]
plt.figure(figsize=(12,12))
plt.scatter(X, Y)
for i, x, y in zip(top_movies[idxs], X, Y):
    plt.text(x,y,i, color=np.random.rand(3)*0.7, fontsize=11)
plt.show()

# Same model via fastai's built-in collab_learner (EmbeddingDotBias under the
# hood; items are prefixed i_, users u_ in the learned model's attributes).
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))
learn.fit_one_cycle(5, 5e-3, wd=0.1)
learn.model

movie_bias = learn.model.i_bias.weight.squeeze()
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes['title'][i] for i in idxs]

# Nearest neighbour by cosine similarity in factor space; index [1] skips the
# movie itself, which is always its own closest match.
movie_factors = learn.model.i_weight.weight
idx = dls.classes['title'].o2i['Silence of the Lambs, The (1991)']
distances = nn.CosineSimilarity(dim=1)(movie_factors, movie_factors[idx][None])
idx = distances.argsort(descending=True)[1]
dls.classes['title'][idx]

# fastai's recommended embedding sizes for each categorical variable.
embs = get_emb_sz(dls)
embs

class CollabNN(Module):
    "Deep-learning variant: concatenate user/item embeddings, then an MLP head."
    def __init__(self, user_sz, item_sz, y_range=(0,5.5), n_act=100):
        # user_sz/item_sz are (vocab size, embedding width) pairs from get_emb_sz.
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)
        self.layers = nn.Sequential(
            nn.Linear(user_sz[1]+item_sz[1], n_act),
            nn.ReLU(),
            nn.Linear(n_act, 1))
        self.y_range = y_range

    def forward(self, x):
        embs = self.user_factors(x[:,0]),self.item_factors(x[:,1])
        x = self.layers(torch.cat(embs, dim=1))
        return sigmoid_range(x, *self.y_range)

model = CollabNN(*embs)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.01)

# use_nn=True makes collab_learner build the neural-net version (EmbeddingNN),
# here with two hidden layers of 100 and 50 activations.
learn = collab_learner(dls, use_nn=True, y_range=(0, 5.5), layers=[100,50])
learn.fit_one_cycle(5, 5e-3, wd=0.1)

@delegates(TabularModel)
class EmbeddingNN(TabularModel):
    "TabularModel specialised for collab: no continuous variables, one output."
    def __init__(self, emb_szs, layers, **kwargs):
        super().__init__(emb_szs, layers=layers, n_cont=0, out_sz=1, **kwargs)