from enum import Enum
from logging import getLogger

import torch
import torch.nn as nn
from torch.nn.init import xavier_normal_, constant_
def set_color(log, color, highlight=True):
    """Wrap ``log`` in ANSI escape codes so terminals render it in ``color``.

    Args:
        log (str): Text to colourize.
        color (str): One of ``'black'``, ``'red'``, ``'green'``, ``'yellow'``,
            ``'blue'``, ``'pink'``, ``'cyan'``, ``'white'``. Any other value
            falls back to the last entry (``'white'``).
        highlight (bool): When true the ``1;`` attribute prefix is used,
            otherwise ``0;``.

    Returns:
        str: ``log`` preceded by the colour escape sequence and followed by the
        reset sequence ``\\033[0m``.
    """
    color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white']
    try:
        index = color_set.index(color)
    except ValueError:
        # Unknown colour name: fall back to the last palette entry instead of
        # failing. Only ValueError is caught so that unrelated errors (and
        # KeyboardInterrupt / SystemExit) are no longer silently swallowed.
        index = len(color_set) - 1
    attribute = '1;3' if highlight else '0;3'
    # The palette position is the final digit of the 3x foreground code.
    return f'\033[{attribute}{index}m' + log + '\033[0m'
class ModelType(Enum):
    """Category a recommendation model belongs to.

    Members:

    - ``GENERAL``: general recommendation.
    - ``SEQUENTIAL``: sequential recommendation.
    - ``CONTEXT``: context-aware recommendation.
    - ``KNOWLEDGE``: knowledge-based recommendation.
    - ``TRADITIONAL``: presumably traditional (non-neural) models -- TODO confirm.
    - ``DECISIONTREE``: presumably decision-tree based models -- TODO confirm.
    """

    GENERAL = 1
    SEQUENTIAL = 2
    CONTEXT = 3
    KNOWLEDGE = 4
    TRADITIONAL = 5
    DECISIONTREE = 6
class KGDataLoaderState(Enum):
    """Operating state of a knowledge-based data loader.

    Members:

    - ``RSKG``: yield both knowledge-graph information and user-item
      interaction information.
    - ``RS``: yield only the user-item interactions.
    - ``KG``: yield only knowledge-graph triplets together with negative
      examples.
    """

    RSKG = 1
    RS = 2
    KG = 3
class EvaluatorType(Enum):
    """Family of evaluation metrics.

    Members:

    - ``RANKING``: ranking-based metrics such as NDCG or Recall.
    - ``VALUE``: value-based metrics such as AUC.
    """

    RANKING = 1
    VALUE = 2
class InputType(Enum):
    """Form of the training input a model consumes.

    Members:

    - ``POINTWISE``: point-wise input, e.g. ``uid, iid, label``.
    - ``PAIRWISE``: pair-wise input, e.g. ``uid, pos_iid, neg_iid``.
    - ``LISTWISE``: presumably list-wise input -- TODO confirm against users
      of this member.
    """

    POINTWISE = 1
    PAIRWISE = 2
    LISTWISE = 3
class FeatureType(Enum):
    """Kind of value stored in a feature column.

    Each member's value is the lowercase string tag shown below.

    - ``TOKEN`` (``'token'``): token features such as user_id and item_id.
    - ``FLOAT`` (``'float'``): float features such as rating and timestamp.
    - ``TOKEN_SEQ`` (``'token_seq'``): token sequence features such as a review.
    - ``FLOAT_SEQ`` (``'float_seq'``): float sequence features such as a
      pretrained vector.
    """

    TOKEN = 'token'
    FLOAT = 'float'
    TOKEN_SEQ = 'token_seq'
    FLOAT_SEQ = 'float_seq'
class FeatureSource(Enum):
    """Where a feature column originates.

    Each member's value is the lowercase string tag shown below.

    - ``INTERACTION`` (``'inter'``): features from ``.inter`` other than
      ``user_id`` and ``item_id``.
    - ``USER`` (``'user'``): features from ``.user`` other than ``user_id``.
    - ``ITEM`` (``'item'``): features from ``.item`` other than ``item_id``.
    - ``USER_ID`` (``'user_id'``): the ``user_id`` feature present in
      ``inter_feat`` and ``user_feat``.
    - ``ITEM_ID`` (``'item_id'``): the ``item_id`` feature present in
      ``inter_feat`` and ``item_feat``.
    - ``KG`` (``'kg'``): features from ``.kg``.
    - ``NET`` (``'net'``): features from ``.net``.
    """

    INTERACTION = 'inter'
    USER = 'user'
    ITEM = 'item'
    USER_ID = 'user_id'
    ITEM_ID = 'item_id'
    KG = 'kg'
    NET = 'net'
class AbstractRecommender(nn.Module):
    r"""Base class for all models.

    Subclasses are expected to override :meth:`calculate_loss`,
    :meth:`predict` and :meth:`full_sort_predict`; the implementations here
    only raise ``NotImplementedError``.
    """

    def __init__(self):
        # Initialise nn.Module first so attribute registration is fully set up
        # before any attribute is assigned on the instance.
        super().__init__()
        self.logger = getLogger()

    def calculate_loss(self, interaction):
        r"""Calculate the training loss for a batch data.

        Args:
            interaction (Interaction): Interaction class of the batch.

        Returns:
            torch.Tensor: Training loss, shape: []

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def predict(self, interaction):
        r"""Predict the scores between users and items.

        Args:
            interaction (Interaction): Interaction class of the batch.

        Returns:
            torch.Tensor: Predicted scores for given users and items, shape: [batch_size]

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def full_sort_predict(self, interaction):
        r"""Full sort prediction function.

        Given users, calculate the scores between users and all candidate items.

        Args:
            interaction (Interaction): Interaction class of the batch.

        Returns:
            torch.Tensor: Predicted scores for given users and all candidate items,
            shape: [n_batch_users * n_candidate_items]

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def other_parameter(self):
        """Collect the extra attributes listed in ``self.other_parameter_name``.

        Returns:
            dict: ``{name: getattr(self, name)}`` for every name in
            ``self.other_parameter_name``; an empty dict when the instance has
            no ``other_parameter_name`` attribute.
        """
        if hasattr(self, 'other_parameter_name'):
            return {key: getattr(self, key) for key in self.other_parameter_name}
        return dict()

    def load_other_parameter(self, para):
        """Set every ``key: value`` pair of ``para`` as an attribute on the model.

        Args:
            para (dict or None): Attribute names mapped to values. ``None`` is
                accepted and leaves the model untouched.
        """
        if para is None:
            return
        for key, value in para.items():
            setattr(self, key, value)

    def __str__(self):
        """Return the module description followed by the trainable-parameter count."""
        # Tensor.numel() gives the element count directly, so no numpy import
        # is required to multiply out each parameter's shape.
        params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        return super().__str__() + set_color('\nTrainable parameters', 'blue') + f': {params}'
class GeneralRecommender(AbstractRecommender):
    """Abstract base for general recommenders.

    Every general recommendation model should derive from this class. The
    constructor copies the field names, the user/item counts and the device
    out of ``config`` and ``dataset`` onto the instance.
    """

    type = ModelType.GENERAL

    def __init__(self, config, dataset):
        """Read dataset field names, entity counts and device.

        Args:
            config: Mapping-like object indexed with ``'USER_ID_FIELD'``,
                ``'ITEM_ID_FIELD'``, ``'NEG_PREFIX'`` and ``'device'``.
            dataset: Object whose ``num(field)`` is called with the user and
                item field names to obtain ``n_users`` and ``n_items``.
        """
        super().__init__()

        # Field names used to index interaction batches.
        user_field = config['USER_ID_FIELD']
        item_field = config['ITEM_ID_FIELD']
        self.USER_ID = user_field
        self.ITEM_ID = item_field
        self.NEG_ITEM_ID = config['NEG_PREFIX'] + item_field

        # Entity counts as reported by the dataset.
        self.n_users = dataset.num(user_field)
        self.n_items = dataset.num(item_field)

        # Runtime settings.
        self.device = config['device']
def xavier_normal_initialization(module):
    r"""Initialize ``nn.Embedding`` and ``nn.Linear`` weights with `xavier_normal_`_.

    The bias of an ``nn.Linear`` layer, when present, is filled with the
    constant 0. Modules of any other type are left untouched.

    .. _`xavier_normal_`:
        https://pytorch.org/docs/stable/nn.init.html?highlight=xavier_normal_#torch.nn.init.xavier_normal_

    Args:
        module (nn.Module): The module to (possibly) initialize in place.

    Examples:
        >>> self.apply(xavier_normal_initialization)
    """
    is_embedding = isinstance(module, nn.Embedding)
    if not is_embedding and not isinstance(module, nn.Linear):
        return

    xavier_normal_(module.weight.data)
    if is_embedding:
        # Embeddings have no bias to reset.
        return

    bias = module.bias
    if bias is not None:
        constant_(bias.data, 0)
class BPRLoss(nn.Module):
    """Bayesian Personalized Ranking loss.

    Computes ``-mean(log(gamma + sigmoid(pos_score - neg_score)))``.

    Args:
        - gamma(float): Small constant added inside the logarithm so its
          argument stays away from zero.

    Shape:
        - Pos_score: (N)
        - Neg_score: (N), same shape as the Pos_score
        - Output: scalar.

    Examples::

        >>> loss = BPRLoss()
        >>> pos_score = torch.randn(3, requires_grad=True)
        >>> neg_score = torch.randn(3, requires_grad=True)
        >>> output = loss(pos_score, neg_score)
        >>> output.backward()
    """

    def __init__(self, gamma=1e-10):
        super().__init__()
        self.gamma = gamma

    def forward(self, pos_score, neg_score):
        """Return the mean negative log-sigmoid of the score margin."""
        margin = pos_score - neg_score
        log_likelihood = torch.log(self.gamma + torch.sigmoid(margin))
        return -log_likelihood.mean()
class BPR(GeneralRecommender):
    r"""Basic matrix factorization model trained in the pairwise way.

    Users and items each get an embedding table; the score of a user-item
    pair is the dot product of the two embeddings.
    """

    input_type = InputType.PAIRWISE

    def __init__(self, config, dataset):
        """Build the embedding tables and the loss, then initialize weights.

        Args:
            config: Mapping-like object; ``'embedding_size'`` is read here, the
                remaining keys are consumed by the parent constructor.
            dataset: Forwarded to the parent constructor.
        """
        super().__init__(config, dataset)

        # Hyper-parameters.
        dim = config['embedding_size']
        self.embedding_size = dim

        # Layers and loss.
        self.user_embedding = nn.Embedding(self.n_users, dim)
        self.item_embedding = nn.Embedding(self.n_items, dim)
        self.loss = BPRLoss()

        # Weight initialization for every sub-module.
        self.apply(xavier_normal_initialization)

    def get_user_embedding(self, user):
        r"""Look up the embeddings for a batch of user ids.

        Args:
            user (torch.LongTensor): The input tensor that contains user's id, shape: [batch_size, ]

        Returns:
            torch.FloatTensor: The embedding tensor of a batch of user, shape: [batch_size, embedding_size]
        """
        return self.user_embedding(user)

    def get_item_embedding(self, item):
        r"""Look up the embeddings for a batch of item ids.

        Args:
            item (torch.LongTensor): The input tensor that contains item's id, shape: [batch_size, ]

        Returns:
            torch.FloatTensor: The embedding tensor of a batch of item, shape: [batch_size, embedding_size]
        """
        return self.item_embedding(item)

    def forward(self, user, item):
        """Return the ``(user_embedding, item_embedding)`` pair for the given ids."""
        return self.get_user_embedding(user), self.get_item_embedding(item)

    def calculate_loss(self, interaction):
        """Compute the BPR loss between positive and negative item scores.

        Args:
            interaction: Batch indexed by ``self.USER_ID``, ``self.ITEM_ID``
                and ``self.NEG_ITEM_ID``.

        Returns:
            torch.Tensor: The value returned by ``self.loss``.
        """
        users = interaction[self.USER_ID]
        positives = interaction[self.ITEM_ID]
        negatives = interaction[self.NEG_ITEM_ID]

        user_vec, pos_vec = self.forward(users, positives)
        neg_vec = self.get_item_embedding(negatives)

        # Dot product along the embedding dimension.
        pos_item_score = (user_vec * pos_vec).sum(dim=1)
        neg_item_score = (user_vec * neg_vec).sum(dim=1)
        return self.loss(pos_item_score, neg_item_score)

    def predict(self, interaction):
        """Score each user-item pair in the batch by embedding dot product.

        Args:
            interaction: Batch indexed by ``self.USER_ID`` and ``self.ITEM_ID``.

        Returns:
            torch.Tensor: Element-wise product of the two embeddings summed
            over ``dim=1``.
        """
        user_vec, item_vec = self.forward(
            interaction[self.USER_ID], interaction[self.ITEM_ID]
        )
        return (user_vec * item_vec).sum(dim=1)

    def full_sort_predict(self, interaction):
        """Score every item in the item embedding table for each user in the batch.

        Args:
            interaction: Batch indexed by ``self.USER_ID``.

        Returns:
            torch.Tensor: The user-by-item score matrix flattened with
            ``view(-1)``.
        """
        user_vec = self.get_user_embedding(interaction[self.USER_ID])
        item_table = self.item_embedding.weight
        scores = torch.matmul(user_vec, item_table.transpose(0, 1))
        return scores.view(-1)