from enum import Enum
from logging import getLogger

import torch
import torch.nn as nn
from torch.nn.init import xavier_normal_, constant_


def set_color(log, color, highlight=True):
    """Wrap ``log`` in ANSI escape codes so it prints in ``color``.

    Args:
        log (str): Text to colorize.
        color (str): One of ``black``, ``red``, ``green``, ``yellow``,
            ``blue``, ``pink``, ``cyan``, ``white``. Any other value falls
            back to the last entry (``white``).
        highlight (bool): Use the bold/bright attribute (``1``) when True,
            the normal attribute (``0``) otherwise.

    Returns:
        str: ``log`` prefixed with the color sequence and suffixed with the
        reset sequence.
    """
    color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white']
    try:
        index = color_set.index(color)
    except ValueError:
        # Unknown color name: fall back to the last palette entry.
        index = len(color_set) - 1
    prev_log = '\033['
    if highlight:
        prev_log += '1;3'
    else:
        prev_log += '0;3'
    prev_log += str(index) + 'm'
    return prev_log + log + '\033[0m'


class ModelType(Enum):
    """Type of models.

    - ``GENERAL``: General Recommendation
    - ``SEQUENTIAL``: Sequential Recommendation
    - ``CONTEXT``: Context-aware Recommendation
    - ``KNOWLEDGE``: Knowledge-based Recommendation
    - ``TRADITIONAL``: Traditional Recommendation
    - ``DECISIONTREE``: Decision Tree Recommendation
    """

    GENERAL = 1
    SEQUENTIAL = 2
    CONTEXT = 3
    KNOWLEDGE = 4
    TRADITIONAL = 5
    DECISIONTREE = 6


class KGDataLoaderState(Enum):
    """States for Knowledge-based DataLoader.

    - ``RSKG``: Return both knowledge graph information and user-item interaction information.
    - ``RS``: Only return the user-item interaction.
    - ``KG``: Only return the triplets with negative examples in a knowledge graph.
    """

    RSKG = 1
    RS = 2
    KG = 3


class EvaluatorType(Enum):
    """Type for evaluation metrics.

    - ``RANKING``: Ranking-based metrics like NDCG, Recall, etc.
    - ``VALUE``: Value-based metrics like AUC, etc.
    """

    RANKING = 1
    VALUE = 2


class InputType(Enum):
    """Type of Models' input.

    - ``POINTWISE``: Point-wise input, like ``uid, iid, label``.
    - ``PAIRWISE``: Pair-wise input, like ``uid, pos_iid, neg_iid``.
    - ``LISTWISE``: List-wise input.
    """

    POINTWISE = 1
    PAIRWISE = 2
    LISTWISE = 3


class FeatureType(Enum):
    """Type of features.

    - ``TOKEN``: Token features like user_id and item_id.
    - ``FLOAT``: Float features like rating and timestamp.
    - ``TOKEN_SEQ``: Token sequence features like review.
    - ``FLOAT_SEQ``: Float sequence features like pretrained vector.
    """

    TOKEN = 'token'
    FLOAT = 'float'
    TOKEN_SEQ = 'token_seq'
    FLOAT_SEQ = 'float_seq'


class FeatureSource(Enum):
    """Source of features.

    - ``INTERACTION``: Features from ``.inter`` (other than ``user_id`` and ``item_id``).
    - ``USER``: Features from ``.user`` (other than ``user_id``).
    - ``ITEM``: Features from ``.item`` (other than ``item_id``).
    - ``USER_ID``: ``user_id`` feature in ``inter_feat`` and ``user_feat``.
    - ``ITEM_ID``: ``item_id`` feature in ``inter_feat`` and ``item_feat``.
    - ``KG``: Features from ``.kg``.
    - ``NET``: Features from ``.net``.
    """

    INTERACTION = 'inter'
    USER = 'user'
    ITEM = 'item'
    USER_ID = 'user_id'
    ITEM_ID = 'item_id'
    KG = 'kg'
    NET = 'net'


class AbstractRecommender(nn.Module):
    r"""Base class for all models.

    Subclasses are expected to override :meth:`calculate_loss`,
    :meth:`predict` and :meth:`full_sort_predict`; the defaults raise
    ``NotImplementedError``.
    """

    def __init__(self):
        super().__init__()
        self.logger = getLogger()

    def calculate_loss(self, interaction):
        r"""Calculate the training loss for a batch data.

        Args:
            interaction (Interaction): Interaction class of the batch.

        Returns:
            torch.Tensor: Training loss, shape: []
        """
        raise NotImplementedError

    def predict(self, interaction):
        r"""Predict the scores between users and items.

        Args:
            interaction (Interaction): Interaction class of the batch.

        Returns:
            torch.Tensor: Predicted scores for given users and items, shape: [batch_size]
        """
        raise NotImplementedError

    def full_sort_predict(self, interaction):
        r"""Full sort prediction function.

        Given users, calculate the scores between users and all candidate items.

        Args:
            interaction (Interaction): Interaction class of the batch.

        Returns:
            torch.Tensor: Predicted scores for given users and all candidate items,
            shape: [n_batch_users * n_candidate_items]
        """
        raise NotImplementedError

    def other_parameter(self):
        """Collect the extra attributes listed in ``self.other_parameter_name``.

        Returns:
            dict: Mapping from attribute name to its current value; empty when
            the model does not define ``other_parameter_name``.
        """
        if hasattr(self, 'other_parameter_name'):
            return {key: getattr(self, key) for key in self.other_parameter_name}
        return dict()

    def load_other_parameter(self, para):
        """Restore attributes from a mapping such as the one :meth:`other_parameter` returns.

        Args:
            para (dict or None): Attribute name to value mapping. ``None`` is a no-op.
        """
        if para is None:
            return
        for key, value in para.items():
            setattr(self, key, value)

    def __str__(self):
        """Return the module description followed by the number of trainable parameters."""
        # numel() counts the elements of each tensor directly, so no numpy is needed.
        params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        return super().__str__() + set_color('\nTrainable parameters', 'blue') + f': {params}'


class GeneralRecommender(AbstractRecommender):
    """Abstract general recommender.

    All general models should inherit from this class. It reads the field
    names, the user/item counts and the device from ``config`` and ``dataset``.
    """

    type = ModelType.GENERAL

    def __init__(self, config, dataset):
        """Load dataset and parameter information.

        Args:
            config: Mapping that provides ``USER_ID_FIELD``, ``ITEM_ID_FIELD``,
                ``NEG_PREFIX`` and ``device``.
            dataset: Object whose ``num(field)`` returns the number of
                distinct values of ``field``.
        """
        super().__init__()

        # load dataset info
        self.USER_ID = config['USER_ID_FIELD']
        self.ITEM_ID = config['ITEM_ID_FIELD']
        self.NEG_ITEM_ID = config['NEG_PREFIX'] + self.ITEM_ID
        self.n_users = dataset.num(self.USER_ID)
        self.n_items = dataset.num(self.ITEM_ID)

        # load parameters info
        self.device = config['device']


def xavier_normal_initialization(module):
    r"""Initialize nn.Embedding and nn.Linear parameters with `xavier_normal_`_.

    For bias in nn.Linear layers, using constant 0 to initialize.
    Modules of any other type are left untouched.

    .. _`xavier_normal_`:
        https://pytorch.org/docs/stable/nn.init.html?highlight=xavier_normal_#torch.nn.init.xavier_normal_

    Examples:
        >>> self.apply(xavier_normal_initialization)
    """
    if isinstance(module, nn.Embedding):
        xavier_normal_(module.weight.data)
    elif isinstance(module, nn.Linear):
        xavier_normal_(module.weight.data)
        if module.bias is not None:
            constant_(module.bias.data, 0)


class BPRLoss(nn.Module):
    """BPRLoss, based on Bayesian Personalized Ranking.

    Args:
        - gamma(float): Small value added inside the logarithm to avoid ``log(0)``.

    Shape:
        - Pos_score: (N)
        - Neg_score: (N), same shape as the Pos_score
        - Output: scalar.

    Examples::

        >>> loss = BPRLoss()
        >>> pos_score = torch.randn(3, requires_grad=True)
        >>> neg_score = torch.randn(3, requires_grad=True)
        >>> output = loss(pos_score, neg_score)
        >>> output.backward()
    """

    def __init__(self, gamma=1e-10):
        super().__init__()
        self.gamma = gamma

    def forward(self, pos_score, neg_score):
        """Return the mean of ``-log(gamma + sigmoid(pos_score - neg_score))``."""
        loss = -torch.log(self.gamma + torch.sigmoid(pos_score - neg_score)).mean()
        return loss


class BPR(GeneralRecommender):
    r"""BPR is a basic matrix factorization model that is trained in the pairwise way."""

    input_type = InputType.PAIRWISE

    def __init__(self, config, dataset):
        """Build the user/item embedding tables and the BPR loss.

        Args:
            config: Mapping that additionally provides ``embedding_size``.
            dataset: See :class:`GeneralRecommender`.
        """
        super().__init__(config, dataset)

        # load parameters info
        self.embedding_size = config['embedding_size']

        # define layers and loss
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(self.n_items, self.embedding_size)
        self.loss = BPRLoss()

        # parameters initialization
        self.apply(xavier_normal_initialization)

    def get_user_embedding(self, user):
        r"""Get a batch of user embedding tensor according to input user's id.

        Args:
            user (torch.LongTensor): The input tensor that contains user's id, shape: [batch_size, ]

        Returns:
            torch.FloatTensor: The embedding tensor of a batch of user, shape: [batch_size, embedding_size]
        """
        return self.user_embedding(user)

    def get_item_embedding(self, item):
        r"""Get a batch of item embedding tensor according to input item's id.

        Args:
            item (torch.LongTensor): The input tensor that contains item's id, shape: [batch_size, ]

        Returns:
            torch.FloatTensor: The embedding tensor of a batch of item, shape: [batch_size, embedding_size]
        """
        return self.item_embedding(item)

    def forward(self, user, item):
        """Look up and return ``(user_embedding, item_embedding)`` for the given ids."""
        user_e = self.get_user_embedding(user)
        item_e = self.get_item_embedding(item)
        return user_e, item_e

    def calculate_loss(self, interaction):
        """Compute the BPR loss between positive and negative item scores.

        Args:
            interaction: Batch indexed by ``USER_ID``, ``ITEM_ID`` and ``NEG_ITEM_ID``.

        Returns:
            torch.Tensor: Scalar training loss.
        """
        user = interaction[self.USER_ID]
        pos_item = interaction[self.ITEM_ID]
        neg_item = interaction[self.NEG_ITEM_ID]

        user_e, pos_e = self.forward(user, pos_item)
        neg_e = self.get_item_embedding(neg_item)
        # Score = inner product of user and item embeddings.
        pos_item_score = torch.mul(user_e, pos_e).sum(dim=1)
        neg_item_score = torch.mul(user_e, neg_e).sum(dim=1)
        return self.loss(pos_item_score, neg_item_score)

    def predict(self, interaction):
        """Return the inner-product score of each (user, item) pair in the batch."""
        user = interaction[self.USER_ID]
        item = interaction[self.ITEM_ID]
        user_e, item_e = self.forward(user, item)
        return torch.mul(user_e, item_e).sum(dim=1)

    def full_sort_predict(self, interaction):
        """Score every batch user against all items and return the flattened result."""
        user = interaction[self.USER_ID]
        user_e = self.get_user_embedding(user)
        all_item_e = self.item_embedding.weight
        score = torch.matmul(user_e, all_item_e.transpose(0, 1))
        return score.view(-1)