!pip install -q recogym

import numpy as np
from numpy.random.mtrand import RandomState
from scipy.special import logsumexp
import scipy
import pandas as pd
from scipy.stats.distributions import beta
from copy import deepcopy
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from itertools import chain
from sklearn.neighbors import NearestNeighbors
from IPython.display import display, HTML
from matplotlib.ticker import FormatStrFormatter

import gym, recogym
from recogym import env_1_args, Configuration
from recogym import verify_agents, verify_agents_IPS
from recogym.agents import Agent
from recogym.agents import OrganicUserEventCounterAgent, organic_user_count_args
from recogym.agents import RandomAgent, random_args
from recogym.agents.organic_count import OrganicCount, organic_count_args, to_categorical
from recogym.evaluate_agent import plot_verify_agents, verify_agents_recall_at_k
from recogym.envs.session import OrganicSessions
from recogym.envs.context import DefaultContext
from recogym.envs.observation import Observation

import matplotlib.pyplot as plt
%matplotlib inline

P = 2000  # Number of products
U = 2000  # Number of users

# You can overwrite environment arguments here
env_1_args['random_seed'] = 42
env_1_args['num_products'] = P
env_1_args['phi_var'] = 0.0
env_1_args['number_of_flips'] = P // 2
env_1_args['sigma_mu_organic'] = 0.1
env_1_args['sigma_omega'] = 0.05

# Initialise the gym for the first time by calling .make() and .init_gym()
env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)
# env.reset()

# Generate RecSys logs for U users
reco_log = env.generate_logs(U)
reco_log.head(20)

n_events = reco_log.shape[0]
n_organic = reco_log.loc[reco_log['z'] == 'organic'].shape[0]
print('Training on {0} organic and {1} bandit events'.format(n_organic, n_events - n_organic))
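# The generated log is a pandas DataFrame with one row per event. The columns
# used throughout this notebook are 't' (timestep), 'u' (user id), 'z' (event
# type: 'organic' or 'bandit'), 'v' (viewed product), 'a' (recommended action),
# 'c' (click indicator) and 'ps' (logging propensity). As a quick, optional
# sanity check on the organic/bandit split:
print(reco_log['z'].value_counts())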
def leave_one_out(reco_log, agent, last=False, N=1, folds=10):
    # 1. Extract all organic events
    reco_log = reco_log.loc[reco_log['z'] == 'organic']

    # 2. For every user sequence, sample out a single item
    hits = []
    for _ in range(folds):
        user_id = 0
        history = []
        session = OrganicSessions()
        agent.reset()
        for row in reco_log.itertuples():
            # If we have a new user
            if row.u != user_id:
                if last:
                    # Hold out the last item
                    index = len(history) - 1
                else:
                    # Hold out a random item from the history
                    index = np.random.choice(len(history), replace=False)
                test = history[index]
                train = history[:index] + history[index + 1:]

                # 3. Recreate the user sequence without the held-out item and
                #    let the agent observe the incomplete sequence
                for t, v in train:
                    session.next(DefaultContext(t, user_id), int(v))

                # 4. Generate a top-N set of recommendations by letting the agent act
                # TODO - for now this only works for N = 1
                try:
                    prob_a = agent.act(Observation(DefaultContext(t + 1, user_id), session), 0, False)['ps-a']
                except:
                    prob_a = [1 / P] * P

                # 5. Check whether the sampled test item is in the top-N
                try:
                    hits.append(np.argmax(prob_a) == int(test[1]))
                except:
                    hits.append(0)

                # Reset variables
                user_id = row.u
                history = []
                session = OrganicSessions()
                agent.reset()

            # Append the organic interaction to the running history for this session
            history.append((row.t, row.v))

    # Error analysis: mean hit rate with a 95% normal-approximation interval
    mean_hits = np.mean(hits)
    serr_hits = np.std(hits) / np.sqrt(len(hits))
    low_bound = mean_hits - 1.96 * serr_hits
    upp_bound = mean_hits + 1.96 * serr_hits
    return mean_hits, low_bound, upp_bound

def verify_agents_traditional(reco_log, agents, last=False, N=1, folds=10):
    # Placeholder DataFrame for results
    stat = {'Agent': [], '0.025': [], '0.500': [], '0.975': []}

    # For every agent, compute HR@N with its confidence bounds
    for agent_id in agents:
        mean, low, upp = leave_one_out(reco_log, agents[agent_id], last=last, N=N, folds=folds)
        stat['Agent'].append(agent_id)
        stat['0.025'].append(low)
        stat['0.500'].append(mean)
        stat['0.975'].append(upp)

    return pd.DataFrame().from_dict(stat)

def compute_ips_weights(agent, reco_log):
    # Placeholders for return values
    rewards = []  # Labels for actions
    t_props = []  # Treatment propensities
    l_props = []  # Logging propensities

    # For every logged interaction
    user_id = 0
    session = OrganicSessions()
    agent.reset()
    for row in reco_log.itertuples():
        # If we have a new user
        if row.u != user_id:
            # Reset
            session = OrganicSessions()
            agent.reset()
            user_id = row.u

        # If we have an organic event
        if row.z == 'organic':
            session.next(DefaultContext(row.t, row.u), int(row.v))
        else:
            prob_a = agent.act(Observation(DefaultContext(row.t, row.u), session), 0, False)['ps-a']
            rewards.append(row.c)
            try:
                t_props.append(prob_a[int(row.a)])
            except:
                t_props.append(0)
            l_props.append(row.ps)
            session = OrganicSessions()

    return np.asarray(rewards), np.asarray(t_props), np.asarray(l_props)

def verify_agents_counterfactual(reco_log, agents, cap=3):
    # Placeholder DataFrames for results
    IPS_stat = {'Agent': [], '0.025': [], '0.500': [], '0.975': []}
    CIPS_stat = {'Agent': [], '0.025': [], '0.500': [], '0.975': []}
    SNIPS_stat = {'Agent': [], '0.025': [], '0.500': [], '0.975': []}

    # For every agent
    for agent_id in agents:
        # Get the rewards and propensities
        rewards, t_props, l_props = compute_ips_weights(agents[agent_id], reco_log)

        # Compute the sample weights - propensity ratios
        p_ratio = t_props / l_props

        # Effective sample size for the E_t estimate (from A. Owen)
        n_e = len(rewards) * (np.mean(p_ratio) ** 2) / (p_ratio ** 2).mean()
        n_e = 0 if np.isnan(n_e) else n_e
        print('Effective sample size for agent {} is {}'.format(str(agent_id), n_e))

        # Critical value from the t-distribution, as we have unknown variance
        alpha = .00125
        cv = scipy.stats.t.ppf(1 - alpha, df=int(n_e) - 1)

        ###############
        # VANILLA IPS #
        ###############
        # Expected reward for pi_t
        E_t = np.mean(rewards * p_ratio)

        # Variance of the estimate
        var = ((rewards * p_ratio - E_t) ** 2).mean()
        stddev = np.sqrt(var)

        # C.I. assuming unknown variance - use the t-distribution and effective sample size
        min_bound = E_t - cv * stddev / np.sqrt(int(n_e))
        max_bound = E_t + cv * stddev / np.sqrt(int(n_e))

        # Store results
        IPS_stat['Agent'].append(agent_id)
        IPS_stat['0.025'].append(min_bound)
        IPS_stat['0.500'].append(E_t)
        IPS_stat['0.975'].append(max_bound)

        ##############
        # CAPPED IPS #
        ##############
        # Cap the ratios
        p_ratio_capped = np.clip(p_ratio, a_min=None, a_max=cap)

        # Expected reward for pi_t
        E_t_capped = np.mean(rewards * p_ratio_capped)

        # Variance of the estimate
        var_capped = ((rewards * p_ratio_capped - E_t_capped) ** 2).mean()
        stddev_capped = np.sqrt(var_capped)

        # C.I. assuming unknown variance - use the t-distribution and effective sample size
        min_bound_capped = E_t_capped - cv * stddev_capped / np.sqrt(int(n_e))
        max_bound_capped = E_t_capped + cv * stddev_capped / np.sqrt(int(n_e))

        # Store results
        CIPS_stat['Agent'].append(agent_id)
        CIPS_stat['0.025'].append(min_bound_capped)
        CIPS_stat['0.500'].append(E_t_capped)
        CIPS_stat['0.975'].append(max_bound_capped)

        ##############
        # NORMED IPS #
        ##############
        # Expected reward for pi_t (self-normalised)
        E_t_normed = np.sum(rewards * p_ratio) / np.sum(p_ratio)

        # Variance of the estimate
        var_normed = np.sum(((rewards - E_t_normed) ** 2) * (p_ratio ** 2)) / (p_ratio.sum() ** 2)
        stddev_normed = np.sqrt(var_normed)

        # C.I. assuming unknown variance - use the t-distribution and effective sample size
        min_bound_normed = E_t_normed - cv * stddev_normed / np.sqrt(int(n_e))
        max_bound_normed = E_t_normed + cv * stddev_normed / np.sqrt(int(n_e))

        # Store results
        SNIPS_stat['Agent'].append(agent_id)
        SNIPS_stat['0.025'].append(min_bound_normed)
        SNIPS_stat['0.500'].append(E_t_normed)
        SNIPS_stat['0.975'].append(max_bound_normed)

    return pd.DataFrame().from_dict(IPS_stat), pd.DataFrame().from_dict(CIPS_stat), pd.DataFrame().from_dict(SNIPS_stat)
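# To make the three estimators defined above concrete, here is a minimal,
# self-contained sketch on synthetic data. All arrays below are made up
# purely for illustration - they are not drawn from the simulator.
_rewards = np.array([1., 0., 0., 1., 0.])   # observed clicks
_t_props = np.array([.5, .1, .2, .9, .3])   # target policy propensities
_l_props = np.array([.2, .2, .2, .2, .2])   # logging policy propensities
_ratio = _t_props / _l_props                # importance weights

ips = np.mean(_rewards * _ratio)                     # vanilla IPS
cips = np.mean(_rewards * np.clip(_ratio, None, 3))  # capped at 3
snips = np.sum(_rewards * _ratio) / np.sum(_ratio)   # self-normalised IPS
print('IPS: {0:.3f}, CIPS: {1:.3f}, SNIPS: {2:.3f}'.format(ips, cips, snips))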
class SVDAgent(Agent):
    def __init__(self, config, U=U, P=P, K=5):
        super(SVDAgent, self).__init__(config)
        self.rng = RandomState(self.config.random_seed)
        assert P >= K
        self.K = K
        self.R = csr_matrix((U, P))
        self.V = np.zeros((K, P))  # placeholder, matches the (K, P) shape returned by svds
        self.user_history = np.zeros(P)

    def train(self, reco_log, U=U, P=P):
        # Extract all organic user logs
        reco_log = reco_log.loc[reco_log['z'] == 'organic']

        # Generate the ratings matrix for training, row-based for efficient row (user) retrieval
        self.R = csr_matrix((np.ones(len(reco_log)), (reco_log['u'], reco_log['v'])), (U, P))

        # Singular Value Decomposition - svds returns the right singular vectors V.T, shape (K, P)
        _, _, self.V = svds(self.R, k=self.K)

    def observe(self, observation):
        for session in observation.sessions():
            self.user_history[session['v']] += 1

    def act(self, observation, reward, done):
        """Return an Action based on the current observation and past history."""
        self.observe(observation)
        scores = self.user_history.dot(self.V.T).dot(self.V)
        action = np.argmax(scores)
        prob = np.zeros_like(scores)
        prob[action] = 1.0
        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': prob[action],
                'ps-a': prob,
            },
        }

    def reset(self):
        self.user_history = np.zeros(P)
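# For intuition on the SVDAgent scoring rule: scores = h @ V.T @ V projects a
# user's organic history vector h onto the span of the top-K right singular
# vectors, so frequently co-viewed products score highly together. A toy
# example (the matrix below is made up for illustration):
_toy_R = csr_matrix(np.array([[1., 1., 0., 0.],
                              [1., 1., 0., 0.],
                              [0., 0., 1., 1.]]))
_, _, _toy_V = svds(_toy_R, k=2)         # right singular vectors, shape (K, P)
_toy_h = np.array([1., 0., 0., 0.])      # a user who only saw product 0
print(_toy_h.dot(_toy_V.T).dot(_toy_V))  # products 0 and 1 score highest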
class itemkNNAgent(Agent):
    def __init__(self, config, U=U, P=P, k=5, greedy=False, alpha=1):
        super(itemkNNAgent, self).__init__(config)
        self.rng = RandomState(self.config.random_seed)
        self.k = min(P, k)
        self.greedy = greedy
        self.alpha = alpha
        self.R_t = csr_matrix((P, U))
        self.user_history = np.zeros(P)
        self.S = np.eye(P)

    def train(self, reco_log, U=U, P=P):
        # Extract all organic user logs
        reco_log = reco_log.loc[reco_log['z'] == 'organic']

        # Generate the transposed ratings matrix for training, row-based for efficient row (item) retrieval
        self.R_t = csr_matrix((np.ones(len(reco_log)), (reco_log['v'], reco_log['u'])), (P, U))

        # Set up the nearest neighbours module
        nn = NearestNeighbors(n_neighbors=self.k, metric='cosine')

        # Initialise placeholders for distances and indices
        distances = []
        indices = []

        # Dirty fix for the multiprocessing backend being unable to pickle large objects
        nn.fit(self.R_t)
        distances, indices = nn.kneighbors(self.R_t, return_distance=True)

        # Precompute the similarity matrix S
        data = list(chain.from_iterable(1.0 - distances))
        rows = list(chain.from_iterable([i] * self.k for i in range(P)))
        cols = list(chain.from_iterable(indices))

        # (P,P)-matrix with cosine similarities between items
        self.S = csr_matrix((data, (rows, cols)), (P, P)).todense()

    def observe(self, observation):
        for session in observation.sessions():
            self.user_history[session['v']] += 1

    def act(self, observation, reward, done):
        """Return an Action based on the current observation and past history."""
        self.observe(observation)
        scores = self.user_history.dot(self.S).A1
        if self.greedy:
            action = np.argmax(scores)
            prob = np.zeros_like(scores)
            prob[action] = 1.0
        else:
            scores **= self.alpha
            prob = scores / np.sum(scores)
            action = self.rng.choice(self.S.shape[0], p=prob)
        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': prob[action],
                'ps-a': prob,
            },
        }

    def reset(self):
        self.user_history = np.zeros(P)

class userkNNAgent(Agent):
    def __init__(self, config, U=U, P=P, k=5, greedy=False, alpha=1):
        super(userkNNAgent, self).__init__(config)
        self.rng = RandomState(self.config.random_seed)
        self.k = min(P, k)
        self.greedy = greedy
        self.alpha = alpha
        self.U = U
        self.P = P
        self.R = csr_matrix((U, P))
        self.user_history = np.zeros(P)
        self.nn = NearestNeighbors(n_neighbors=self.k, metric='cosine')

    def train(self, reco_log, U=U, P=P):
        # Extract all organic user logs
        reco_log = reco_log.loc[reco_log['z'] == 'organic']

        # Generate the ratings matrix for training, row-based for efficient row (user) retrieval
        self.R = csr_matrix((np.ones(len(reco_log)), (reco_log['u'], reco_log['v'])), (U, P))

        # Fit nearest neighbours
        self.nn.fit(self.R)

    def observe(self, observation):
        for session in observation.sessions():
            self.user_history[session['v']] += 1

    def act(self, observation, reward, done):
        """Return an Action based on the current observation and past history."""
        self.observe(observation)

        # Get neighbouring users based on the user history
        distances, indices = self.nn.kneighbors(self.user_history.reshape(1, -1))
        scores = np.add.reduce([dist * self.R[idx, :] for dist, idx in zip(distances, indices)])

        if self.greedy:
            action = np.argmax(scores)
            prob = np.zeros_like(scores)
            prob[action] = 1.0
        else:
            scores **= self.alpha
            prob = scores / np.sum(scores)
            action = self.rng.choice(self.P, p=prob)
        return {
            **super().act(observation, reward, done),
            **{
                'a': action,
                'ps': prob[action],
                'ps-a': prob,
            },
        }

    def reset(self):
        self.user_history = np.zeros(P)
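# Both kNN agents share the same stochastic policy: scores are raised to the
# power alpha and normalised into a probability distribution over products.
# A quick illustration with made-up scores of how alpha sharpens the policy:
_toy_scores = np.array([4., 2., 1., 1.])
for _a in [1, 2, 5]:
    _p = _toy_scores ** _a
    print('alpha = {0}: {1}'.format(_a, np.round(_p / _p.sum(), 3)))
# Higher alpha concentrates probability mass on the top-scoring product,
# approaching the greedy argmax policy in the limit.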
# SVD agent
SVD_agent = SVDAgent(Configuration(env_1_args), U, P, 30)
SVD_agent.train(reco_log)

# item-kNN agent
itemkNN_agent = itemkNNAgent(Configuration(env_1_args), U, P, 500, greedy=True)
itemkNN_agent.train(reco_log)

# user-kNN agent
userkNN_agent = userkNNAgent(Configuration(env_1_args), U, P, 20, greedy=True)
userkNN_agent.train(reco_log)

# Generalised Popularity agent (stochastic)
GPOP_agent = OrganicCount(Configuration({
    **env_1_args,
    'select_randomly': True,
}))

# Generalised Popularity agent (greedy)
GPOP_agent_greedy = OrganicCount(Configuration({
    **env_1_args,
    'select_randomly': False,
}))

# Personalised Popularity agent (stochastic)
PPOP_agent = OrganicUserEventCounterAgent(Configuration({
    **organic_user_count_args,
    **env_1_args,
    'select_randomly': True,
}))

# Personalised Popularity agent (greedy)
PPOP_agent_greedy = OrganicUserEventCounterAgent(Configuration({
    **organic_user_count_args,
    **env_1_args,
    'select_randomly': False,
}))

# Random agent
random_args['num_products'] = P
RAND_agent = RandomAgent(Configuration({**env_1_args, **random_args,}))

%%time
# Agents under evaluation (the leading spaces in the keys influence their sorted order)
agents = {
    ' Random': RAND_agent,
    ' Popular': GPOP_agent_greedy,
    ' User-pop': PPOP_agent,
    ' SVD': SVD_agent,
    ' User-kNN': userkNN_agent,
    'Item-kNN': itemkNN_agent,
}
agent_ids = sorted(list(agents.keys()))

# Generate new logs, to be used for offline testing
n_test_users = 5000
test_log = env.generate_logs(n_test_users)
n_events = test_log.shape[0]
n_organic = test_log.loc[test_log['z'] == 'organic'].shape[0]
print('Testing on {0} organic and {1} bandit events'.format(n_organic, n_events - n_organic))

def plot_barchart(result, title, xlabel, col='tab:red', figname='fig.eps', size=(6, 2), fontsize=12):
    fig, axes = plt.subplots(figsize=size)
    plt.title(title, size=fontsize)
    n_agents = len(result)
    yticks = np.arange(n_agents)
    mean = result['0.500']
    lower = result['0.500'] - result['0.025']
    upper = result['0.975'] - result['0.500']
    plt.barh(yticks, mean, height=.25, xerr=(lower, upper), align='center', color=col)
    plt.yticks(yticks, result['Agent'], size=fontsize)
    plt.xticks(size=fontsize)
    plt.xlabel(xlabel, size=fontsize)
    plt.xlim(.0, None)
    plt.gca().xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    plt.savefig(figname, bbox_inches='tight')
    plt.show()

%%time
result_LOO = verify_agents_traditional(test_log, deepcopy(agents))
display(result_LOO)
plot_barchart(result_LOO, 'Evaluate on Organic Feedback', 'HR@1', 'tab:red', 'traditional_eval.eps')

# Generate new logs with the Personalised Popularity agent as the logging policy,
# to be used for counterfactual offline testing
test_log_ppop = env.generate_logs(n_test_users, agent=deepcopy(PPOP_agent))
test_log_ppop.head()

%%time
cap = 15
result_IPS, result_CIPS, result_SNIPS = verify_agents_counterfactual(test_log_ppop, deepcopy(agents), cap=cap)
display(result_IPS)
plot_barchart(result_IPS, 'IPS', 'CTR', 'tab:blue', 'bandit_eval_noclip.eps')
display(result_CIPS)
plot_barchart(result_CIPS, 'Clipped IPS', 'CTR', 'tab:blue', 'bandit_eval_clip{0}.eps'.format(cap))

%%time
result_AB = verify_agents(env, n_test_users, deepcopy(agents))
display(result_AB)
plot_barchart(result_AB, 'A/B-test', 'CTR', 'tab:green', 'ABtest_eval.eps')

def combine_barchart(resultAB, resultCIPS, title, xlabel, figname='fig.eps', size=(6, 2), fontsize=12):
    fig, axes = plt.subplots(figsize=size)
    plt.title(title, size=fontsize)
    n_agents = len(resultAB)
    # Use the passed-in DataFrames rather than the globals of the same name
    for i, (name, colour, result) in enumerate([('A/B-test', 'tab:green', resultAB),
                                                ('CIPS', 'tab:blue', resultCIPS)]):
        mean = result['0.500']
        lower = result['0.500'] - result['0.025']
        upper = result['0.975'] - result['0.500']
        height = .25
        yticks = [a + i * height for a in range(n_agents)]
        plt.barh(yticks, mean, height=height, xerr=(lower, upper), align='edge', label=name, color=colour)
        plt.yticks(yticks, result['Agent'], size=fontsize)
    plt.xticks(size=fontsize)
    plt.xlabel(xlabel, size=fontsize)
    plt.legend(loc='lower right')
    plt.xlim(.0, None)
    plt.gca().xaxis.set_major_formatter(FormatStrFormatter('%.3f'))
    plt.savefig(figname, bbox_inches='tight')
    plt.show()

combine_barchart(result_AB, result_CIPS, 'Evaluate on Bandit Feedback', 'CTR', 'ABtest_CIPS.eps')
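# The cap parameter trades variance for bias: lower caps shrink the largest
# importance weights (reducing variance) but bias the CIPS estimate downwards
# whenever clipped weights coincide with clicks. As an optional check of how
# many weights the cap of 15 actually affects for one agent (this reuses the
# pipeline above and can take a while):
_, _t_p, _l_p = compute_ips_weights(deepcopy(agents['Item-kNN']), test_log_ppop)
_ratios = _t_p / _l_p
print('{0} of {1} importance weights exceed the cap of {2}'.format(
    int((_ratios > cap).sum()), len(_ratios), cap))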