Testing out different bandit methods for effective online testing methods
!pip install git+https://github.com/sparsh-ai/recochef.git
Collecting git+https://github.com/sparsh-ai/recochef.git Cloning https://github.com/sparsh-ai/recochef.git to /tmp/pip-req-build-1qpnc3ak Running command git clone -q https://github.com/sparsh-ai/recochef.git /tmp/pip-req-build-1qpnc3ak Installing build dependencies ... done Getting requirements to build wheel ... done Preparing wheel metadata ... done Requirement already satisfied: importlib-metadata; python_version < "3.8" in /usr/local/lib/python3.7/dist-packages (from recochef==0.0.post1.dev7+g49ee6bc) (4.5.0) Requirement already satisfied: typing-extensions>=3.6.4; python_version < "3.8" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < "3.8"->recochef==0.0.post1.dev7+g49ee6bc) (3.7.4.3) Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < "3.8"->recochef==0.0.post1.dev7+g49ee6bc) (3.4.1) Building wheels for collected packages: recochef Building wheel for recochef (PEP 517) ... done Created wheel for recochef: filename=recochef-0.0.post1.dev7+g49ee6bc-cp37-none-any.whl size=19052 sha256=8ee4ffa07f9b2f40145e765690655b013d98cd50fb0c039a062e00207c28032b Stored in directory: /tmp/pip-ephem-wheel-cache-tvyexl4d/wheels/4c/90/0f/19e93713ae38e283b3ea7281df324a99bd704095da42261514 Successfully built recochef Installing collected packages: recochef Successfully installed recochef-0.0.post1.dev7+g49ee6bc
import math
from typing import List
import numpy as np
import pandas as pd
from scipy import stats
from matplotlib import pyplot as plt
from recochef.models.epsilon import EpsilonGreedyRunner
from recochef.models.thompson import ThompsonSamplingRunner
class SplitTestRunner:
"""
Class that is used to run simulations of split tests.
Attributes:
bandit_returns: List of average returns per bandit.
batch_size: Number of examples per batch.
batches: Number of batches.
simulations: Number of simulations.
Methods:
init_bandits: Prepares everything for new simulation.
run: Runs the simulations and tracks performance.
"""
def __init__(self, bandit_returns: List[float], batch_size: int=1000, batches: int=10, simulations: int=100):
"""
Initializes a new RunSplitTest class with passed parameters.
Args:
bandit_returns: List of average returns per bandit.
batch_size: Number of examples per batch.
batches: Number of batches.
simulations: Number of simulations.
"""
self.bandit_returns = bandit_returns
self.n_bandits = len(bandit_returns)
self.bandits = list(range(self.n_bandits))
self.bandit_positive_examples = [0] * self.n_bandits
self.bandit_total_examples = [0] * self.n_bandits
self.batch_size = batch_size
self.batches = batches
self.simulations = simulations
self.df_bids = pd.DataFrame(columns=self.bandit_returns)
self.df_clicks = pd.DataFrame(columns=self.bandit_returns)
def init_bandits(self):
"""
Prepares everything for new simulation.
"""
self.bandit_positive_examples = [0] * self.n_bandits
self.bandit_total_examples = [0] * self.n_bandits
def run(self):
"""
Runs the simulations and tracks performance.
"""
for j in range(self.simulations):
self.init_bandits()
for i in range(self.batches):
examples = self.batch_size // self.n_bandits
for idx in self.bandits:
self.bandit_total_examples[idx] += examples
self.bandit_positive_examples[idx] += np.random.binomial(examples, self.bandit_returns[idx])
if self.df_bids.shape[0] < self.batches:
self.df_bids.loc[i] = self.bandit_total_examples
self.df_clicks.loc[i] = self.bandit_positive_examples
else:
self.df_bids.loc[i] += self.bandit_total_examples
self.df_clicks.loc[i] += self.bandit_positive_examples
self.df_bids /= self.simulations
self.df_clicks /= self.simulations
def line_plot(df: pd.DataFrame, title:str, x_label: str, y_label: str):
"""
Plots a line plot of the passed DataFrame.
Args:
df: DataFrame containing data to be plotted.
title: Title of the plot.
x_label: Title of the x-axis.
y_label: Title of the y-axis.
"""
stacked_data = df.apply(lambda x: x*100/sum(x), axis=1)
stacked_data.plot(kind="line", stacked=True)
plt.title(title)
plt.xlabel(x_label)
plt.ylabel(y_label)
def stacked_plot(df: pd.DataFrame, title:str, x_label: str, y_label: str):
"""
Plots a stacked area plot of the passed DataFrame.
Args:
df: DataFrame containing data to be plotted.
title: Title of the plot.
x_label: Title of the x-axis.
y_label: Title of the y-axis.
"""
stacked_data = df.apply(lambda x: x*100/sum(x), axis=1)
stacked_data.plot(kind="area", stacked=True, figsize=(12,6))
plt.title(title)
plt.xlabel(x_label)
plt.ylabel(y_label)
def plot_line_plots(rst: SplitTestRunner, reg: EpsilonGreedyRunner, rts: ThompsonSamplingRunner):
"""
Plots a line plots for each type of test.
Args:
rst: Split test simulation class.
reg: Epsilon-greedy MAB test simulation class.
rts: Thompson sampling test simulation class.
"""
line_plot(rst.df_bids,
title='A/B Test Bandit Resources Allocation',
x_label='Batch',
y_label='Bandit Allocation (%)')
line_plot(reg.df_bids,
title='Epsilon Greedy Bandit Resources Allocation',
x_label='Batch',
y_label='Bandit Allocation (%)')
line_plot(rts.df_bids,
title='Thompson Sampling Bandit Resources Allocation',
x_label='Batch',
y_label='Bandit Allocation (%)')
def plot_stacked_plots(rst: SplitTestRunner, reg: EpsilonGreedyRunner, rts: ThompsonSamplingRunner):
"""
Plots a stacked area plots for each type of test.
Args:
rst: Split test simulation class.
reg: Epsilon-greedy MAB test simulation class.
rts: Thompson sampling test simulation class.
"""
stacked_plot(rst.df_bids,
title='A/B Test Bandit Resources Allocation',
x_label='Batch',
y_label='Bandit Allocation (%)')
stacked_plot(reg.df_bids,
title='Epsilon Greedy Bandit Resources Allocation',
x_label='Batch',
y_label='Bandit Allocation (%)')
stacked_plot(rts.df_bids,
title='Thompson Sampling Bandit Resources Allocation',
x_label='Batch',
y_label='Bandit Allocation (%)')
def plot_gain(rst: SplitTestRunner, reg: EpsilonGreedyRunner, rts: ThompsonSamplingRunner):
"""
Plots the returns of each kind of test over the batches it ran.
Args:
rst: Split test simulation class.
reg: Epsilon-greedy MAB test simulation class.
rts: Thompson sampling test simulation class.
"""
rst_gain = rst.df_clicks.sum(axis=1)
reg_gain = reg.df_clicks.sum(axis=1)
rts_gain = rts.df_clicks.sum(axis=1)
df_gains = pd.concat([rst_gain, reg_gain, rts_gain], axis=1)
df_gains.rename(columns={0: 'Split Test', 1: 'Epsilon Greedy MAB', 2: 'Thompson Sampling'}, inplace=True)
df_gains.plot(figsize=(12,6))
def z_calc(p1: float, p2: float, n1: int, n2: int) -> float:
"""
Calculates the z value for the difference of two sample means.
Args:
p1: Mean of first sample.
p2: Mean of second sample.
n1: Sample size of the first sample.
n2: Sample size of the second sample.
Returns:
The calculated z value.
"""
p_star = (p1*n1 + p2*n2) / (n1 + n2)
return (p2 - p1) / math.sqrt(p_star*(1 - p_star)*((1.0 / n1) + (1.0 / n2)))
def sample_required(p1: float, p2: float, alpha: float=0.01) -> int:
"""
Calculates the sample size needed to provide a test power of (1-alpha)
in which we are testing h0: p2-p1==0.
Args:
p1: Mean of first sample.
p2: Mean of second sample.
alpha: Type one error.
Returns:
The calculated sample size.
"""
n = 1
while True:
z = z_calc(p1, p2, n1=n, n2=n)
p = 1 - stats.norm.cdf(z)
if p < alpha:
break
n += 1
return n
def closest_pair(bandits: List[float]) -> (float, float):
"""
Finds the two bandits that have the closest average return
and returns their returns.
Args:
bandits: list of average bandit returns.
Returns:
The average return of the two bandits that are the most
similar.
"""
bandits.sort()
min_diff = float("inf")
p1 = -1
p2 = -1
for i in range(len(bandits)-1):
if bandits[i+1] - bandits[i] < min_diff:
p1 = bandits[i]
p2 = bandits[i+1]
min_diff = p2 - p1
return p1, p2
def get_minimum_sample(bandits: List[float], alpha: float=0.01) -> int:
"""
Gets the minimum sample size required to provide a test power of
(1-alpha/len(bandits)), this includes the p-value Bonferroni correction.
Args:
bandits: list of average bandit returns.
alpha: Type one error.
Returns:
Needed sample size.
"""
p1, p2 = closest_pair(bandits)
return sample_required(p1, p2, alpha/len(bandits))
def get_number_batches(examples_needed: int, batch_size: int) -> int:
"""
Gets the number of batches based on the number of needed examples
and provided batch size.
Args:
examples_needed: Total number of examples needed.
batch_size: Number of examples per batch.
Returns:
The number of batches.
"""
return math.ceil(examples_needed / batch_size)
def simulate(bandits: List[float], alpha: float=0.001, batch_size: int=5000, simulations: int=1000, epsilon: float=0.1, sample_size: int=1000) -> (SplitTestRunner, EpsilonGreedyRunner, ThompsonSamplingRunner):
"""
Runs simulations for split tests, Epsilon-greedy multi-armed bandits
and Thompson sampling based on the provided parameters.
Args:
bandits: list of average bandit returns.
alpha: Type one error.
batch_size: Number of examples per batch.
simulations: Number of simaltions per test type.
epsilon: percentage of exploration in epsilon-greedy MAB
sample_size: sample size per bandit for each Thompson sampling batch
Returns:
The classes for each type of test.
"""
examples_needed = get_minimum_sample(bandits, alpha)
batches = get_number_batches(examples_needed, batch_size)
rst = SplitTestRunner(bandits,
batch_size=batch_size,
batches=batches,
simulations=simulations)
reg = EpsilonGreedyRunner(bandits,
epsilon=epsilon,
batch_size=batch_size,
batches=batches,
simulations=simulations)
rts = ThompsonSamplingRunner(bandits,
alpha_priors=None,
beta_priors=None,
sample_size=sample_size,
batch_size=batch_size,
batches=batches,
simulations=simulations)
rst.run()
reg.run()
rts.run()
return rst, reg, rts
def run_simulations(bandits: List[float], alpha: float=0.001, batch_size: int=1000,
simulations: int=1000, epsilon: float=0.1, sample_size: int=1000):
"""
Starts the simulation process, gets the results and makes plots.
Args:
bandits: list of average bandit returns.
alpha: Type one error.
batch_size: Number of examples per batch.
simulations: Number of simaltions per test type.
epsilon: percentage of exploration in epsilon-greedy MAB
sample_size: sample size per bandit for each Thompson sampling batch
"""
rst, reg, rts = simulate(bandits=bandits,
alpha=alpha,
batch_size=batch_size,
simulations=simulations,
epsilon=epsilon,
sample_size=sample_size)
plot_stacked_plots(rst=rst,
reg=reg,
rts=rts)
plot_gain(rst=rst,
reg=reg,
rts=rts)
def simulate_ts(bandits: List[float], alpha_priors: List[float], beta_priors: List[float], batch_size: int=5000, simulations: int=1000, sample_size: int=1000):
batches = 700
rts = ThompsonSamplingRunner(bandits,
alpha_priors=None,
beta_priors=None,
sample_size=sample_size,
batch_size=batch_size,
batches=batches,
simulations=simulations)
rts.run()
stacked_plot(rts.df_bids,
title='Thompson Sampling Resources Allocation',
x_label='Batch',
y_label='Bandit Allocation (%)')
return rts.df_bids, rts.df_clicks
bandit_returns = [.001, .004, .0042, 0, 0.006]
alpha = 0.01
batch_size = 5000
simulations = 100
epsilon = 0.1
sample_size = 1000
run_simulations(bandits=bandit_returns,
alpha=alpha,
batch_size=batch_size,
simulations=simulations,
epsilon=epsilon,
sample_size=sample_size)
bandit_returns = [.006, .004, .0041, 0, 0.0062]
alpha_priors = [1., 1., 1., 1., 1.]
beta_priors = [1., 1., 1., 1., 1.]
batch_size = 5000
simulations = 2
sample_size = 1000
b, c = simulate_ts(bandits=bandit_returns,
alpha_priors=alpha_priors,
beta_priors=beta_priors,
batch_size=batch_size,
simulations=simulations,
sample_size=sample_size)
bandit_returns = [.006, .004, .0041, 0, 0.0058]
alpha_priors = [1., 1., 1., 1., 1.]
beta_priors = [1., 1., 1., 1., 1.]
batch_size = 5000
simulations = 2
sample_size = 1000
b, c = simulate_ts(bandits=bandit_returns,
alpha_priors=alpha_priors,
beta_priors=beta_priors,
batch_size=batch_size,
simulations=simulations,
sample_size=sample_size)