Introduction to A/B testing with Python.
From the Data Science from Scratch book.
import math as m
from typing import Tuple
def normal_probability_below(x: float, mu: float = 0, sigma: float = 1) -> float:
    """The normal CDF: the probability that an N(mu, sigma) variable is below `x`."""
    return (1 + m.erf((x - mu) / m.sqrt(2) / sigma)) / 2
def normal_probability_above(lo: float, mu: float = 0, sigma: float = 1) -> float:
    """The probability that an N(mu, sigma) variable is above `lo`."""
    return 1 - normal_probability_below(lo, mu, sigma)
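A couple of quick sanity checks (my own, not from the book): half of the probability mass lies below the mean, and the two tails at any cutoff sum to one.

# Sanity checks (illustrative)
assert normal_probability_below(0) == 0.5
assert m.isclose(normal_probability_below(1.0) + normal_probability_above(1.0), 1.0)
assert abs(normal_probability_below(1.96) - 0.975) < 1e-3  # ~97.5% of the mass lies below +1.96 sigma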
def two_sided_p_value(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability of getting a value at least as extreme as `x`,
    given that our values come from a normal distribution with mean `mu` and
    standard deviation `sigma`.
    """
    # If x is greater than the mean, the tail is everything above x
    if x >= mu:
        return 2 * normal_probability_above(x, mu, sigma)
    # If x is less than the mean, the tail is everything below x
    else:
        return 2 * normal_probability_below(x, mu, sigma)
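For instance (an illustration, not from the book), a value 1.96 standard deviations from the mean of a standard normal gives the familiar two-sided p-value of roughly 0.05, and the function is symmetric around the mean:

print(two_sided_p_value(1.96))   # ~0.05
print(two_sided_p_value(-1.96))  # same value by symmetry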
def estimate_parameters(N: int, true: int) -> Tuple[float, float]:
    """Return the estimated probability and its standard error,
    given `true` successes out of `N` trials."""
    p = true / N
    sigma = m.sqrt(p * (1 - p) / N)
    return p, sigma
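For example (my own numbers), 200 conversions out of 1000 trials give an estimated rate of 0.2 with a standard error of about 0.0126:

p_hat, sigma_hat = estimate_parameters(1000, 200)
print(p_hat, sigma_hat)  # 0.2, ~0.01265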
$H_0$: $p_A$ and $p_B$ are the same.
Equivalently, $p_A - p_B = 0$.
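The test statistic below standardizes the observed difference by the combined standard error of the two estimates; under $H_0$ it is approximately standard normal, which is why we can pass it to two_sided_p_value with the default mu and sigma:

$$z = \frac{\hat{p}_B - \hat{p}_A}{\sqrt{\hat{\sigma}_A^2 + \hat{\sigma}_B^2}}$$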
def a_b_test_statistic(N_A: int, A: int, N_B: int, B: int) -> float:
    """Z-statistic for the difference between the conversion rates of A and B."""
    p_A, sigma_A = estimate_parameters(N_A, A)
    p_B, sigma_B = estimate_parameters(N_B, B)
    return (p_B - p_A) / m.sqrt(sigma_A ** 2 + sigma_B ** 2)
z = a_b_test_statistic(1000, 200, 1000, 180)
z, two_sided_p_value(z)
(-1.1403464899034472, 0.2541419765422359)
That is, the probability of a difference at least this large occurring, assuming the two probabilities are the same, is about 0.25, so we cannot reject $H_0$.
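As a cross-check (my own addition, assuming scipy is installed), the same two-sided p-value can be obtained from the standard normal CDF in scipy.stats:

# Optional cross-check using scipy's standard normal CDF
from scipy.stats import norm
print(2 * norm.cdf(-abs(z)))  # should match two_sided_p_value(z) above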
Let's decrease the number of conversions in group $B$ even more.
z = a_b_test_statistic(1000, 200, 1000, 150)
z, two_sided_p_value(z)
(-2.948839123097944, 0.003189699706216853)
That is, the probability of a difference at least this large occurring, if the two probabilities were the same, is only about 0.003, so in this case we would reject $H_0$.
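Assuming a conventional 5% significance level (my assumption, not stated above), the decision reduces to a simple comparison:

# Decision at an (assumed) 5% significance level
alpha = 0.05
print(two_sided_p_value(z) < alpha)  # True for the 200 vs. 150 case: reject H0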