From the Data Science from Scratch book.
import math as m
def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
return (1 + m.erf((x - mu) / m.sqrt(2) / sigma)) / 2
def calc_inverse_normal_cdf(p: float, mu:float = 0, sigma: float = 1, tolerance: float = 1E-5, show_steps=False) -> float:
if p == 0: return -np.inf
if p == 1: return np.inf
# In case it is not a standard normal distribution, calculate the standard normal first and then rescale
if mu != 0 or sigma != 1:
return mu + sigma * calc_inverse_normal_cdf(p, tolerance=tolerance)
low_z = -10
hi_z = 10
if show_steps: print(f"{'':<19}".join(['low_z', 'mid_z', 'hi_z']), "\n")
while hi_z - low_z > tolerance:
mid_z = (low_z + hi_z) / 2
mid_p = calc_normal_cdf(mid_z)
if mid_p < p:
low_z = mid_z
else:
hi_z = mid_z
if show_steps: print("\t".join(map(to_string, [low_z, mid_z, hi_z])))
return mid_z
def normal_upper_bound(probabilty: float, mu: float = 0, sigma: float = 1) -> float:
"""Return z for which P(Z <= z) = probability"""
return calc_inverse_normal_cdf(probabilty, mu, sigma)
def normal_lower_bound(probabilty: float, mu: float = 0, sigma: float = 1) -> float:
"""Return z for which P(Z >= z) = probability"""
return calc_inverse_normal_cdf(1 - probabilty, mu, sigma)
def normal_two_sided_bounds(probability: float, mu: float = 0, sigma: float = 1) -> float:
if probability == 0: return 0, 0
tail_probability = (1 - probability) / 2
lower_bound = normal_upper_bound(tail_probability, mu, sigma)
upper_bound = normal_lower_bound(tail_probability, mu, sigma)
return lower_bound, upper_bound
We would like to know if a coin is 'fair' or not (i.e. p=0.5)
We flip the coin 1000 times and observe 525 heads.
As we do not know the 'real' probability, we need to rely on our observed results.
p_hat = 525 / 1000
mu = p_hat
sigma = m.sqrt(p_hat * (1 - p_hat) / 1000)
p_hat, mu, sigma
(0.525, 0.525, 0.015791611697353755)
While we cannot tell if this is really the probabiliy of heads, we can calculate the interval within which we expect the real probability fall with our chosen confidence (here: 95%)
confidence = 0.95
normal_two_sided_bounds(confidence, mu, sigma)
(0.4940490278129096, 0.5559509721870904)
As 0.5 is within the interval, we cannot reject the hypothesis that the coin is fair.
In another example we observe 540 heads in 1000 flips.
p_hat = 540 / 1000
mu = p_hat
sigma = m.sqrt(p_hat * (1 - p_hat) / 1000)
p_hat, mu, sigma
(0.54, 0.54, 0.015760710643876435)
Going through the same steps as above, we get that 0.5 falls outside our 95% confidence interval and therefore we reject the hypothesis that the coin is 'fair'.
normal_two_sided_bounds(confidence, mu, sigma)
(0.5091095927295919, 0.5708904072704082)