#!/usr/bin/env python
# coding: utf-8

# # Hypothesis test power from scratch with Python

# Calculating the power of hypothesis tests.
# 
# The code is from the [Data Science from Scratch](https://www.oreilly.com/library/view/data-science-from/9781492041122/) book.

# ## Libraries and helper functions

# In[1]:

from typing import Tuple
import math as m


# In[2]:

def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Cumulative distribution function of the normal distribution."""
    return (1 + m.erf((x - mu) / m.sqrt(2) / sigma)) / 2

normal_probability_below = calc_normal_cdf


# In[3]:

def normal_probability_between(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Probability that a normal variable falls between lo and hi."""
    return normal_probability_below(hi, mu, sigma) - normal_probability_below(lo, mu, sigma)


# In[5]:

def calc_inverse_normal_cdf(p: float, mu: float = 0, sigma: float = 1,
                            tolerance: float = 1E-5, show_steps: bool = False) -> float:
    """Find the z for which the normal CDF equals p, using binary search."""
    if p == 0:
        return -m.inf
    if p == 1:
        return m.inf

    # In case it is not a standard normal distribution, calculate the standard normal value first and then rescale
    if mu != 0 or sigma != 1:
        return mu + sigma * calc_inverse_normal_cdf(p, tolerance=tolerance)

    low_z = -10
    hi_z = 10
    if show_steps:
        print(f"{'':<19}".join(['low_z', 'mid_z', 'hi_z']), "\n")
    while hi_z - low_z > tolerance:
        mid_z = (low_z + hi_z) / 2      # midpoint of the current bracket
        mid_p = calc_normal_cdf(mid_z)  # CDF value at the midpoint
        if mid_p < p:
            low_z = mid_z               # midpoint is too low, keep searching above it
        else:
            hi_z = mid_z                # midpoint is too high, keep searching below it
        if show_steps:
            print(f"{low_z:<24}{mid_z:<24}{hi_z:<24}")
    return mid_z


# In[6]:

def normal_upper_bound(probability: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the z for which P(Z <= z) equals the given probability."""
    return calc_inverse_normal_cdf(probability, mu, sigma)


# In[7]:

def normal_lower_bound(probability: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the z for which P(Z >= z) equals the given probability."""
    return calc_inverse_normal_cdf(1 - probability, mu, sigma)


# In[8]:

def normal_two_sided_bounds(probability: float, mu: float = 0, sigma: float = 1) -> Tuple[float, float]:
    """Return the symmetric bounds around the mean that contain the given probability."""
    if probability == 0:
        return 0, 0
    tail_probability = (1 - probability) / 2
    # The lower bound has tail_probability below it, the upper bound has tail_probability above it
    lower_bound = normal_upper_bound(tail_probability, mu, sigma)
    upper_bound = normal_lower_bound(tail_probability, mu, sigma)
    return lower_bound, upper_bound


# In[9]:

def normal_approximation_to_binomial(n: int, p: float) -> Tuple[float, float]:
    """Return mu and sigma of the normal approximation to a Binomial(n, p)."""
    mu = p * n
    sigma = m.sqrt(p * (1 - p) * n)
    return mu, sigma


# ## Type 1 Error and Tolerance

# Let's make our null hypothesis ($H_0$) that the probability of heads is 0.5.

# In[10]:

mu_0, sigma_0 = normal_approximation_to_binomial(1000, 0.5)
mu_0, sigma_0


# We set our significance level at 5%. That is, we are willing to let our test produce type 1 errors (false positives) in 5% of cases. In the coin-flipping example, this means we expect 5% of the outcomes of a fair coin to fall outside the interval computed below.

# In[11]:

lo, hi = normal_two_sided_bounds(0.95, mu_0, sigma_0)
lo, hi


# ## Type 2 Error and Power

# A type 2 error is a false negative: a case where we fail to reject the null hypothesis even though we should.
# 
# Let's assume that, contrary to $H_0$, the actual probability of heads is 0.55.

# In[12]:

mu_1, sigma_1 = normal_approximation_to_binomial(1000, 0.55)
mu_1, sigma_1


# The type 2 probability is the probability that the real distribution (with $p = 0.55$) still produces a result inside the 95% region of $H_0$. In this particular case, in about 11% of the cases we will wrongly fail to reject our null hypothesis.
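# Before computing this analytically in the next cell, here is a minimal simulation sketch (not from the book) that approximates the same probability empirically. The function name `simulate_type_2_rate` and its defaults are illustrative choices: we flip 1000 coins with the assumed true heads probability of 0.55 and count how often the total still lands inside the acceptance region [lo, hi] of $H_0$, i.e. how often we would fail to reject.

# In[ ]:

import random

def simulate_type_2_rate(n: int = 1000, true_p: float = 0.55,
                         num_trials: int = 10_000, seed: int = 0) -> float:
    """Estimate the type 2 error rate by simulating repeated coin-flip experiments."""
    random.seed(seed)                  # make the sketch reproducible
    failures_to_reject = 0
    for _ in range(num_trials):
        heads = sum(1 for _ in range(n) if random.random() < true_p)
        if lo <= heads <= hi:          # result falls inside H_0's acceptance region
            failures_to_reject += 1
    return failures_to_reject / num_trials

simulate_type_2_rate()  # should land close to the analytic value computed below (~0.11)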
# In[13]:

type_2_probability = normal_probability_between(lo, hi, mu_1, sigma_1)
type_2_probability


# The power of the test is then the probability of rightly rejecting $H_0$.

# In[14]:

power = 1 - type_2_probability
power


# Now, let's redefine our null hypothesis so that we expect the probability of heads to be less than or equal to 0.5.
# 
# In this case we have a one-sided test, so the rejection region is only the upper tail.

# In[15]:

hi = normal_upper_bound(0.95, mu_0, sigma_0)
hi


# Because the one-sided test no longer spends any of the 5% significance on the lower tail, its rejection threshold is lower (roughly 526 heads instead of 531). It therefore rejects $H_0$ more easily when the true probability is 0.55, which gives it a smaller type 2 probability and a greater power.

# In[16]:

type_2_probability = normal_probability_below(hi, mu_1, sigma_1)
type_2_probability


# In[17]:

power = 1 - type_2_probability
power
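# (Not from the book.) To summarize the workflow, the steps above can be wrapped into a small helper that computes the power of the two-sided test for any alternative heads probability. The name `two_sided_power` and its parameters are illustrative, not part of the book's code; it simply chains the helpers defined earlier.

# In[ ]:

def two_sided_power(p_alt: float, n: int = 1000, p_null: float = 0.5,
                    confidence: float = 0.95) -> float:
    """Power of the two-sided test of p == p_null when the actual probability is p_alt."""
    mu_null, sigma_null = normal_approximation_to_binomial(n, p_null)
    lo_b, hi_b = normal_two_sided_bounds(confidence, mu_null, sigma_null)
    mu_alt, sigma_alt = normal_approximation_to_binomial(n, p_alt)
    # Power = 1 - P(result falls inside H_0's acceptance region | true p = p_alt)
    return 1 - normal_probability_between(lo_b, hi_b, mu_alt, sigma_alt)

# The further the alternative is from 0.5, the larger the power:
[round(two_sided_power(p), 3) for p in (0.51, 0.55, 0.60)]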