In the previous post we calculated [proabilities]({% post_url 2020-10-07-probability-test %}) and [z values]({% post_url 2020-10-09-inverse-normal-cdf %}) with Python. Here we continue with finding the interval boundaries belonging to a particular tail or two-sided probabiltiy.
import math as m
import numpy as np
def to_string(number: float, column_width: int = 20) -> str:
# return str(number).ljust(column_width)
return f"{str(number): <{column_width}}"
def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
return (1 + m.erf((x - mu) / m.sqrt(2) / sigma)) / 2
def calc_inverse_normal_cdf(p: float, mu:float = 0, sigma: float = 1, tolerance: float = 1E-5, show_steps=False) -> float:
if p == 0: return -np.inf
if p == 1: return np.inf
# In case it is not a standard normal distribution, calculate the standard normal first and then rescale
if mu != 0 or sigma != 1:
return mu + sigma * calc_inverse_normal_cdf(p, tolerance=tolerance)
low_z = -10
hi_z = 10
if show_steps: print(f"{'':<19}".join(['low_z', 'mid_z', 'hi_z']), "\n")
while hi_z - low_z > tolerance:
mid_z = (low_z + hi_z) / 2
mid_p = calc_normal_cdf(mid_z)
if mid_p < p:
low_z = mid_z
else:
hi_z = mid_z
if show_steps: print("\t".join(map(to_string, [low_z, mid_z, hi_z])))
return mid_z
def normal_upper_bound(probabilty: float, mu: float = 0, sigma: float = 1) -> float:
"""Return z for which P(Z <= z) = probability"""
return calc_inverse_normal_cdf(probabilty, mu, sigma)
def normal_lower_bound(probabilty: float, mu: float = 0, sigma: float = 1) -> float:
"""Return z for which P(Z >= z) = probability"""
return calc_inverse_normal_cdf(1 - probabilty, mu, sigma)
for i in np.arange(0, 1.1, .1):
print(f"{i:.1f}: {normal_upper_bound(i): >10.4f}, {normal_lower_bound(i): >10.4f}")
0.0: -inf, inf 0.1: -1.2816, 1.2816 0.2: -0.8416, 0.8416 0.3: -0.5244, 0.5244 0.4: -0.2533, 0.2533 0.5: -0.0000, -0.0000 0.6: 0.2533, -0.2533 0.7: 0.5244, -0.5244 0.8: 0.8416, -0.8416 0.9: 1.2816, -1.2816 1.0: inf, -inf
Two variations
def normal_two_sided_bounds(probability: float, mu: float = 0, sigma: float = 1) -> float:
if probability == 0: return 0, 0
tail_probability = (1 - probability) / 2
lower_bound = normal_upper_bound(tail_probability, mu, sigma)
upper_bound = normal_lower_bound(tail_probability, mu, sigma)
return lower_bound, upper_bound
def normal_two_sided_bounds2(probability: float, mu: float = 0, sigma: float = 1) -> float:
if probability == 0: return 0, 0
if mu != 0:
raise ValueError("Requires standard normal distribution")
tail_probability = (1 - probability) / 2
z = normal_upper_bound(tail_probability, mu, sigma)
return -z, z
By default they do not produce the same results at the extremes because of how the inverse cdf function defines the theoretical uppern and lower boundaries (as in these cases it should produce +/- infinity).
Accordingly, it is useful to define extreme values (i.e. probablities of 0 and 1)
for i in np.arange(0, 1.1, .1):
print(f"{i:.1f}: {normal_two_sided_bounds(i) == normal_two_sided_bounds2(i)}, {normal_two_sided_bounds(i)}, {normal_two_sided_bounds2(i)}")
0.0: True, (0, 0), (0, 0) 0.1: False, (-0.12566566467285156, 0.12566566467285156), (0.12566566467285156, -0.12566566467285156) 0.2: False, (-0.2533435821533203, 0.2533435821533203), (0.2533435821533203, -0.2533435821533203) 0.3: False, (-0.3853130340576172, 0.3853130340576172), (0.3853130340576172, -0.3853130340576172) 0.4: False, (-0.5243968963623047, 0.5243968963623047), (0.5243968963623047, -0.5243968963623047) 0.5: False, (-0.6744861602783203, 0.6744861602783203), (0.6744861602783203, -0.6744861602783203) 0.6: False, (-0.8416271209716797, 0.8416271209716797), (0.8416271209716797, -0.8416271209716797) 0.7: False, (-1.0364246368408203, 1.0364246368408203), (1.0364246368408203, -1.0364246368408203) 0.8: False, (-1.2815570831298828, 1.2815570831298828), (1.2815570831298828, -1.2815570831298828) 0.9: False, (-1.6448497772216797, 1.6448497772216797), (1.6448497772216797, -1.6448497772216797) 1.0: False, (-inf, inf), (inf, -inf)