Calculating the probability that a normally distributed value lies below, above, between, or outside given threshold values.
The code is based on the corresponding example from *Data Science from Scratch*.
from typing import Tuple
import math as m
import pandas as pd
import numpy as np
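The probability $p$ that a normally distributed value with mean $\mu$ and standard deviation $\sigma$ lies below $x$ is
$ p = \frac {1 + \text{erf} \left( \frac {x - \mu} {\sqrt{2} \sigma} \right)} {2} $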
where
$ \text{erf} \ z = \frac {2} {\sqrt{\pi}} \ \int_0^z e^{-t^2} dt $
is the error function.
def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the normal cumulative distribution function evaluated at ``x``.

    Args:
        x: Threshold at which the CDF is evaluated.
        mu: Mean of the distribution (default 0).
        sigma: Standard deviation of the distribution (default 1).

    Returns:
        ``(1 + erf((x - mu) / (sqrt(2) * sigma))) / 2``.

    Raises:
        ZeroDivisionError: If ``sigma`` is 0.
    """
    # Argument of the error function: the deviation from the mean, scaled
    # by sqrt(2) and then by sigma (in that order).
    erf_argument = (x - mu) / m.sqrt(2) / sigma
    return (1 + m.erf(erf_argument)) / 2


# The CDF at x is exactly the probability of falling below x, so the
# threshold-oriented name is a plain alias of the same function.
normal_probability_below = calc_normal_cdf
# Tabulate P(X < x) of the standard normal distribution for
# x = -1.0, -0.9, ..., 1.0 as tab-separated "x<TAB>probability" lines.
for tenths in range(-10, 11):
    threshold = tenths / 10
    print(f"{threshold}\t{normal_probability_below(threshold):.4f}")
-1.0 0.1587 -0.9 0.1841 -0.8 0.2119 -0.7 0.2420 -0.6 0.2743 -0.5 0.3085 -0.4 0.3446 -0.3 0.3821 -0.2 0.4207 -0.1 0.4602 0.0 0.5000 0.1 0.5398 0.2 0.5793 0.3 0.6179 0.4 0.6554 0.5 0.6915 0.6 0.7257 0.7 0.7580 0.8 0.7881 0.9 0.8159 1.0 0.8413
def normal_probability_above(lo: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable exceeds ``lo``.

    Args:
        lo: Lower threshold.
        mu: Mean of the distribution (default 0).
        sigma: Standard deviation of the distribution (default 1).

    Returns:
        ``1 - CDF(lo)``, evaluated as ``erfc((lo - mu) / (sqrt(2) * sigma)) / 2``.

    Raises:
        ZeroDivisionError: If ``sigma`` is 0.
    """
    # erfc(z) == 1 - erf(z), but it is computed directly. Subtracting a CDF
    # value close to 1 from 1 would cancel every significant digit in the
    # upper tail (e.g. lo = 10 would yield exactly 0.0).
    return m.erfc((lo - mu) / m.sqrt(2) / sigma) / 2
# Tabulate P(X > x) of the standard normal distribution for
# x = -1.0, -0.9, ..., 1.0 as tab-separated "x<TAB>probability" lines.
for tenths in range(-10, 11):
    threshold = tenths / 10
    print(f"{threshold}\t{normal_probability_above(threshold):.4f}")
-1.0 0.8413 -0.9 0.8159 -0.8 0.7881 -0.7 0.7580 -0.6 0.7257 -0.5 0.6915 -0.4 0.6554 -0.3 0.6179 -0.2 0.5793 -0.1 0.5398 0.0 0.5000 0.1 0.4602 0.2 0.4207 0.3 0.3821 0.4 0.3446 0.5 0.3085 0.6 0.2743 0.7 0.2420 0.8 0.2119 0.9 0.1841 1.0 0.1587
# Thresholds -1.0, -0.8, ..., 1.0: an integer range in tenths, scaled down
# by 10, so every value comes from a single division.
domain = np.arange(start=-10, stop=11, step=2) / 10
domain
array([-1. , -0.8, -0.6, -0.4, -0.2, 0. , 0.2, 0.4, 0.6, 0.8, 1. ])
def normal_probability_between(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable lies between ``lo`` and ``hi``.

    Args:
        lo: Lower threshold.
        hi: Upper threshold.
        mu: Mean of the distribution (default 0).
        sigma: Standard deviation of the distribution (default 1).

    Returns:
        The probability below ``hi`` minus the probability below ``lo``;
        the difference is negative when ``hi`` is less than ``lo``.
    """
    below_upper = normal_probability_below(hi, mu, sigma)
    below_lower = normal_probability_below(lo, mu, sigma)
    return below_upper - below_lower
# Table of P(lo < X < hi) for the standard normal distribution:
# rows are the lower threshold, columns the upper threshold.
# The frame is built in one step instead of being enlarged cell by cell.
probabilities_between = pd.DataFrame(
    [[normal_probability_between(lo, hi) for hi in domain] for lo in domain],
    index=domain,
    columns=domain,
)
# Keep only the cells with lo <= hi and blank the rest with NaN. A boolean
# mask is used rather than zeroing and then replacing 0, so the genuine
# zeros on the diagonal (lo == hi) survive without a separate repair loop.
# This also avoids the `np.NaN` alias, which NumPy 2.0 removed.
upper_triangle = np.triu(np.ones(probabilities_between.shape, dtype=bool))
probabilities_between = probabilities_between.where(upper_triangle)
probabilities_between
lo / hi | -1.0 | -0.8 | -0.6 | -0.4 | -0.2 | 0.0 | 0.2 | 0.4 | 0.6 | 0.8 | 1.0 |
---|---|---|---|---|---|---|---|---|---|---|---|
-1.0 | 0.0 | 0.0532 | 0.115598 | 0.185923 | 0.262085 | 0.341345 | 0.420604 | 0.496766 | 0.567092 | 0.629489 | 0.682689 |
-0.8 | NaN | 0.0000 | 0.062398 | 0.132723 | 0.208885 | 0.288145 | 0.367404 | 0.443566 | 0.513891 | 0.576289 | 0.629489 |
-0.6 | NaN | NaN | 0.000000 | 0.070325 | 0.146487 | 0.225747 | 0.305007 | 0.381169 | 0.451494 | 0.513891 | 0.567092 |
-0.4 | NaN | NaN | NaN | 0.000000 | 0.076162 | 0.155422 | 0.234681 | 0.310843 | 0.381169 | 0.443566 | 0.496766 |
-0.2 | NaN | NaN | NaN | NaN | 0.000000 | 0.079260 | 0.158519 | 0.234681 | 0.305007 | 0.367404 | 0.420604 |
0.0 | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.079260 | 0.155422 | 0.225747 | 0.288145 | 0.341345 |
0.2 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.076162 | 0.146487 | 0.208885 | 0.262085 |
0.4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.070325 | 0.132723 | 0.185923 |
0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.062398 | 0.115598 |
0.8 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.053200 |
1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 |
def normal_probability_outside(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable lies outside ``lo``..``hi``.

    Args:
        lo: Lower threshold.
        hi: Upper threshold.
        mu: Mean of the distribution (default 0).
        sigma: Standard deviation of the distribution (default 1).

    Returns:
        One minus the probability of lying between ``lo`` and ``hi``.
    """
    inside = normal_probability_between(lo, hi, mu, sigma)
    return 1 - inside
# Table of P(X < lo or X > hi) for the standard normal distribution:
# rows are the lower threshold, columns the upper threshold.
# The frame is built in one step instead of being enlarged cell by cell.
probabilities_outside = pd.DataFrame(
    [[normal_probability_outside(lo, hi) for hi in domain] for lo in domain],
    index=domain,
    columns=domain,
)
# Display only the cells with lo <= hi; the others become NaN. A boolean
# mask avoids both the zero-then-replace round trip and the `np.NaN`
# alias, which NumPy 2.0 removed. The full table stays in
# `probabilities_outside`.
probabilities_outside.where(np.triu(np.ones(probabilities_outside.shape, dtype=bool)))
lo / hi | -1.0 | -0.8 | -0.6 | -0.4 | -0.2 | 0.0 | 0.2 | 0.4 | 0.6 | 0.8 | 1.0 |
---|---|---|---|---|---|---|---|---|---|---|---|
-1.0 | 1.0 | 0.9468 | 0.884402 | 0.814077 | 0.737915 | 0.658655 | 0.579396 | 0.503234 | 0.432908 | 0.370511 | 0.317311 |
-0.8 | NaN | 1.0000 | 0.937602 | 0.867277 | 0.791115 | 0.711855 | 0.632596 | 0.556434 | 0.486109 | 0.423711 | 0.370511 |
-0.6 | NaN | NaN | 1.000000 | 0.929675 | 0.853513 | 0.774253 | 0.694993 | 0.618831 | 0.548506 | 0.486109 | 0.432908 |
-0.4 | NaN | NaN | NaN | 1.000000 | 0.923838 | 0.844578 | 0.765319 | 0.689157 | 0.618831 | 0.556434 | 0.503234 |
-0.2 | NaN | NaN | NaN | NaN | 1.000000 | 0.920740 | 0.841481 | 0.765319 | 0.694993 | 0.632596 | 0.579396 |
0.0 | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.920740 | 0.844578 | 0.774253 | 0.711855 | 0.658655 |
0.2 | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.923838 | 0.853513 | 0.791115 | 0.737915 |
0.4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.929675 | 0.867277 | 0.814077 |
0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.937602 | 0.884402 |
0.8 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.946800 |
1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 |