Probability test

Calculating probabilities of values in relation to threshold values.

The code is based upon the respective example from Data Science from Scratch.

Libraries

In [1]:
from typing import Tuple
import math as m

import pandas as pd
import numpy as np

Probability below a threshold

$ p = \frac {1 + \text{erf} \left( \frac {x - \mu} {\sqrt{2} \sigma} \right)} {2} $

where

$ \text{erf}(z) = \frac {2} {\sqrt{\pi}} \ \int_0^z e^{-t^2} dt $

is the error function.

In [2]:
def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the normal cumulative distribution function evaluated at ``x``.

    Mathematically this is ``(1 + erf(z)) / 2`` with
    ``z = (x - mu) / (sqrt(2) * sigma)``. It is evaluated through the
    complementary error function, ``erfc(-z) / 2``, which is the same quantity
    but does not round to exactly 0.0 far in the lower tail, where
    ``1 + erf(z)`` cancels catastrophically.

    Args:
        x: Threshold at which the CDF is evaluated.
        mu: Mean of the distribution (default 0).
        sigma: Standard deviation of the distribution (default 1).

    Returns:
        The probability that a normal variable with the given mean and
        standard deviation is below ``x``.

    Raises:
        ZeroDivisionError: If ``sigma`` is 0.
    """
    z = (x - mu) / m.sqrt(2) / sigma
    # erfc(-z) == 1 + erf(z), but keeps precision when erf(z) is close to -1.
    return m.erfc(-z) / 2
In [3]:
# Alias: the probability of a value falling below a threshold is the CDF at that threshold.
normal_probability_below = calc_normal_cdf
In [4]:
# Tabulate the probability below each threshold -1.0, -0.9, ..., 1.0
# (default mu and sigma), one tab-separated row per threshold.
thresholds = [tenth / 10 for tenth in range(-10, 11)]
for threshold in thresholds:
    probability = normal_probability_below(threshold)
    print(f"{threshold}\t{probability:.4f}")
-1.0	0.1587
-0.9	0.1841
-0.8	0.2119
-0.7	0.2420
-0.6	0.2743
-0.5	0.3085
-0.4	0.3446
-0.3	0.3821
-0.2	0.4207
-0.1	0.4602
0.0	0.5000
0.1	0.5398
0.2	0.5793
0.3	0.6179
0.4	0.6554
0.5	0.6915
0.6	0.7257
0.7	0.7580
0.8	0.7881
0.9	0.8159
1.0	0.8413

Probability above a threshold

In [5]:
def normal_probability_above(lo: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable lies above ``lo``.

    This is the complement of the normal CDF at ``lo``. It is computed directly
    as ``erfc(z) / 2`` with ``z = (lo - mu) / (sqrt(2) * sigma)`` rather than
    as ``1 - cdf``; the subtraction would round to exactly 0.0 far in the upper
    tail because the CDF there is indistinguishable from 1.0 in floating point.

    Args:
        lo: Lower threshold.
        mu: Mean of the distribution (default 0).
        sigma: Standard deviation of the distribution (default 1).

    Returns:
        The upper-tail probability beyond ``lo``.

    Raises:
        ZeroDivisionError: If ``sigma`` is 0.
    """
    z = (lo - mu) / m.sqrt(2) / sigma
    # erfc(z) == 1 - erf(z), evaluated without loss of significance for large z.
    return m.erfc(z) / 2
In [6]:
# Tabulate the probability above each threshold -1.0, -0.9, ..., 1.0
# (default mu and sigma), one tab-separated row per threshold.
thresholds = [tenth / 10 for tenth in range(-10, 11)]
for threshold in thresholds:
    probability = normal_probability_above(threshold)
    print(f"{threshold}\t{probability:.4f}")
-1.0	0.8413
-0.9	0.8159
-0.8	0.7881
-0.7	0.7580
-0.6	0.7257
-0.5	0.6915
-0.4	0.6554
-0.3	0.6179
-0.2	0.5793
-0.1	0.5398
0.0	0.5000
0.1	0.4602
0.2	0.4207
0.3	0.3821
0.4	0.3446
0.5	0.3085
0.6	0.2743
0.7	0.2420
0.8	0.2119
0.9	0.1841
1.0	0.1587

Probability between thresholds

In [7]:
# Thresholds -1.0, -0.8, ..., 1.0: integer tenths first, then scaled to floats.
tenths = np.arange(-10, 11, 2)
domain = tenths / 10
domain
Out[7]:
array([-1. , -0.8, -0.6, -0.4, -0.2,  0. ,  0.2,  0.4,  0.6,  0.8,  1. ])
In [8]:
def normal_probability_between(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable lies between ``lo`` and ``hi``.

    Computed as the below-threshold probability at ``hi`` minus the one at
    ``lo``. No ordering check is made on ``lo`` and ``hi``.

    Args:
        lo: Lower threshold.
        hi: Upper threshold.
        mu: Mean of the distribution (default 0).
        sigma: Standard deviation of the distribution (default 1).
    """
    below_hi = normal_probability_below(hi, mu, sigma)
    below_lo = normal_probability_below(lo, mu, sigma)
    return below_hi - below_lo
In [9]:
# Table of the probability between every pair of thresholds (rows: lo,
# columns: hi). Only pairs with lo <= hi are meaningful, so the lower triangle
# is left as NaN; the diagonal is the probability of an empty interval, 0.
# The frame is built in one shot rather than enlarged cell by cell, and uses
# np.nan because the np.NaN alias no longer exists in NumPy 2.0.
probabilities_between = pd.DataFrame(
    [
        [normal_probability_between(lo, hi) if lo <= hi else np.nan for hi in domain]
        for lo in domain
    ],
    index=domain,
    columns=domain,
)

probabilities_between
Out[9]:
-1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0
-1.0 0.0 0.0532 0.115598 0.185923 0.262085 0.341345 0.420604 0.496766 0.567092 0.629489 0.682689
-0.8 NaN 0.0000 0.062398 0.132723 0.208885 0.288145 0.367404 0.443566 0.513891 0.576289 0.629489
-0.6 NaN NaN 0.000000 0.070325 0.146487 0.225747 0.305007 0.381169 0.451494 0.513891 0.567092
-0.4 NaN NaN NaN 0.000000 0.076162 0.155422 0.234681 0.310843 0.381169 0.443566 0.496766
-0.2 NaN NaN NaN NaN 0.000000 0.079260 0.158519 0.234681 0.305007 0.367404 0.420604
0.0 NaN NaN NaN NaN NaN 0.000000 0.079260 0.155422 0.225747 0.288145 0.341345
0.2 NaN NaN NaN NaN NaN NaN 0.000000 0.076162 0.146487 0.208885 0.262085
0.4 NaN NaN NaN NaN NaN NaN NaN 0.000000 0.070325 0.132723 0.185923
0.6 NaN NaN NaN NaN NaN NaN NaN NaN 0.000000 0.062398 0.115598
0.8 NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.000000 0.053200
1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.000000

Probabilities outside a threshold range

In [10]:
def normal_probability_outside(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable lies outside ``lo``..``hi``.

    This is the complement of ``normal_probability_between`` for the same
    arguments.

    Args:
        lo: Lower threshold.
        hi: Upper threshold.
        mu: Mean of the distribution (default 0).
        sigma: Standard deviation of the distribution (default 1).
    """
    inside = normal_probability_between(lo, hi, mu, sigma)
    return 1 - inside
In [11]:
# Full table of the probability outside every (lo, hi) threshold pair
# (rows: lo, columns: hi), built in one shot rather than enlarged cell by cell.
probabilities_outside = pd.DataFrame(
    [[normal_probability_outside(lo, hi) for hi in domain] for lo in domain],
    index=domain,
    columns=domain,
)

# Display only the pairs with lo <= hi. An explicit boolean mask is used so
# that hidden cells are not conflated with genuine zero values, and so the
# np.NaN alias (removed in NumPy 2.0) is not needed.
upper_triangle = np.triu(np.ones(probabilities_outside.shape, dtype=bool))
probabilities_outside.where(upper_triangle)
Out[11]:
-1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0
-1.0 1.0 0.9468 0.884402 0.814077 0.737915 0.658655 0.579396 0.503234 0.432908 0.370511 0.317311
-0.8 NaN 1.0000 0.937602 0.867277 0.791115 0.711855 0.632596 0.556434 0.486109 0.423711 0.370511
-0.6 NaN NaN 1.000000 0.929675 0.853513 0.774253 0.694993 0.618831 0.548506 0.486109 0.432908
-0.4 NaN NaN NaN 1.000000 0.923838 0.844578 0.765319 0.689157 0.618831 0.556434 0.503234
-0.2 NaN NaN NaN NaN 1.000000 0.920740 0.841481 0.765319 0.694993 0.632596 0.579396
0.0 NaN NaN NaN NaN NaN 1.000000 0.920740 0.844578 0.774253 0.711855 0.658655
0.2 NaN NaN NaN NaN NaN NaN 1.000000 0.923838 0.853513 0.791115 0.737915
0.4 NaN NaN NaN NaN NaN NaN NaN 1.000000 0.929675 0.867277 0.814077
0.6 NaN NaN NaN NaN NaN NaN NaN NaN 1.000000 0.937602 0.884402
0.8 NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.000000 0.946800
1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.000000