# Probability test

This notebook calculates the probability that a normally distributed value lies below, above, between, or outside given threshold values.

The code is based on the corresponding example from *Data Science from Scratch*.

## Libraries

In [1]:
from typing import Tuple
import math as m

import pandas as pd
import numpy as np

## Probability below a threshold

$p = \frac {1 + \text{erf} \left( \frac {x - \mu} {\sqrt{2} \, \sigma} \right)} {2}$

where

$\text{erf}(z) = \frac {2} {\sqrt{\pi}} \int_0^z e^{-t^2} \, dt$

is the error function.

In [2]:
def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the normal cumulative distribution function evaluated at x.

    Mathematically the result equals ``(1 + erf(z)) / 2`` with
    ``z = (x - mu) / (sqrt(2) * sigma)``. It is evaluated as
    ``erfc(-z) / 2`` instead: for x far below the mean, ``erf(z)`` rounds
    to exactly -1 and ``1 + erf(z)`` collapses to 0, whereas ``erfc``
    keeps the small tail probability.

    Args:
        x: Threshold value.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        The probability of a value at or below x.

    Raises:
        ZeroDivisionError: If sigma is 0.
    """
    # Same operand order as the textbook formula: divide by sqrt(2), then sigma.
    z = (x - mu) / m.sqrt(2) / sigma
    return m.erfc(-z) / 2
In [3]:
# Alias: the probability of lying below a threshold is the CDF at that threshold.
normal_probability_below = calc_normal_cdf
In [4]:
# Tabulate the probability below each threshold from -1.0 to 1.0 in steps
# of 0.1, calling the function with its default mu and sigma.
for tenths in range(-10, 11):
    threshold = tenths / 10
    probability = normal_probability_below(threshold)
    print(f"{threshold}\t{probability:.4f}")
-1.0	0.1587
-0.9	0.1841
-0.8	0.2119
-0.7	0.2420
-0.6	0.2743
-0.5	0.3085
-0.4	0.3446
-0.3	0.3821
-0.2	0.4207
-0.1	0.4602
0.0	0.5000
0.1	0.5398
0.2	0.5793
0.3	0.6179
0.4	0.6554
0.5	0.6915
0.6	0.7257
0.7	0.7580
0.8	0.7881
0.9	0.8159
1.0	0.8413

## Probability above a threshold

In [5]:
def normal_probability_above(lo: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable lies above lo.

    Mathematically this is one minus the cumulative distribution function
    at lo. It is evaluated as ``erfc(z) / 2`` with
    ``z = (lo - mu) / (sqrt(2) * sigma)`` rather than by subtracting from 1:
    for lo far above the mean the CDF rounds to exactly 1.0, so the
    subtraction would return 0 and lose the tail probability.

    Args:
        lo: Lower threshold.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        The probability of a value above lo.

    Raises:
        ZeroDivisionError: If sigma is 0.
    """
    z = (lo - mu) / m.sqrt(2) / sigma
    return m.erfc(z) / 2
In [6]:
# Tabulate the probability above each threshold from -1.0 to 1.0 in steps
# of 0.1, calling the function with its default mu and sigma.
for tenths in range(-10, 11):
    threshold = tenths / 10
    probability = normal_probability_above(threshold)
    print(f"{threshold}\t{probability:.4f}")
-1.0	0.8413
-0.9	0.8159
-0.8	0.7881
-0.7	0.7580
-0.6	0.7257
-0.5	0.6915
-0.4	0.6554
-0.3	0.6179
-0.2	0.5793
-0.1	0.5398
0.0	0.5000
0.1	0.4602
0.2	0.4207
0.3	0.3821
0.4	0.3446
0.5	0.3085
0.6	0.2743
0.7	0.2420
0.8	0.2119
0.9	0.1841
1.0	0.1587

## Probability between thresholds

In [7]:
# Thresholds from -1.0 to 1.0 in steps of 0.2: integer tenths are generated
# first and then scaled, so the step is exact before the division.
domain = np.arange(-10, 11, 2) / 10.0
domain
Out[7]:
array([-1. , -0.8, -0.6, -0.4, -0.2,  0. ,  0.2,  0.4,  0.6,  0.8,  1. ])
In [8]:
def normal_probability_between(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable lies between lo and hi.

    Computed as the probability below hi minus the probability below lo.
    No ordering of lo and hi is enforced.

    Args:
        lo: Lower threshold.
        hi: Upper threshold.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        The difference of the two below-threshold probabilities.
    """
    below_hi = normal_probability_below(hi, mu, sigma)
    below_lo = normal_probability_below(lo, mu, sigma)
    return below_hi - below_lo
In [9]:
# Table of P(lo <= X <= hi) for every threshold pair: rows are lo, columns
# are hi. The matrix is built in one pass instead of growing a DataFrame
# cell by cell.
between_values = np.array(
    [[normal_probability_between(lo, hi) for hi in domain] for lo in domain]
)

# Blank out the strictly-lower triangle (lo > hi) by position rather than by
# treating 0 as a sentinel, so the zero diagonal survives without re-patching.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
between_values[np.tril_indices_from(between_values, k=-1)] = np.nan

probabilities_between = pd.DataFrame(between_values, index=domain, columns=domain)

probabilities_between
Out[9]:
-1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0
-1.0 0.0 0.0532 0.115598 0.185923 0.262085 0.341345 0.420604 0.496766 0.567092 0.629489 0.682689
-0.8 NaN 0.0000 0.062398 0.132723 0.208885 0.288145 0.367404 0.443566 0.513891 0.576289 0.629489
-0.6 NaN NaN 0.000000 0.070325 0.146487 0.225747 0.305007 0.381169 0.451494 0.513891 0.567092
-0.4 NaN NaN NaN 0.000000 0.076162 0.155422 0.234681 0.310843 0.381169 0.443566 0.496766
-0.2 NaN NaN NaN NaN 0.000000 0.079260 0.158519 0.234681 0.305007 0.367404 0.420604
0.0 NaN NaN NaN NaN NaN 0.000000 0.079260 0.155422 0.225747 0.288145 0.341345
0.2 NaN NaN NaN NaN NaN NaN 0.000000 0.076162 0.146487 0.208885 0.262085
0.4 NaN NaN NaN NaN NaN NaN NaN 0.000000 0.070325 0.132723 0.185923
0.6 NaN NaN NaN NaN NaN NaN NaN NaN 0.000000 0.062398 0.115598
0.8 NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.000000 0.053200
1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.000000

## Probabilities outside a threshold range

In [10]:
def normal_probability_outside(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variable lies outside lo..hi.

    Computed as the complement of the probability between the thresholds.

    Args:
        lo: Lower threshold.
        hi: Upper threshold.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        One minus the probability of lying between lo and hi.
    """
    inside = normal_probability_between(lo, hi, mu, sigma)
    return 1 - inside
In [11]:
# Table of the probability of lying outside lo..hi for every threshold pair:
# rows are lo, columns are hi. The matrix is built in one pass instead of
# growing a DataFrame cell by cell.
outside_values = np.array(
    [[normal_probability_outside(lo, hi) for hi in domain] for lo in domain]
)

# The full square table stays bound to the name, as before.
probabilities_outside = pd.DataFrame(outside_values, index=domain, columns=domain)

# Display only the upper triangle (lo <= hi). The lower triangle is masked by
# position rather than by treating 0 as a sentinel; np.nan is used because the
# np.NaN alias was removed in NumPy 2.0.
outside_upper = outside_values.copy()
outside_upper[np.tril_indices_from(outside_upper, k=-1)] = np.nan
pd.DataFrame(outside_upper, index=domain, columns=domain)
Out[11]:
-1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0
-1.0 1.0 0.9468 0.884402 0.814077 0.737915 0.658655 0.579396 0.503234 0.432908 0.370511 0.317311
-0.8 NaN 1.0000 0.937602 0.867277 0.791115 0.711855 0.632596 0.556434 0.486109 0.423711 0.370511
-0.6 NaN NaN 1.000000 0.929675 0.853513 0.774253 0.694993 0.618831 0.548506 0.486109 0.432908
-0.4 NaN NaN NaN 1.000000 0.923838 0.844578 0.765319 0.689157 0.618831 0.556434 0.503234
-0.2 NaN NaN NaN NaN 1.000000 0.920740 0.841481 0.765319 0.694993 0.632596 0.579396
0.0 NaN NaN NaN NaN NaN 1.000000 0.920740 0.844578 0.774253 0.711855 0.658655
0.2 NaN NaN NaN NaN NaN NaN 1.000000 0.923838 0.853513 0.791115 0.737915
0.4 NaN NaN NaN NaN NaN NaN NaN 1.000000 0.929675 0.867277 0.814077
0.6 NaN NaN NaN NaN NaN NaN NaN NaN 1.000000 0.937602 0.884402
0.8 NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.000000 0.946800
1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.000000