# 6: Central limit theorem¶

#### Back to main page¶

In [2]:
# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (10, 5)

N=1000 #max number of samples
num = 1000 # number of experiments
a=2
b=4
# intsize the number of intervals
plot_width = 2

data =np.random.rand(N, num)*(b-a)+a
mean = (b+a)/2
sigma = np.sqrt((b-a)**2/12)

def pdf_func(xdata, mu, sigma):
val = np.exp(-np.power(xdata-mu,2)/(2*sigma**2))/(sigma *np.sqrt(2*np.pi))
return val

def epmf(x, inter, intsize):
epmf_values = np.zeros(intsize-1)
for i in range(intsize-1):
length = inter[i+1]-inter[i]
epmf_values[i] = np.sum((inter[i]<=x) & (x<inter[i+1]))/(x.size*length)
return epmf_values

def mean_hist(n, intsize=21):
xvalues = np.linspace(a,b, 1000)
plt.plot(xvalues, pdf_func(xvalues, mean, sigma/np.sqrt(n)), linewidth=2, color="red")
x = np.sum(data[0:n,:], axis=0)/n
xmax = np.max(x)
xmin = np.min(x)
inter = np.linspace(xmin,xmax,intsize)
length = inter[1]-inter[0]
epmf_values = epmf(x, inter, intsize)
plt.bar(inter[:intsize-1], epmf_values, width=length,
color='#039be5', edgecolor='black', linewidth=1,
align="edge", label="True histogran")
plt.figtext(0.8,0.8, "n = {}".format(n), ha="left", va="top",
backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")

def mean_hist_std(n, intsize=20):
x = np.sum(data[0:n,:], axis=0)/n
z = np.sqrt(n)*(x-mean)/sigma
zmax = np.round(np.max(z), 2)
zmin = np.round(np.min(z), 2)
inter = np.linspace(zmin,zmax,intsize)
length = inter[1]-inter[0]
epmf_values = epmf(z, inter, intsize)

# plot normal distribution
xvalues = np.linspace(zmax,zmin, 1000)
plt.plot(xvalues, pdf_func(xvalues, 0, 1), linewidth=2, color="red")

plt.bar(inter[:intsize-1], epmf_values, width=length,
color='#039be5', edgecolor='black', linewidth=1,
align="edge", label="True histogran")
plt.figtext(0.8,0.8, "n = {}".format(n), ha="left", va="top",
backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.xlim(-5, 5)
plt.title("CLT for uniform distribution")

mean_hist_std(20, 15)

plt.show();

In [1]:
# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (10, 5)

N=1000 #max number of samples
num = 1000 # number of experiments
# intsize the number of intervals
theta=2

data =np.random.exponential(theta, [N, num] )
mean = theta
sigma = theta

def pdf_func(xdata, mu, sigma):
val = np.exp(-np.power(xdata-mu,2)/(2*sigma**2))/(sigma *np.sqrt(2*np.pi))
return val

def epmf(x, inter, intsize):
epmf_values = np.zeros(intsize-1)
for i in range(intsize-1):
length = inter[i+1]-inter[i]
epmf_values[i] = np.sum((inter[i]<=x) & (x<inter[i+1]))/(x.size*length)
return epmf_values

def mean_hist(n, intsize=25):
xvalues = np.linspace(mean-5,mean+5, 1000)
plt.plot(xvalues, pdf_func(xvalues, mean, sigma/np.sqrt(n)), linewidth=2, color="red")
x = np.sum(data[0:n,:], axis=0)/n
xmax = np.max(x)
xmin = np.min(x)
inter = np.linspace(xmin,xmax,intsize)
length = inter[1]-inter[0]
epmf_values = epmf(x, inter, intsize)
plt.bar(inter[:intsize-1], epmf_values, width=length,
color='#039be5', edgecolor='black', linewidth=1,
align="edge", label="True histogran")
plt.figtext(0.8,0.8, "n = {}".format(n), ha="left", va="top",
backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")

def mean_hist_std(n, intsize=26):
x = np.sum(data[0:n,:], axis=0)/n
z = np.sqrt(n)*(x-mean)/sigma
zmax = np.round(np.max(z), 2)
zmin = np.round(np.min(z), 2)
inter = np.linspace(zmin,zmax,intsize)
length = inter[1]-inter[0]
epmf_values = epmf(z, inter, intsize)

xvalues = np.linspace(zmin,zmax, 1000)
plt.plot(xvalues, pdf_func(xvalues, 0, 1), linewidth=2, color="red")

plt.bar(inter[:intsize-1], epmf_values, width=length,
color='#039be5', edgecolor='black', linewidth=1,
align="edge", label="True histogran")
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.figtext(0.8,0.8, "n = {}".format(n), ha="left", va="top",
backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")
plt.xlim(-5, 5)
plt.title("CLT for exponential distribution")

# mean_hist_std(10)
# mean_hist(10, 51)
mean_hist_std(50, 15)

plt.show();


### Binomial vs Normal vs Poisson¶

In [84]:
# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb, factorial
from scipy.stats import poisson

def pdf_func(xdata, mu, sigma):
val = np.exp(-np.power(xdata-mu,2)/(2*sigma**2))/(sigma *np.sqrt(2*np.pi))
return val

def poiss_binom_pmf(n, p):
lam = n*p
q = 1-p
N = 50
brange_x = np.arange(0, np.minimum(n, 100), 1)
prange_x = np.arange(0, np.minimum(n, 100), 1)

mean = n*p
sigma = np.sqrt(n*p*q)
xvalues = np.linspace(mean-15,mean+15, 1000)

def poiss_pmf(x, lam):
pmf_val = np.exp(-lam) * np.power(lam, x) / factorial(x)
return pmf_val

#     ppmf_values = np.array([poiss_pmf(x, lam) for x in prange_x])
ppmf_values = np.array([poisson.pmf(x, lam) for x in prange_x])
def binom_pmf(x):
pmf_val = comb(n, x, exact=True) * p**x * q**(n-x)
return pmf_val
mean = n*p

bpmf_values = np.array([binom_pmf(x) for x in brange_x])

# plot setup
plt.figure(figsize=(14,7))
plt.axhline(y=0, color='k')
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Plotting poisson
plt.bar(prange_x, ppmf_values, width=1, color=(0.1, 0.1, 1, 0.15), edgecolor=(0.1, 0.1, 1, 0.3),
linewidth=1.3, label="Poisson",zorder=-1)

# Plotting binomial
plt.bar(brange_x, bpmf_values, width=1, color=(0.3, 0.5, 0.3, 0.25), edgecolor=(0.1, 0.1, 1, 0.3),
linewidth=1.3, label="Binomial", zorder=-2)

# Plotting normal for Binomial
plt.plot(xvalues, pdf_func(xvalues, mean, sigma), linewidth=3, color="green",
label=r"normal $\approx$ Binomial", zorder=3)

# Plotting normal for Poisson
plt.plot(xvalues, pdf_func(xvalues, lam, np.sqrt(lam)), linewidth=3, color="blue",
label=r"normal $\approx$ Poisson", zorder=3)

plt.figtext(0.81,0.7, " n = {}".format(n)+"\n p = {}".format(p), ha="left", va="top",
backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")
plt.legend()
plt.plot();

poiss_binom_pmf(50, 0.5)