#!/usr/bin/env python
# coding: utf-8
#
Table of Contents
#
# In[13]:
from texttable import Texttable
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import pandas as pd
import scipy.stats
# In[2]:
import warnings
warnings.filterwarnings("ignore")
# In this chapter, we will only be dealing with special distributions which are frequently encountered in practice.
# # Discrete Uniform Distribution
# We have seen PMF and PDF in last chapter, here we review an example of discrete uniform distribution.
# In[62]:
unif_d = sp.stats.randint.rvs(0, 10, size = 1000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(unif_d, density = True, bins = 30)
ax.set_title('Discrete Uniform Frequency Distribution', size = 16)
plt.show()
# ## Discrete Probability Mass Function
# In[63]:
x = np.arange(1, 11)
unif_pmf = sp.stats.randint.pmf(x, 2, 10)
fig, ax = plt.subplots(figsize = (12, 7))
ax.scatter(x, unif_pmf, s = 100, color = 'green', label = 'Low = 2, High = 9')
ax.plot(x,unif_pmf, lw = 3, color = 'k')
ax.legend(fontsize = 18, loc = 'center')
# # Binomial Distribution
# The binomial experiment has 4 properties:
#
# * A sequence of $n$ identical trials
# * Only two outcomes are possible: $success$ or $failure$
# * The probability of success $p$ does not change from trial to trial
# * Trials are independent events
#
# The PMF of binomial ditribution is
#
#
#
# \begin{equation}
# _nC_k p^kq^{n-k}
# \end{equation}
#
#
# ## Example 1
# We use a simple example to explain the PMF.
#
# Every month, a personal banker might meet 50 people enquiring loans, emperically 30% of them have bad credit history. So calculate probability from 1:50 people that have bad credit history.
# First we can asnwer what the probability is that the banker meet exactly $14$ people who have bad credit history?
# In[8]:
n = 50
k = 14 # what is the prob that exact 14 ppl she met had bad credit history?
b = scipy.special.comb(50, 14)
p = .3
f_bino = b*p**k*(1-p)**(n-k)
print('The prob of meeting {0} persons who have bad credit history is {1:.2f}%.'.format(k, f_bino * 100))
# Or we can use ```scipy.stats.binom.pmf```. To show the probability from $1$ to $50$ persons.
# In[9]:
n = 50
p = .3
bad_credit = np.arange(1, 51)
y = sp.stats.binom.pmf(bad_credit, n, p)
fig, ax = plt.subplots(figsize = (10, 6))
ax.plot(bad_credit, y, lw = 3, color = 'r', alpha = .5)
ax.set_ylim([0, .13])
ax.set_title('The probability that from 1 to 50 persons who have bad credit history', size = 16, x = .5, y = 1.02)
ax.set_xlabel('Number of Person Who Has Bad Credit History', size = 12)
ax.set_ylabel('Probability', size = 12)
plt.show()
# Because there are the $30\%$ of people who have bad credit history, thus we can see the mean of the distribution is $15$ out of $50$.
# ## Example 2
# Next we can formulate another question using ```scipy.stats.binom.cdf```.
#
# If a trade trades $n$ times a month, he has a $p%$ chance of winning the trade, find out the probability that he can win at least $k$ trades a month.
# We can also ask what are the probabilites he wins more than $k$ trades, or between $(k_1, \ k_2)$ trades.
# In[10]:
n = 20 # number of trades
p = .55 # winning odds
k = 12 # at least win k trades
k1 = 14
k2 = 4
win_less = sp.stats.binom.cdf(k, n, p)
win_more = 1- sp.stats.binom.cdf(k, n, p)
win_betw = sp.stats.binom.cdf(k1, n, p) - sp.stats.binom.cdf(k2, n, p)
# In[12]:
table = Texttable()
table.set_cols_align([ 'c', 'c', 'c'])
table.set_cols_valign([ 'm', 'm', 'm'])
table.add_rows([['Win Less', ' Win More ', ' Win btw 4~14 '],
[ win_less, win_more, win_betw]])
print(table.draw())
# What if the probability of wining changing from 0.1 to 0.8, what is the probability that he wins less than 6 trades, assuming every month he trades 20 times.
# In[19]:
chance = np.arange(.1, .81, .05)
win_less = sp.stats.binom.cdf(6, 20, chance)
data_dict = {'win_rate':chance, 'win_less':win_less} # I am using annotation, that's why i am using pandas to locate the index
df = pd.DataFrame(data_dict)
df.plot(figsize = (10, 6))
plt.show()
# To show the idea in a scatter plot.
# In[25]:
fig, ax = plt.subplots(figsize = (12, 7))
ax.scatter(chance, win_less)
txt = 'This point means if the winning rate is 40%,\n then the probability of winning less\n than 6 trades is 25%.'
ax.annotate(txt, xy = (df.iloc[6][0], df.iloc[6][1]),
xytext = (.35, .5), weight = 'bold', color = 'Tomato', size = 14,
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b'))
plt.show()
# ## Binomial R.V. Generator
# ```sp.stats.binom.rvs()``` is the randome generator for binomial distribution.
# In[32]:
n = 1000 # number of trials
p = 0.3 # probability of success
bino = sp.stats.binom.rvs(n, p, size = 1000)
# In[33]:
txt = 'This line is the $y =p \cdot n$ \n and where highest draw should be.'
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(bino,bins= 80)
ax.axvline(p*n, color = 'Tomato', ls = '--', lw = 3)
ax.annotate(txt, xy = (p*n, h.max()*0.7),
xytext = (p*n ,h.max()-5), weight = 'bold',
color = 'Tomato', size = 14,
arrowprops = dict(arrowstyle = '->',
connectionstyle = 'arc3', color = 'b'))
ax.set_title('Generated Binomial R.V.s', size = 18)
plt.show()
# According to the example, if success rate $p=.3$, total trials of $1000$.Then the counts of successes have the frequency distribution in the histogram.
# ## Moments of Binomial Distribution
# We can also calculate all important moments of distributions by using ```sp.stats.binom.stats(n, p, moments = 'mvsk')```. The text table library is used to format the output.
# In[34]:
n = 1000 # number of trials
p = 0.3 # probability of success
bino_stats = sp.stats.binom.stats(n, p, moments = 'mvsk') # mean, variance, skewness, kurtosis
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c'])
table.set_cols_valign([ "m", "m", 'm','m'])
table.add_rows([["mean", " variance ", ' skewness ', 'kurtosis '],
[ bino_stats[0], bino_stats[1], bino_stats[2], bino_stats[3]]])
print(table.draw())
# # Poisson Distribution
# When $n\rightarrow\infty$ and $p\rightarrow0$,binomial distribution converges to Poisson distribution, i.e. when $n$ is large and $p$ is small, we can use Poisson to approximate Binomial.
# ## Example
# For an ordinary traders, every trade has $1/1000$ probabilty to encounter a 'Black Swan' shock, a trader sets $20$ trades per month, what is the probability that she will encounter $2$ 'Black Swan' within 5 years?
#
# This problem can be solved by Binomial, the formulation as below
#
# \begin{equation}
# \text{Number of Trades} = 20\times 12\times 5=1200\\
# P(x=2) = \binom{1200}{2}\Big(\frac{1}{1000}\Big)^2\Big(\frac{999}{1000}\Big)^{1198}
# \end{equation}
# Of course, we can employ Poisson PMF to approximate it, the parameter for Poisson distribution is
#
# \begin{equation}
# \lambda = np = 1200 \times \frac{1}{1000} = 1.2
# \end{equation}
#
#
# that means every 5 years, there is in average 1.2 times of Black Swan shock.
#
# \begin{equation}
# P(x=2)=\frac{\lambda^ke^{-\lambda}}{k!}=\frac{1.2^2e^{-1.2}}{2!}
# \end{equation}
# To use the PMF directly
# In[39]:
sp.special.comb(1200, 2)*(1/1000)**2*(999/1000)**1198
# Or use the built-in function for Poisson ```sp.stats.poisson.pmf()```.
# In[41]:
k = 2
n = 20 * 12 * 5 # 20 times per month, and 5 years span
p = 1/1000
lambdaP = p * n # lambda in Poisson
p = sp.stats.poisson.pmf(k, lambdaP)
print('The probability of having {0} BS shock(s) in a span of 5 years is {1:.2f}%.'.format(k, p*100))
# Suprisingly high probability of having 1 BS shock, and one BS shock could possibly wipe out the whole account. Take care, traders, the survival is pivotal!
#
# So what's the probability of having more than $k$ times BS shock?
# In[42]:
k = 2
prob_sf = 1 - sp.stats.poisson.cdf(k, lambdaP)
prob_sf_inv = sp.stats.poisson.cdf(k, lambdaP)
print('The probability of having more than %1.0f BS shock in 5 years is %3.3f%%.' % (k, prob_sf*100)) # double % can escape formating
# ## Poisson R.V. Generator
# In[46]:
poiss = sp.stats.poisson.rvs(lambdaP, size = 10000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(poiss, density = True, bins = 10)
ax.set_title('Poisson Frequency Distribution', size = 16)
plt.show()
# ## Moments of Poisson Distribution
# Again we can compute all most important moments
# In[47]:
poiss_stats = sp.stats.poisson.stats(k, moments = 'mvsk') # mean, variance, skewness, kurtosis
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c'])
table.set_cols_valign([ "m", "m", 'm','m'])
table.add_rows([["mean", " variance ", ' skewness ', 'kurtosis '],
[ poiss_stats[0], poiss_stats[1], poiss_stats[2], poiss_stats[3]]])
print(table.draw())
# # Geometric Distribution
# The PMF of G-D is $f(k)=p(1-p)^k$, where $k$ is a non-negative integer. $p$ is the probability of success, and $k$ is the number of failures before success.
# ## Example
#
# So G-D is trying to answer 'How many times you have to fail in order to embrace the first success?'
#
# If each trade has 1/1000 chance to encounter a BS shock, what is the prob of encounter a BS shock after trade $k$ times.
# In[48]:
k = 10
p = 1/1000
geodist = (1 - 1/1000)**10*1/1000
geodist
print('The probability of observing exact %1.0f times of safe trading before a BS shock is %3.3f.' %(k, geodist))
# Or use built-in ```sp.stats.geom.pmf()```
# In[49]:
sp.stats.geom.pmf(10, 1/1000)
# In[50]:
sp.stats.geom.cdf(k,p)
print('The probability of observing %1.0f or fewer than %1.0f times of safe trading before a BS shock is %3.3f.'%(k,k,geodist))
# ## G-D Moments and Generator
# The moments and randome generator are as following
# In[51]:
mean, var, skew, kurt = sp.stats.geom.stats(p, moments='mvsk')
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c', 'c', 'c'])
table.set_cols_valign([ "m", "m", 'm','m', 'm', 'm'])
table.add_rows([['p','k',"mean", " variance ", ' skewness ', ' kurtosis '],
[ p, k, mean, var, skew, kurt]])
print(table.draw())
# In[53]:
geomet = sp.stats.geom.rvs(p, size = 10000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(geomet, density = True, bins = 30)
ax.set_title('Geometric Frequency Distribution', size = 16)
plt.show()
# # Hypergeometric Distribution
# The main difference between hypergeometric and binomial is that the former sampling is not independent of each other, i.e. the sampling is without replacement.
#
# The formula is
# \begin{equation}
# f(x) =\frac{{K\choose x} {M-K \choose N-x}}{{M\choose N}}
# \end{equation}
# ## Example
# There is 100 candies in an urn, 20 are red, 80 are blue, if we take 5 of them out from it. What is the prob of having exact 4 red candies?
#
# Solution:
#
# \begin{equation}
# \frac{{20\choose4}{80\choose1}}{{100\choose5}}
# \end{equation}
#
# To solve it:
# In[54]:
C = sp.special.comb(20, 4)*sp.special.comb(80, 1) /sp.special.comb(100, 5)
print('The prob of have 4 red candies by taking 5 out is %1.6f%%.'% (C*100))
# Or with built-in function ```sp.stats.hypergeom.pmf()```
# In[55]:
# pmf(x, M, N, n, loc=0)
hgeo = sp.stats.hypergeom.pmf(4, 100, 20, 5,loc = 0) # the arg order the same as MATLAB, not recommended
hgeo
# What is the probability that at most $4$ red candies are taken? i.e. The sum-up of probabilities of drawing $1$, $2$, $3$, and $4$ candies.
# In[57]:
hgeo_cdf = sp.stats.hypergeom.cdf(4, 100, 20, 5,loc = 0) # the arg order the same as MATLAB, not recommended
print('The prob of have at most 4 red candies by taking 5 out is %1.6f%%.' %(hgeo_cdf*100))
# In[59]:
hgeo_rv = sp.stats.hypergeom.rvs(100, 20, 5, size = 10000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(hgeo_rv, density = True)
ax.set_title('Geometric Frequency Distribution', size = 16)
s = ''' It can be interpreted as: if 100 candies in the urn,
20 are red, take 5 out of 100.
The chance of getting from 1 to 5 red candies,
is shown in the chart.
As we can see it is nearly impossible to get 4 or 5 candies.
But getting 1 red candy is the most possible outcome.'''
ax.text(1.6, .5, s, fontsize=14, color ='red')
plt.show()
# In[60]:
mean, var, skew, kurt = sp.stats.hypergeom.stats(100, 20, 5, moments='mvsk')
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c', 'c', 'c'])
table.set_cols_valign([ "m", "m", 'm','m', 'm', 'm'])
table.add_rows([['M','k',"mean", " variance ", ' skewness ', ' kurtosis '],
[ 100, 20, mean, var, skew, kurt]])
print(table.draw())
# # Continous Uniform Distribution
# The PDF of Uniform Distribution is
#
# \begin{equation}
# f(x)=\frac{1}{b-a}
# \end{equation}
#
# And its r.v. generator is one of most frequently used function in NumPy: ```np.random.rand()```
# In[65]:
unif = np.random.rand(10000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(unif, density = True, bins = 30)
ax.set_title('Continous Uniform Frequency Distribution', size = 16)
plt.show()
# ## CDF and PDF of Continous Uniform Distribution
# In[66]:
# pdf(x, loc=0, scale=1)
# cdf(x, loc=0, scale=1)
x = np.linspace(-.2, 1.2, 100)
unif_pdf = sp.stats.uniform.pdf(x)
unif_cdf = sp.stats.uniform.cdf(x)
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (17, 7))
ax[0].plot(x,unif_pdf, lw = 4, label = 'PDF of Continouse U-D')
ax[0].set_xlim([-.1, 1.1])
ax[0].set_ylim([0, 2])
ax[0].legend(fontsize = 16)
ax[1].plot(x,unif_cdf, lw = 4, label = 'CDF of Continouse U-D')
ax[1].set_xlim([-.2, 1.2])
ax[1].set_ylim([0, 2])
ax[1].legend(fontsize = 16)
plt.show()
# In[67]:
mean, var, skew, kurt = sp.stats.uniform.stats(moments='mvsk')
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c'])
table.set_cols_valign([ "m", "m", 'm','m'])
table.add_rows([["mean", " variance ", ' skewness ', ' kurtosis '],
[ mean, var, skew, kurt]])
print(table.draw())
# # Normal Distribution
# The most convenient method of creating normal distribution plot is to use ```sp.stats.norm.pdf()```.
#
# The PDF function single normal distribution is
#
# $$
# f(x)= \frac{1}{\sigma \sqrt{2 \pi}} e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^{2}}
# $$
# In[69]:
mu = 2
sigma = 1
x = np.arange(-2, 6, 0.1)
norm_pdf = sp.stats.norm.pdf(x, mu, sigma)
norm_cdf = sp.stats.norm.cdf(x, mu, sigma)
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (17, 7))
ax[0].plot(x,norm_pdf, lw = 4, label = 'PDF of Normal Distribution', ls = '--')
ax[0].legend(fontsize = 16, loc = 'lower left', framealpha=0.2)
ax[1].plot(x,norm_cdf, lw = 4, label = 'CDF of Normal Distribution')
ax[1].legend(fontsize = 16,fancybox=True, framealpha=0.5)
plt.show()
# ## Inverse Normal CDF
# The inverse normal CDF is commonly used in calculating $p$-value, the plot below is useful to understand the idea.
# In[72]:
norm_95_r = sp.stats.norm.ppf(.975) # ppf mean point percentage function, actually inverse CDF
norm_95_l = sp.stats.norm.ppf(.025)
x = np.linspace(-5, 5, 200)
y = sp.stats.norm.pdf(x)
xl = np.linspace(-5, norm_95_l, 100)
yl = sp.stats.norm.pdf(xl)
xr = np.linspace(norm_95_r, 5, 100)
yr = sp.stats.norm.pdf(xr)
fig, ax = plt.subplots(figsize = (17, 7))
ax.plot(x,y, lw = 4, label = 'PDF of Normal Distribution', ls = '-', color = 'orange')
ax.set_ylim([0, .45])
ax.fill_between(x, y, 0, alpha=0.1, color = 'blue')
ax.fill_between(xl,yl, 0, alpha=0.6, color = 'blue')
ax.fill_between(xr,yr, 0, alpha=0.6, color = 'blue')
ax.text(-.2, 0.15, '95%', fontsize = 20)
ax.text(-2.3, 0.015, '2.5%', fontsize = 12, color = 'white')
ax.text(2.01, 0.015, '2.5%', fontsize = 12, color = 'white')
ax.annotate('±%.4f' %norm_95_r, xy = (norm_95_r, 0), xytext = (-.4, .05), weight = 'bold', color = 'r',
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b'), fontsize = 16)
ax.annotate('±%.4f' %norm_95_r, xy = (norm_95_l, 0), xytext = (-.4, .05), weight = 'bold', color = 'r',
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b'), fontsize = 16)
ax.set_title('Normal Distribution And 2.5% Shaded Area', size = 20)
plt.show()
# ## Normal R.V. Generator
# To generate a normal distribution histogram
# In[74]:
# rvs(loc=0, scale=1, size=1, random_state=None)
norm_rv = sp.stats.norm.rvs(mu, sigma, size = 5000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(norm_rv, density = True, bins = 50)
ax.set_title('Normal Frequency Distribution', size = 16)
plt.show()
# In[75]:
mean, var, skew, kurt = sp.stats.norm.stats(mu, sigma, moments='mvsk')
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c'])
table.set_cols_valign([ "m", "m", 'm','m'])
table.add_rows([["mean", " variance ", ' skewness ', ' kurtosis '],
[ mean, var, skew, kurt]])
print(table.draw())
# ## Bivariate Normal Distribution
# The multivariate normal distribution density function is
# \begin{equation}
# f_\boldsymbol{X}(x_1,...,x_2)=\frac{1}{\sqrt{(2\pi)^k|\Sigma|}}\exp{\Big(-\frac{(x-\mu)^T\Sigma^{-1}(x-\mu)}{2}\Big)}
# \end{equation}
#
# ### 1st Method of Formulation
# In[76]:
get_ipython().run_line_magic('matplotlib', 'inline')
mu_x = 0
sigma_x = 2
mu_y = 0
sigma_y = 2
#Create grid and multivariate normal
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X, Y = np.meshgrid(x,y)
pos = np.empty(X.shape + (2,))
pos[:, :, 0] = X; pos[:, :, 1] = Y # more technical than next one
norm = sp.stats.multivariate_normal([mu_x, mu_y], [[sigma_x, 0], [0, sigma_y]]) # frozen
#Make a 3D plot
fig = plt.figure(figsize = (10, 6))
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, norm.pdf(pos),cmap='viridis',linewidth=0)
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
plt.show()
# ### 2st Method of Formulation
# In[77]:
#Parameters to set
mu_x = 0
sigma_x = 7
mu_y = 0
sigma_y = 15
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X,Y = np.meshgrid(x,y)
pos = np.array([X.flatten(),Y.flatten()]).T # more intuitive than last one
rv = sp.stats.multivariate_normal([mu_x, mu_y], [[sigma_x, 0], [0, sigma_y]])
fig = plt.figure(figsize=(10,10))
ax0 = fig.add_subplot(111)
ax0.contourf(X, Y, rv.pdf(pos).reshape(500,500),cmap='viridis')
plt.show()
# # Beta Distribution
# The PDF of Beta distribution is
#
#
# \begin{equation}
# f(x, a, b)=\frac{\Gamma(a+b) x^{a-1}(1-x)^{b-1}}{\Gamma(a) \Gamma(b)}
# \end{equation}
#
# where $0\leq x \leq 1$ and $a>0$, $b>0$, $\Gamma$ is the Gamma function.
#
# Beta distribution is a natural good choice for priors in Bayesian econometrics since its range is bounded in unit.
# In[89]:
params[0][0][1]
# In[98]:
x = np.linspace(0, 1, 100)
fig = plt.figure(figsize=(9, 9))
ax = fig.add_subplot(111)
params = np.array([[[.5,.5]],
[[5,1]],
[[1,5]],
[[2,2]],
[[2,3]],
[[3,2]],
[[1,1]]])
for i in range(params.shape[0]):
beta_pdf = sp.stats.beta.pdf(x, params[i][0][0], params[i][0][1])
ax.plot(x, beta_pdf, lw = 3, label = '$a = %.1f, b = %.1f$' % (params[i][0][0], params[i][0][1]))
ax.legend()
ax.axis([0, 1, 0, 3])
# In[97]:
fig = plt.figure(figsize=(9, 9))
ax = fig.add_subplot(111)
x = np.linspace(0,1,100)
params = np.array([[[.5,.5]],
[[5,1]],
[[1,5]],
[[2,2]],
[[2,3]],
[[3,2]],
[[1,1]]])
for i in range(params.shape[0]):
beta_cdf = sp.stats.beta.cdf(x, params[i][0][0], params[i][0][1])
ax.plot(x, beta_cdf, lw = 3, label = '$a = %.1f, b = %.1f$' % (params[i][0][0], params[i][0][1]))
ax.legend()
ax.axis([0, 1, 0, 1])
# # $\chi^2$ Distribution
# $\chi^2$ distribution is closely connected with normal distributions, if $z$ has the standard normal distribution, then $z^2$ has the $\chi^2$ distribution with $d.f.=1$. And futher,if
#
#
# \begin{equation}
# z_1, z_2, ..., z_k \sim i.i.d. N(0, 1)
# \end{equation}
#
# Then summation has a $\chi^2$ distribution of $d.f. = k$:
#
#
# \begin{equation}
# \sum_{i=0}^k z_i^2 \sim \chi^2(k)
# \end{equation}
#
# ## $\chi^2$ PDF and CDF
# In[99]:
k = 1
x = np.linspace(0, 5, 100)
chi_pdf = sp.stats.chi2.pdf(x, k)
fig, ax = plt.subplots(figsize = (12, 7))
ax.plot(x, chi_pdf, lw = 3, c = 'r', label = '$\chi^2$ distribution with d.f. = 1')
ax.legend(fontsize = 18)
plt.show()
# In[100]:
fig, ax = plt.subplots(figsize = (12, 7))
for i in range(1, 6):
x = np.linspace(0, 5, 100)
chi_pdf = sp.stats.chi2.pdf(x, i)
ax.plot(x, chi_pdf, lw = 3, label = '$\chi^2$ distribution with d.f. = %.0d'%i)
ax.legend(fontsize = 12)
ax.axis([0, 5, 0, 1])
plt.show()
# # F Distribution
# If $U_1$ has a $\chi^2$ distribution with $\nu_1$ d.f. and $U_2$ has a $\chi^2$ distribution with $\nu_2$ d.f., then
#
#
# \begin{equation}
# \frac{U_1/\nu_1}{U_2/\nu_2}\sim F(\nu_1, \nu_2)
# \end{equation}
#
# We are using $F$ distribution for ratios of variances.
# In[101]:
x = np.linspace(0, 5, 100)
fig, ax = plt.subplots(figsize = (12, 7))
df1 = 10
df2 = 5
f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2)
ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$' %(df1, df2))
df1 = 5
df2 = 10
f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2)
ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$ '%(df1, df2))
df1 = 8
df2 = 15
f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2)
ax.plot(x, f_pdf, lw =3, color = 'red', label = '$df_1 = %.d, df_2 = %.d$' %(df1, df2))
df1 = 15
df2 = 8
f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2)
ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$ '%(df1, df2))
ax.legend(fontsize = 15)
plt.show()
# In[ ]: