#!/usr/bin/env python # coding: utf-8 #

Table of Contents

#
# In[13]: from texttable import Texttable import numpy as np import matplotlib.pyplot as plt import scipy as sp import pandas as pd import scipy.stats # In[2]: import warnings warnings.filterwarnings("ignore") # In this chapter, we will only be dealing with special distributions which are frequently encountered in practice. # # Discrete Uniform Distribution # We have seen PMF and PDF in last chapter, here we review an example of discrete uniform distribution. # In[62]: unif_d = sp.stats.randint.rvs(0, 10, size = 1000) fig, ax = plt.subplots(figsize = (12, 7)) h, bins, patches = ax.hist(unif_d, density = True, bins = 30) ax.set_title('Discrete Uniform Frequency Distribution', size = 16) plt.show() # ## Discrete Probability Mass Function # In[63]: x = np.arange(1, 11) unif_pmf = sp.stats.randint.pmf(x, 2, 10) fig, ax = plt.subplots(figsize = (12, 7)) ax.scatter(x, unif_pmf, s = 100, color = 'green', label = 'Low = 2, High = 9') ax.plot(x,unif_pmf, lw = 3, color = 'k') ax.legend(fontsize = 18, loc = 'center') # # Binomial Distribution # The binomial experiment has 4 properties: # # * A sequence of $n$ identical trials # * Only two outcomes are possible: $success$ or $failure$ # * The probability of success $p$ does not change from trial to trial # * Trials are independent events # # The PMF of binomial ditribution is #
#
# # \begin{equation} # _nC_k p^kq^{n-k} # \end{equation} # #
# ## Example 1 # We use a simple example to explain the PMF. # # Every month, a personal banker might meet 50 people enquiring loans, emperically 30% of them have bad credit history. So calculate probability from 1:50 people that have bad credit history. # First we can asnwer what the probability is that the banker meet exactly $14$ people who have bad credit history? # In[8]: n = 50 k = 14 # what is the prob that exact 14 ppl she met had bad credit history? b = scipy.special.comb(50, 14) p = .3 f_bino = b*p**k*(1-p)**(n-k) print('The prob of meeting {0} persons who have bad credit history is {1:.2f}%.'.format(k, f_bino * 100)) # Or we can use ```scipy.stats.binom.pmf```. To show the probability from $1$ to $50$ persons. # In[9]: n = 50 p = .3 bad_credit = np.arange(1, 51) y = sp.stats.binom.pmf(bad_credit, n, p) fig, ax = plt.subplots(figsize = (10, 6)) ax.plot(bad_credit, y, lw = 3, color = 'r', alpha = .5) ax.set_ylim([0, .13]) ax.set_title('The probability that from 1 to 50 persons who have bad credit history', size = 16, x = .5, y = 1.02) ax.set_xlabel('Number of Person Who Has Bad Credit History', size = 12) ax.set_ylabel('Probability', size = 12) plt.show() # Because there are the $30\%$ of people who have bad credit history, thus we can see the mean of the distribution is $15$ out of $50$. # ## Example 2 # Next we can formulate another question using ```scipy.stats.binom.cdf```. # # If a trade trades $n$ times a month, he has a $p%$ chance of winning the trade, find out the probability that he can win at least $k$ trades a month. # We can also ask what are the probabilites he wins more than $k$ trades, or between $(k_1, \ k_2)$ trades. # In[10]: n = 20 # number of trades p = .55 # winning odds k = 12 # at least win k trades k1 = 14 k2 = 4 win_less = sp.stats.binom.cdf(k, n, p) win_more = 1- sp.stats.binom.cdf(k, n, p) win_betw = sp.stats.binom.cdf(k1, n, p) - sp.stats.binom.cdf(k2, n, p) # In[12]: table = Texttable() table.set_cols_align([ 'c', 'c', 'c']) table.set_cols_valign([ 'm', 'm', 'm']) table.add_rows([['Win Less', ' Win More ', ' Win btw 4~14 '], [ win_less, win_more, win_betw]]) print(table.draw()) # What if the probability of wining changing from 0.1 to 0.8, what is the probability that he wins less than 6 trades, assuming every month he trades 20 times. # In[19]: chance = np.arange(.1, .81, .05) win_less = sp.stats.binom.cdf(6, 20, chance) data_dict = {'win_rate':chance, 'win_less':win_less} # I am using annotation, that's why i am using pandas to locate the index df = pd.DataFrame(data_dict) df.plot(figsize = (10, 6)) plt.show() # To show the idea in a scatter plot. # In[25]: fig, ax = plt.subplots(figsize = (12, 7)) ax.scatter(chance, win_less) txt = 'This point means if the winning rate is 40%,\n then the probability of winning less\n than 6 trades is 25%.' ax.annotate(txt, xy = (df.iloc[6][0], df.iloc[6][1]), xytext = (.35, .5), weight = 'bold', color = 'Tomato', size = 14, arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b')) plt.show() # ## Binomial R.V. Generator # ```sp.stats.binom.rvs()``` is the randome generator for binomial distribution. # In[32]: n = 1000 # number of trials p = 0.3 # probability of success bino = sp.stats.binom.rvs(n, p, size = 1000) # In[33]: txt = 'This line is the $y =p \cdot n$ \n and where highest draw should be.' fig, ax = plt.subplots(figsize = (12, 7)) h, bins, patches = ax.hist(bino,bins= 80) ax.axvline(p*n, color = 'Tomato', ls = '--', lw = 3) ax.annotate(txt, xy = (p*n, h.max()*0.7), xytext = (p*n ,h.max()-5), weight = 'bold', color = 'Tomato', size = 14, arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b')) ax.set_title('Generated Binomial R.V.s', size = 18) plt.show() # According to the example, if success rate $p=.3$, total trials of $1000$.Then the counts of successes have the frequency distribution in the histogram. # ## Moments of Binomial Distribution # We can also calculate all important moments of distributions by using ```sp.stats.binom.stats(n, p, moments = 'mvsk')```. The text table library is used to format the output. # In[34]: n = 1000 # number of trials p = 0.3 # probability of success bino_stats = sp.stats.binom.stats(n, p, moments = 'mvsk') # mean, variance, skewness, kurtosis table = Texttable() table.set_cols_align([ "c", "c", 'c','c']) table.set_cols_valign([ "m", "m", 'm','m']) table.add_rows([["mean", " variance ", ' skewness ', 'kurtosis '], [ bino_stats[0], bino_stats[1], bino_stats[2], bino_stats[3]]]) print(table.draw()) # # Poisson Distribution # When $n\rightarrow\infty$ and $p\rightarrow0$,binomial distribution converges to Poisson distribution, i.e. when $n$ is large and $p$ is small, we can use Poisson to approximate Binomial. # ## Example # For an ordinary traders, every trade has $1/1000$ probabilty to encounter a 'Black Swan' shock, a trader sets $20$ trades per month, what is the probability that she will encounter $2$ 'Black Swan' within 5 years? # # This problem can be solved by Binomial, the formulation as below # # \begin{equation} # \text{Number of Trades} = 20\times 12\times 5=1200\\ # P(x=2) = \binom{1200}{2}\Big(\frac{1}{1000}\Big)^2\Big(\frac{999}{1000}\Big)^{1198} # \end{equation} # Of course, we can employ Poisson PMF to approximate it, the parameter for Poisson distribution is # # \begin{equation} # \lambda = np = 1200 \times \frac{1}{1000} = 1.2 # \end{equation} # # # that means every 5 years, there is in average 1.2 times of Black Swan shock. # # \begin{equation} # P(x=2)=\frac{\lambda^ke^{-\lambda}}{k!}=\frac{1.2^2e^{-1.2}}{2!} # \end{equation} # To use the PMF directly # In[39]: sp.special.comb(1200, 2)*(1/1000)**2*(999/1000)**1198 # Or use the built-in function for Poisson ```sp.stats.poisson.pmf()```. # In[41]: k = 2 n = 20 * 12 * 5 # 20 times per month, and 5 years span p = 1/1000 lambdaP = p * n # lambda in Poisson p = sp.stats.poisson.pmf(k, lambdaP) print('The probability of having {0} BS shock(s) in a span of 5 years is {1:.2f}%.'.format(k, p*100)) # Suprisingly high probability of having 1 BS shock, and one BS shock could possibly wipe out the whole account. Take care, traders, the survival is pivotal! # # So what's the probability of having more than $k$ times BS shock? # In[42]: k = 2 prob_sf = 1 - sp.stats.poisson.cdf(k, lambdaP) prob_sf_inv = sp.stats.poisson.cdf(k, lambdaP) print('The probability of having more than %1.0f BS shock in 5 years is %3.3f%%.' % (k, prob_sf*100)) # double % can escape formating # ## Poisson R.V. Generator # In[46]: poiss = sp.stats.poisson.rvs(lambdaP, size = 10000) fig, ax = plt.subplots(figsize = (12, 7)) h, bins, patches = ax.hist(poiss, density = True, bins = 10) ax.set_title('Poisson Frequency Distribution', size = 16) plt.show() # ## Moments of Poisson Distribution # Again we can compute all most important moments # In[47]: poiss_stats = sp.stats.poisson.stats(k, moments = 'mvsk') # mean, variance, skewness, kurtosis table = Texttable() table.set_cols_align([ "c", "c", 'c','c']) table.set_cols_valign([ "m", "m", 'm','m']) table.add_rows([["mean", " variance ", ' skewness ', 'kurtosis '], [ poiss_stats[0], poiss_stats[1], poiss_stats[2], poiss_stats[3]]]) print(table.draw()) # # Geometric Distribution # The PMF of G-D is $f(k)=p(1-p)^k$, where $k$ is a non-negative integer. $p$ is the probability of success, and $k$ is the number of failures before success. # ## Example # # So G-D is trying to answer 'How many times you have to fail in order to embrace the first success?' # # If each trade has 1/1000 chance to encounter a BS shock, what is the prob of encounter a BS shock after trade $k$ times. # In[48]: k = 10 p = 1/1000 geodist = (1 - 1/1000)**10*1/1000 geodist print('The probability of observing exact %1.0f times of safe trading before a BS shock is %3.3f.' %(k, geodist)) # Or use built-in ```sp.stats.geom.pmf()``` # In[49]: sp.stats.geom.pmf(10, 1/1000) # In[50]: sp.stats.geom.cdf(k,p) print('The probability of observing %1.0f or fewer than %1.0f times of safe trading before a BS shock is %3.3f.'%(k,k,geodist)) # ## G-D Moments and Generator # The moments and randome generator are as following # In[51]: mean, var, skew, kurt = sp.stats.geom.stats(p, moments='mvsk') table = Texttable() table.set_cols_align([ "c", "c", 'c','c', 'c', 'c']) table.set_cols_valign([ "m", "m", 'm','m', 'm', 'm']) table.add_rows([['p','k',"mean", " variance ", ' skewness ', ' kurtosis '], [ p, k, mean, var, skew, kurt]]) print(table.draw()) # In[53]: geomet = sp.stats.geom.rvs(p, size = 10000) fig, ax = plt.subplots(figsize = (12, 7)) h, bins, patches = ax.hist(geomet, density = True, bins = 30) ax.set_title('Geometric Frequency Distribution', size = 16) plt.show() # # Hypergeometric Distribution # The main difference between hypergeometric and binomial is that the former sampling is not independent of each other, i.e. the sampling is without replacement. # # The formula is # \begin{equation} # f(x) =\frac{{K\choose x} {M-K \choose N-x}}{{M\choose N}} # \end{equation} # ## Example # There is 100 candies in an urn, 20 are red, 80 are blue, if we take 5 of them out from it. What is the prob of having exact 4 red candies? # # Solution: # # \begin{equation} # \frac{{20\choose4}{80\choose1}}{{100\choose5}} # \end{equation} # # To solve it: # In[54]: C = sp.special.comb(20, 4)*sp.special.comb(80, 1) /sp.special.comb(100, 5) print('The prob of have 4 red candies by taking 5 out is %1.6f%%.'% (C*100)) # Or with built-in function ```sp.stats.hypergeom.pmf()``` # In[55]: # pmf(x, M, N, n, loc=0) hgeo = sp.stats.hypergeom.pmf(4, 100, 20, 5,loc = 0) # the arg order the same as MATLAB, not recommended hgeo # What is the probability that at most $4$ red candies are taken? i.e. The sum-up of probabilities of drawing $1$, $2$, $3$, and $4$ candies. # In[57]: hgeo_cdf = sp.stats.hypergeom.cdf(4, 100, 20, 5,loc = 0) # the arg order the same as MATLAB, not recommended print('The prob of have at most 4 red candies by taking 5 out is %1.6f%%.' %(hgeo_cdf*100)) # In[59]: hgeo_rv = sp.stats.hypergeom.rvs(100, 20, 5, size = 10000) fig, ax = plt.subplots(figsize = (12, 7)) h, bins, patches = ax.hist(hgeo_rv, density = True) ax.set_title('Geometric Frequency Distribution', size = 16) s = ''' It can be interpreted as: if 100 candies in the urn, 20 are red, take 5 out of 100. The chance of getting from 1 to 5 red candies, is shown in the chart. As we can see it is nearly impossible to get 4 or 5 candies. But getting 1 red candy is the most possible outcome.''' ax.text(1.6, .5, s, fontsize=14, color ='red') plt.show() # In[60]: mean, var, skew, kurt = sp.stats.hypergeom.stats(100, 20, 5, moments='mvsk') table = Texttable() table.set_cols_align([ "c", "c", 'c','c', 'c', 'c']) table.set_cols_valign([ "m", "m", 'm','m', 'm', 'm']) table.add_rows([['M','k',"mean", " variance ", ' skewness ', ' kurtosis '], [ 100, 20, mean, var, skew, kurt]]) print(table.draw()) # # Continous Uniform Distribution # The PDF of Uniform Distribution is # # \begin{equation} # f(x)=\frac{1}{b-a} # \end{equation} # # And its r.v. generator is one of most frequently used function in NumPy: ```np.random.rand()``` # In[65]: unif = np.random.rand(10000) fig, ax = plt.subplots(figsize = (12, 7)) h, bins, patches = ax.hist(unif, density = True, bins = 30) ax.set_title('Continous Uniform Frequency Distribution', size = 16) plt.show() # ## CDF and PDF of Continous Uniform Distribution # In[66]: # pdf(x, loc=0, scale=1) # cdf(x, loc=0, scale=1) x = np.linspace(-.2, 1.2, 100) unif_pdf = sp.stats.uniform.pdf(x) unif_cdf = sp.stats.uniform.cdf(x) fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (17, 7)) ax[0].plot(x,unif_pdf, lw = 4, label = 'PDF of Continouse U-D') ax[0].set_xlim([-.1, 1.1]) ax[0].set_ylim([0, 2]) ax[0].legend(fontsize = 16) ax[1].plot(x,unif_cdf, lw = 4, label = 'CDF of Continouse U-D') ax[1].set_xlim([-.2, 1.2]) ax[1].set_ylim([0, 2]) ax[1].legend(fontsize = 16) plt.show() # In[67]: mean, var, skew, kurt = sp.stats.uniform.stats(moments='mvsk') table = Texttable() table.set_cols_align([ "c", "c", 'c','c']) table.set_cols_valign([ "m", "m", 'm','m']) table.add_rows([["mean", " variance ", ' skewness ', ' kurtosis '], [ mean, var, skew, kurt]]) print(table.draw()) # # Normal Distribution # The most convenient method of creating normal distribution plot is to use ```sp.stats.norm.pdf()```. # # The PDF function single normal distribution is # # $$ # f(x)= \frac{1}{\sigma \sqrt{2 \pi}} e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^{2}} # $$ # In[69]: mu = 2 sigma = 1 x = np.arange(-2, 6, 0.1) norm_pdf = sp.stats.norm.pdf(x, mu, sigma) norm_cdf = sp.stats.norm.cdf(x, mu, sigma) fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (17, 7)) ax[0].plot(x,norm_pdf, lw = 4, label = 'PDF of Normal Distribution', ls = '--') ax[0].legend(fontsize = 16, loc = 'lower left', framealpha=0.2) ax[1].plot(x,norm_cdf, lw = 4, label = 'CDF of Normal Distribution') ax[1].legend(fontsize = 16,fancybox=True, framealpha=0.5) plt.show() # ## Inverse Normal CDF # The inverse normal CDF is commonly used in calculating $p$-value, the plot below is useful to understand the idea. # In[72]: norm_95_r = sp.stats.norm.ppf(.975) # ppf mean point percentage function, actually inverse CDF norm_95_l = sp.stats.norm.ppf(.025) x = np.linspace(-5, 5, 200) y = sp.stats.norm.pdf(x) xl = np.linspace(-5, norm_95_l, 100) yl = sp.stats.norm.pdf(xl) xr = np.linspace(norm_95_r, 5, 100) yr = sp.stats.norm.pdf(xr) fig, ax = plt.subplots(figsize = (17, 7)) ax.plot(x,y, lw = 4, label = 'PDF of Normal Distribution', ls = '-', color = 'orange') ax.set_ylim([0, .45]) ax.fill_between(x, y, 0, alpha=0.1, color = 'blue') ax.fill_between(xl,yl, 0, alpha=0.6, color = 'blue') ax.fill_between(xr,yr, 0, alpha=0.6, color = 'blue') ax.text(-.2, 0.15, '95%', fontsize = 20) ax.text(-2.3, 0.015, '2.5%', fontsize = 12, color = 'white') ax.text(2.01, 0.015, '2.5%', fontsize = 12, color = 'white') ax.annotate('±%.4f' %norm_95_r, xy = (norm_95_r, 0), xytext = (-.4, .05), weight = 'bold', color = 'r', arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b'), fontsize = 16) ax.annotate('±%.4f' %norm_95_r, xy = (norm_95_l, 0), xytext = (-.4, .05), weight = 'bold', color = 'r', arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b'), fontsize = 16) ax.set_title('Normal Distribution And 2.5% Shaded Area', size = 20) plt.show() # ## Normal R.V. Generator # To generate a normal distribution histogram # In[74]: # rvs(loc=0, scale=1, size=1, random_state=None) norm_rv = sp.stats.norm.rvs(mu, sigma, size = 5000) fig, ax = plt.subplots(figsize = (12, 7)) h, bins, patches = ax.hist(norm_rv, density = True, bins = 50) ax.set_title('Normal Frequency Distribution', size = 16) plt.show() # In[75]: mean, var, skew, kurt = sp.stats.norm.stats(mu, sigma, moments='mvsk') table = Texttable() table.set_cols_align([ "c", "c", 'c','c']) table.set_cols_valign([ "m", "m", 'm','m']) table.add_rows([["mean", " variance ", ' skewness ', ' kurtosis '], [ mean, var, skew, kurt]]) print(table.draw()) # ## Bivariate Normal Distribution # The multivariate normal distribution density function is # \begin{equation} # f_\boldsymbol{X}(x_1,...,x_2)=\frac{1}{\sqrt{(2\pi)^k|\Sigma|}}\exp{\Big(-\frac{(x-\mu)^T\Sigma^{-1}(x-\mu)}{2}\Big)} # \end{equation} # # ### 1st Method of Formulation # In[76]: get_ipython().run_line_magic('matplotlib', 'inline') mu_x = 0 sigma_x = 2 mu_y = 0 sigma_y = 2 #Create grid and multivariate normal x = np.linspace(-10,10,500) y = np.linspace(-10,10,500) X, Y = np.meshgrid(x,y) pos = np.empty(X.shape + (2,)) pos[:, :, 0] = X; pos[:, :, 1] = Y # more technical than next one norm = sp.stats.multivariate_normal([mu_x, mu_y], [[sigma_x, 0], [0, sigma_y]]) # frozen #Make a 3D plot fig = plt.figure(figsize = (10, 6)) ax = fig.gca(projection='3d') ax.plot_surface(X, Y, norm.pdf(pos),cmap='viridis',linewidth=0) ax.set_xlabel('X axis') ax.set_ylabel('Y axis') ax.set_zlabel('Z axis') plt.show() # ### 2st Method of Formulation # In[77]: #Parameters to set mu_x = 0 sigma_x = 7 mu_y = 0 sigma_y = 15 x = np.linspace(-10,10,500) y = np.linspace(-10,10,500) X,Y = np.meshgrid(x,y) pos = np.array([X.flatten(),Y.flatten()]).T # more intuitive than last one rv = sp.stats.multivariate_normal([mu_x, mu_y], [[sigma_x, 0], [0, sigma_y]]) fig = plt.figure(figsize=(10,10)) ax0 = fig.add_subplot(111) ax0.contourf(X, Y, rv.pdf(pos).reshape(500,500),cmap='viridis') plt.show() # # Beta Distribution # The PDF of Beta distribution is # # # \begin{equation} # f(x, a, b)=\frac{\Gamma(a+b) x^{a-1}(1-x)^{b-1}}{\Gamma(a) \Gamma(b)} # \end{equation} # # where $0\leq x \leq 1$ and $a>0$, $b>0$, $\Gamma$ is the Gamma function. # # Beta distribution is a natural good choice for priors in Bayesian econometrics since its range is bounded in unit. # In[89]: params[0][0][1] # In[98]: x = np.linspace(0, 1, 100) fig = plt.figure(figsize=(9, 9)) ax = fig.add_subplot(111) params = np.array([[[.5,.5]], [[5,1]], [[1,5]], [[2,2]], [[2,3]], [[3,2]], [[1,1]]]) for i in range(params.shape[0]): beta_pdf = sp.stats.beta.pdf(x, params[i][0][0], params[i][0][1]) ax.plot(x, beta_pdf, lw = 3, label = '$a = %.1f, b = %.1f$' % (params[i][0][0], params[i][0][1])) ax.legend() ax.axis([0, 1, 0, 3]) # In[97]: fig = plt.figure(figsize=(9, 9)) ax = fig.add_subplot(111) x = np.linspace(0,1,100) params = np.array([[[.5,.5]], [[5,1]], [[1,5]], [[2,2]], [[2,3]], [[3,2]], [[1,1]]]) for i in range(params.shape[0]): beta_cdf = sp.stats.beta.cdf(x, params[i][0][0], params[i][0][1]) ax.plot(x, beta_cdf, lw = 3, label = '$a = %.1f, b = %.1f$' % (params[i][0][0], params[i][0][1])) ax.legend() ax.axis([0, 1, 0, 1]) # # $\chi^2$ Distribution # $\chi^2$ distribution is closely connected with normal distributions, if $z$ has the standard normal distribution, then $z^2$ has the $\chi^2$ distribution with $d.f.=1$. And futher,if # # # \begin{equation} # z_1, z_2, ..., z_k \sim i.i.d. N(0, 1) # \end{equation} # # Then summation has a $\chi^2$ distribution of $d.f. = k$: # # # \begin{equation} # \sum_{i=0}^k z_i^2 \sim \chi^2(k) # \end{equation} # # ## $\chi^2$ PDF and CDF # In[99]: k = 1 x = np.linspace(0, 5, 100) chi_pdf = sp.stats.chi2.pdf(x, k) fig, ax = plt.subplots(figsize = (12, 7)) ax.plot(x, chi_pdf, lw = 3, c = 'r', label = '$\chi^2$ distribution with d.f. = 1') ax.legend(fontsize = 18) plt.show() # In[100]: fig, ax = plt.subplots(figsize = (12, 7)) for i in range(1, 6): x = np.linspace(0, 5, 100) chi_pdf = sp.stats.chi2.pdf(x, i) ax.plot(x, chi_pdf, lw = 3, label = '$\chi^2$ distribution with d.f. = %.0d'%i) ax.legend(fontsize = 12) ax.axis([0, 5, 0, 1]) plt.show() # # F Distribution # If $U_1$ has a $\chi^2$ distribution with $\nu_1$ d.f. and $U_2$ has a $\chi^2$ distribution with $\nu_2$ d.f., then # # # \begin{equation} # \frac{U_1/\nu_1}{U_2/\nu_2}\sim F(\nu_1, \nu_2) # \end{equation} # # We are using $F$ distribution for ratios of variances. # In[101]: x = np.linspace(0, 5, 100) fig, ax = plt.subplots(figsize = (12, 7)) df1 = 10 df2 = 5 f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2) ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$' %(df1, df2)) df1 = 5 df2 = 10 f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2) ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$ '%(df1, df2)) df1 = 8 df2 = 15 f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2) ax.plot(x, f_pdf, lw =3, color = 'red', label = '$df_1 = %.d, df_2 = %.d$' %(df1, df2)) df1 = 15 df2 = 8 f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2) ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$ '%(df1, df2)) ax.legend(fontsize = 15) plt.show() # In[ ]: