#!/usr/bin/env python # coding: utf-8 # # Week 7: Discrete bivariate distributions # # # #### [Back to main page](https://petrosyan.page/fall2020math3215) # # ### Range and histogram of a joint pmf # # The range and the histogram of the joint pmf of rolling a dice and recording the larger value as $X$ and the smaller value as $Y$. The blue dot is the point $(\mu_X,\mu_Y)$ and it is the center of the joint distribution. # In[4]: # nbi:hide_in import matplotlib.pyplot as plt import numpy as np plt.figure(figsize=(6,6)) xmean=0 for x in range(1,7): xmean = xmean + x*(1/36+2*(6-x)/36) ymean=0 for y in range(1,7): ymean = ymean + y*(1/36+2*(y-1)/36) x_range = [] y_range = [] z_val = [] for y in range(1,7): plt.scatter(y,y, color="red", edgecolor="black",alpha=0.3) x_range.append(y) y_range.append(y) z_val.append(2/36) for x in range(1,y): x_range.append(x) y_range.append(y) z_val.append(1/36) plt.scatter(x,y, color="red", edgecolor="black",alpha=0.6) plt.scatter(xmean, ymean, s=100 ) plt.xlabel("x") plt.ylabel("y") plt.show(); # In[2]: # nbi:hide_in import matplotlib.pyplot as plt import numpy as np from mpl_toolkits.mplot3d import Axes3D # setup the figure and axes fig = plt.figure(figsize=(10,10)) ax = fig.add_subplot(111, projection='3d') x_range = [] y_range = [] z_val = [] for y in range(1,7): x_range.append(y) y_range.append(y) z_val.append(1/36) for x in range(1,y): x_range.append(x) y_range.append(y) z_val.append(2/36) x_cord = np.array(x_range) y_cord = np.array(y_range) z_cord = np.zeros(y_cord.shape) x_width = np.ones(y_cord.shape)*0.8 y_width = np.ones(y_cord.shape)*0.8 z_height = np.array(z_val) ax.bar3d(x_cord-0.5, y_cord-0.5, z_cord, x_width, y_width, z_height, shade=True, alpha=1, color='#039be5', edgecolor='black') ax.view_init(20,-90-25) plt.show(); # ### The range of the bivariate hypergeometric distribution # In[95]: # nbi:hide_in import numpy as np import matplotlib.pyplot as plt import numpy as np from scipy.special import comb plt.figure(figsize=(10,10)) N=50 K=15 L=15 n=20 x_range = [] y_range = [] for x in range(K+1): for y in range(L+1): if (N-K-L>=n-x-y) & (n-x-y>=0): x_range.append(x) y_range.append(y) alpha = comb(K, x, exact=True) *comb(L, y, exact=True)* comb(N-K-L, n-x-y, exact=True) / comb(N, n, exact=True) plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*9)) plt.xticks(np.arange(0,K+1,1)) plt.yticks(np.arange(0,L+1,1)) plt.title("N={}, K={}, L={}".format(N,K,L)) plt.xlabel("x") plt.ylabel("y") plt.show(); # ### The range of the trinomial distribution # In[45]: # nbi:hide_in import numpy as np import matplotlib.pyplot as plt import numpy as np from scipy.special import comb plt.figure(figsize=(6,6)) n=10 pS=0.3 pF=0.25 pI=0.45 x_range = [] y_range = [] for x in range(n): for y in range(1,n-x+1): alpha = comb(n, x, exact=True) *comb(n-x, y) * pS**x * pF**y * pI**(n-x-y) plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*10)) plt.xticks(np.arange(0,n,1)) plt.yticks(np.arange(0,n,1)) plt.title(r"n={}, $p_S=${}, $p_F=${}, $p_I=${}".format(n,pS,pF, pI)) plt.xlabel("x") plt.ylabel("y") plt.show(); # In[9]: # nbi:hide_in import numpy as np import matplotlib.pyplot as plt import numpy as np from scipy.special import comb plt.figure(figsize=(6,6)) n=15 pS=6/10 pI=3/10 pF=1/10 x_range = [] y_range = [] for x in range(n+1): for y in range(n-x+1): alpha = comb(n, x, exact=True) *comb(n-x, y, exact=True) * pS**x * pI**y * pF**(n-x-y) plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*10)) plt.xticks(np.arange(0,n+1,1)) plt.yticks(np.arange(0,n+1,1)) plt.title(r"n={}, $p_S=${}, $p_I=${}, $p_F=${}".format(n,pS,pI, pF)) plt.xlabel("x") plt.ylabel("y") plt.show(); # ### Least squares line fits a line into the distribution # In[7]: # nbi:hide_in import numpy as np import matplotlib.pyplot as plt import numpy as np from scipy.special import comb plt.figure(figsize=(6,6)) N=50 K=15 L=25 n=20 meanx=n*K/N meany=n*L/N sigmax=np.sqrt(n*(K/N)*(1-K/N)*(N-n)/(N-1)) sigmay=np.sqrt(n*(L/N)*(1-L/N)*(N-n)/(N-1)) cov=-n*(N-n)*K*L/((N-1)*N**2) rho = cov/(sigmax*sigmay) x_range = [] y_range = [] for x in range(K+1): for y in range(L+1): if (N-K-L>=n-x-y) & (n-x-y>=0): x_range.append(x) y_range.append(y) alpha = comb(K, x, exact=True) *comb(L, y, exact=True)* comb(N-K-L, n-x-y, exact=True) / comb(N, n, exact=True) plt.scatter(x,y, edgecolor=(0,0,1,alpha*10), color=(1,0,0,alpha*10)) xval=np.linspace(0, K, 100) yval = rho*sigmay/sigmax*(xval-meanx)+meany plt.plot(xval,yval, color="red", linewidth=2) plt.scatter(meanx, meany, s=100 ) plt.xticks(np.arange(0,K+1,1)) plt.yticks(np.arange(0,L+1,1)) plt.title("Linear regression line for bivariate hypergeometric") plt.xlabel("x") plt.ylabel("y") plt.figtext(0.8,0.8, " N={}\n K={}\n L={}".format(N,K,L), ha="left", va="top", backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large") plt.show(); # In[97]: # nbi:hide_in import numpy as np import matplotlib.pyplot as plt import numpy as np from scipy.special import comb plt.figure(figsize=(6,6)) n=15 pS=6/10 pI=3/10 pF=1/10 meanx=n*pS meany=n*pI x_range = [] y_range = [] for x in range(n): for y in range(1,n-x+1): alpha = comb(n, x, exact=True) *comb(n-x, y, exact=True) * pS**x * pI**y * pF**(n-x-y) plt.scatter(x,y, edgecolor=(0,0,1,max(min(alpha*5,0.9),0.01)), color=(1,0,0,max(min(alpha*5,0.9),0.01))) xval=np.linspace(0, n, 100) yval = (n-xval)*pI/(1-pS) plt.plot(xval,yval, color="red", linewidth=2) plt.scatter(meanx, meany, s=100 ) #plt.xticks(np.arange(0,n,1)) #plt.yticks(np.arange(0,n,1)) plt.title("Least squares line for trinomial distribution") plt.figtext(0.7,0.8, " n={}\n".format(n)+r" $p_S=${}".format(pS)+"\n"+ r" $p_I=${}".format(pI), ha="left", va="top", backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large") plt.xlabel("x") plt.ylabel("y") plt.show(); # In[121]: # nbi:hide_in import numpy as np import matplotlib.pyplot as plt import numpy as np from scipy.special import comb plt.figure(figsize=(6,6)) n=15 pS=0.6 pI=0.2 pF=0.2 meanx=n*pS meany=n*pI x_range = [] y_range = [] for x in range(n): for y in range(1,n-x+1): alpha = comb(n, x, exact=True) *comb(n-x, y, exact=True) * pS**x * pI**y * pF**(n-x-y) plt.scatter(x,y, edgecolor=(0,0,1,max(min(alpha*10,0.9),0.01)), color=(1,0,0,max(min(alpha*10,0.9),0.01))) def std(xval): var = (n-xval)*pI*pF/(1-pS)**2 std = np.power(var,1/2) return std xval=np.linspace(0, K, 100) yval = (n-xval)*pI/(1-pS) plt.fill_between(xval, yval-std(xval), yval+std(xval), alpha=0.1) plt.plot(xval,yval, color="red", linewidth=2) plt.scatter(meanx, meany, s=100 ) plt.xticks(np.arange(0,n,1)) plt.yticks(np.arange(0,n,1)) plt.title("Least squares line with standard deviation") plt.figtext(0.7,0.8, " n={}\n".format(n)+r" $p_S=${}".format(pS)+"\n"+ r" $p_I=${}".format(pI)+"\n"+r" $p_F=${}".format(pF), ha="left", va="top", backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large") plt.xlabel("x") plt.ylabel("y") plt.show(); # In[7]: # nbi:hide_in import numpy as np import matplotlib.pyplot as plt import numpy as np from scipy.special import comb plt.figure(figsize=(6,6)) n=15 pS=6/10 pI=3/10 pF=1/10 x_range = [] y_range = [] for x in range(n+1): for y in range(0,n-x+1): plt.scatter(x,y, color=(1,0,0,0.5)) plt.xticks(np.arange(0,n+1,1)) plt.yticks(np.arange(0,n+1,1)) plt.title(r"n={}, $p_S=${}, $p_I=${}, $p_F=${}".format(n,pS,pI, pF)) plt.xlabel("x") plt.ylabel("y") plt.show(); # In[12]: # nbi:hide_in import numpy as np import matplotlib.pyplot as plt import numpy as np from scipy.special import comb plt.figure(figsize=(10,10)) meanx=0.49 meany=0.22 cov = -0.1578 varx = 0.7099 condmean = np.array([0.02, 0.08, 0.09, -0.06]) x_range = np.array([-1,0,1,2]) y_range = np.array([-1,0,1]) pmf=np.array([[0.01, 0.04, 0.03, 0.1], [0.08, 0.22, 0.24, 0.06], [0.03, 0.12, 0.12, 0.04]]) for i in range(len(x_range)): for j in range(len(y_range)): x = x_range[i] y = y_range[j] alpha = pmf[j,i] plt.scatter(x,y, edgecolor=(0,0,1,max(min(alpha*5,0.9),0.01)), color=(1,0,0,max(min(alpha*5,0.9),0.01))) xval=np.linspace(-2, 3, 10) yval = (xval-meanx)*cov/varx + meany plt.plot(xval,yval, linewidth=2, label="least squares") plt.plot(x_range,condmean, linewidth=2, label= "cond mean") plt.scatter(meanx, meany, s=100 ) #plt.xticks(np.arange(0,n,1)) #plt.yticks(np.arange(0,n,1)) plt.title("Least squares line for trinomial distribution") plt.xlabel("x") plt.ylabel("y") plt.legend() plt.show(); # In[ ]: