#!/usr/bin/env python
# coding: utf-8

# # Week 7: Discrete bivariate distributions
# 
# 
#  #### [Back to main page](https://petrosyan.page/fall2020math3215)
# 

# ### Range and histogram of a joint pmf 
# 
# The range and the histogram of the joint pmf of rolling a dice and recording the larger value as $X$ and the smaller value as $Y$. The blue dot is the point $(\mu_X,\mu_Y)$ and it is the center of the joint distribution.

# In[4]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(6,6))

xmean=0
for x in range(1,7):
    xmean = xmean + x*(1/36+2*(6-x)/36)  

ymean=0
for y in range(1,7):
    ymean = ymean + y*(1/36+2*(y-1)/36)   


x_range = []
y_range = []
z_val = []
for y in range(1,7):
    plt.scatter(y,y, color="red", edgecolor="black",alpha=0.3)
    x_range.append(y)
    y_range.append(y)
    z_val.append(2/36)
    for x in range(1,y):
        x_range.append(x)
        y_range.append(y)
        z_val.append(1/36)
        plt.scatter(x,y, color="red", edgecolor="black",alpha=0.6)
        
        
plt.scatter(xmean, ymean, s=100 )        
plt.xlabel("x")
plt.ylabel("y")

plt.show();


# In[2]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

# setup the figure and axes
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, projection='3d')

x_range = []
y_range = []
z_val = []
for y in range(1,7):
    x_range.append(y)
    y_range.append(y)
    z_val.append(1/36)
    for x in range(1,y):
        x_range.append(x)
        y_range.append(y)
        z_val.append(2/36)
        
x_cord = np.array(x_range)
y_cord = np.array(y_range)
z_cord = np.zeros(y_cord.shape)
x_width = np.ones(y_cord.shape)*0.8
y_width = np.ones(y_cord.shape)*0.8
z_height = np.array(z_val)        
ax.bar3d(x_cord-0.5, y_cord-0.5, z_cord, x_width, y_width, z_height, shade=True, alpha=1, color='#039be5', edgecolor='black')
ax.view_init(20,-90-25)

plt.show();


# ### The range of the bivariate hypergeometric distribution

# In[95]:


# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(10,10)) 


N=50
K=15
L=15
n=20


x_range = []
y_range = []
for x in range(K+1):
    for y in range(L+1):
        if (N-K-L>=n-x-y) & (n-x-y>=0):
            x_range.append(x)
            y_range.append(y)
            alpha = comb(K, x, exact=True) *comb(L, y, exact=True)* comb(N-K-L, n-x-y, exact=True) / comb(N, n, exact=True)
            plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*9))
            
plt.xticks(np.arange(0,K+1,1))
plt.yticks(np.arange(0,L+1,1))
plt.title("N={}, K={}, L={}".format(N,K,L))
plt.xlabel("x")
plt.ylabel("y")
plt.show();


# ### The range of the trinomial distribution

# In[45]:


# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


n=10
pS=0.3
pF=0.25
pI=0.45

x_range = []
y_range = []
for x in range(n):
    for y in range(1,n-x+1):
        alpha = comb(n, x, exact=True) *comb(n-x, y) * pS**x * pF**y * pI**(n-x-y)
        plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*10))

plt.xticks(np.arange(0,n,1))
plt.yticks(np.arange(0,n,1))
plt.title(r"n={}, $p_S=${}, $p_F=${}, $p_I=${}".format(n,pS,pF, pI))
plt.xlabel("x")
plt.ylabel("y")

plt.show();


# In[9]:


# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


n=15
pS=6/10
pI=3/10
pF=1/10

x_range = []
y_range = []
for x in range(n+1):
    for y in range(n-x+1):
        alpha = comb(n, x, exact=True) *comb(n-x, y, exact=True) * pS**x * pI**y * pF**(n-x-y)
        plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*10))

plt.xticks(np.arange(0,n+1,1))
plt.yticks(np.arange(0,n+1,1))
plt.title(r"n={}, $p_S=${}, $p_I=${}, $p_F=${}".format(n,pS,pI, pF))
plt.xlabel("x")
plt.ylabel("y")

plt.show();


# ### Least squares line fits a line into the distribution

# In[7]:


# nbi:hide_in

import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


N=50
K=15
L=25
n=20

meanx=n*K/N
meany=n*L/N
sigmax=np.sqrt(n*(K/N)*(1-K/N)*(N-n)/(N-1)) 
sigmay=np.sqrt(n*(L/N)*(1-L/N)*(N-n)/(N-1)) 
cov=-n*(N-n)*K*L/((N-1)*N**2)
rho = cov/(sigmax*sigmay)

x_range = []
y_range = []
for x in range(K+1):
    for y in range(L+1):
        if (N-K-L>=n-x-y) & (n-x-y>=0):
            x_range.append(x)
            y_range.append(y)
            alpha = comb(K, x, exact=True) *comb(L, y, exact=True)* comb(N-K-L, n-x-y, exact=True) / comb(N, n, exact=True)
            plt.scatter(x,y, edgecolor=(0,0,1,alpha*10), color=(1,0,0,alpha*10))
            
            
xval=np.linspace(0, K, 100)
yval = rho*sigmay/sigmax*(xval-meanx)+meany
plt.plot(xval,yval, color="red", linewidth=2)
plt.scatter(meanx, meany, s=100 )                    
plt.xticks(np.arange(0,K+1,1))
plt.yticks(np.arange(0,L+1,1))
plt.title("Linear regression line for bivariate hypergeometric")
plt.xlabel("x")
plt.ylabel("y")
plt.figtext(0.8,0.8, " N={}\n K={}\n L={}".format(N,K,L), ha="left", va="top",
            backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")
plt.show();


# In[97]:


# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


n=15
pS=6/10
pI=3/10
pF=1/10

meanx=n*pS
meany=n*pI

x_range = []
y_range = []
for x in range(n):
    for y in range(1,n-x+1):
        alpha = comb(n, x, exact=True) *comb(n-x, y, exact=True) * pS**x * pI**y * pF**(n-x-y)
        plt.scatter(x,y, edgecolor=(0,0,1,max(min(alpha*5,0.9),0.01)), color=(1,0,0,max(min(alpha*5,0.9),0.01)))

xval=np.linspace(0, n, 100)
yval = (n-xval)*pI/(1-pS)
plt.plot(xval,yval, color="red", linewidth=2)
plt.scatter(meanx, meany, s=100 )   
#plt.xticks(np.arange(0,n,1))
#plt.yticks(np.arange(0,n,1))
plt.title("Least squares line for trinomial distribution")
plt.figtext(0.7,0.8, " n={}\n".format(n)+r" $p_S=${}".format(pS)+"\n"+
            r" $p_I=${}".format(pI), ha="left", va="top",
            backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")
plt.xlabel("x")
plt.ylabel("y")

plt.show();


# In[121]:


# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


n=15
pS=0.6
pI=0.2
pF=0.2

meanx=n*pS
meany=n*pI

x_range = []
y_range = []
for x in range(n):
    for y in range(1,n-x+1):
        alpha = comb(n, x, exact=True) *comb(n-x, y, exact=True) * pS**x * pI**y * pF**(n-x-y)
        plt.scatter(x,y, edgecolor=(0,0,1,max(min(alpha*10,0.9),0.01)), color=(1,0,0,max(min(alpha*10,0.9),0.01)))
        
def std(xval):
    var = (n-xval)*pI*pF/(1-pS)**2
    std = np.power(var,1/2)
    return std

xval=np.linspace(0, K, 100)
yval = (n-xval)*pI/(1-pS)
plt.fill_between(xval, yval-std(xval), yval+std(xval), alpha=0.1)
plt.plot(xval,yval, color="red", linewidth=2)
plt.scatter(meanx, meany, s=100 )   
plt.xticks(np.arange(0,n,1))
plt.yticks(np.arange(0,n,1))
plt.title("Least squares line with standard deviation")
plt.figtext(0.7,0.8, " n={}\n".format(n)+r" $p_S=${}".format(pS)+"\n"+
            r" $p_I=${}".format(pI)+"\n"+r" $p_F=${}".format(pF), ha="left", va="top",
            backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")
plt.xlabel("x")
plt.ylabel("y")


plt.show();


# In[7]:


# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


n=15
pS=6/10
pI=3/10
pF=1/10

x_range = []
y_range = []
for x in range(n+1):
    for y in range(0,n-x+1):
        plt.scatter(x,y,   color=(1,0,0,0.5))

plt.xticks(np.arange(0,n+1,1))
plt.yticks(np.arange(0,n+1,1))
plt.title(r"n={}, $p_S=${}, $p_I=${}, $p_F=${}".format(n,pS,pI, pF))
plt.xlabel("x")
plt.ylabel("y")

plt.show();


# In[12]:


# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(10,10)) 


meanx=0.49
meany=0.22
cov = -0.1578
varx = 0.7099

condmean = np.array([0.02, 0.08, 0.09, -0.06])

x_range = np.array([-1,0,1,2])
y_range = np.array([-1,0,1])

pmf=np.array([[0.01, 0.04, 0.03, 0.1],
             [0.08, 0.22, 0.24, 0.06],
             [0.03, 0.12, 0.12, 0.04]])

for i in range(len(x_range)):
    for j in range(len(y_range)):
        x = x_range[i]
        y = y_range[j]
        alpha = pmf[j,i]
        plt.scatter(x,y, edgecolor=(0,0,1,max(min(alpha*5,0.9),0.01)), color=(1,0,0,max(min(alpha*5,0.9),0.01)))
        

xval=np.linspace(-2, 3, 10)
yval = (xval-meanx)*cov/varx + meany
plt.plot(xval,yval,   linewidth=2, label="least squares")
plt.plot(x_range,condmean,   linewidth=2, label= "cond mean")

plt.scatter(meanx, meany, s=100 )   
#plt.xticks(np.arange(0,n,1))
#plt.yticks(np.arange(0,n,1))
plt.title("Least squares line for trinomial distribution")

plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show();


# In[ ]: