#!/usr/bin/env python
# coding: utf-8

# # 🏆 Homework
# ## Maximum Likelihood Estimation of the Parameters of the Gamma Distribution
# ## Answer by Muhammed Talha Kaya

# Let's consider maximum likelihood estimation of the parameters of the Gamma distribution with unknown scale parameter $\theta$ and shape parameter $\kappa$, based on a random sample of size $n$.

# The probability density function of the Gamma distribution with unknown parameters $(\theta, \kappa)$ is:
#
# $f(x;\theta, \kappa) = \frac{1}{\theta^\kappa \Gamma(\kappa)}x^{\kappa-1}e^{-\frac{x}{\theta}}$ for $x>0$ and $\theta,\kappa>0$.
#
# The likelihood and log-likelihood functions based on a random sample of size $n$ are then, respectively,
#
# $L(\theta, \kappa) = \frac{1}{\theta^{n\kappa} \Gamma(\kappa)^n} \big(\prod_{i=1}^{n} x_i\big)^{\kappa-1}e^{-\sum_{i=1}^{n} \frac{x_i}{\theta}}$ for $x>0$ and $\theta,\kappa>0$, and
#
# $\log(L(\theta, \kappa)) = -n \kappa\log(\theta)-n \log(\Gamma(\kappa)) + (\kappa-1)\log\big(\prod_{i=1}^{n} x_i\big)-\frac{\sum_{i=1}^{n} x_i}{\theta}$.
#
# The partial derivatives are:
#
# $\frac{\partial\log(L(\theta, \kappa))}{\partial\theta} = -\frac{n \kappa}{\theta} + \frac{\sum_{i=1}^{n} x_i}{\theta^2}$ and
#
# $\frac{\partial\log(L(\theta, \kappa))}{\partial\kappa} = -n \log(\theta) - n \frac{\Gamma'(\kappa)}{\Gamma(\kappa)} + \log\big(\prod_{i=1}^{n} x_i\big)$.

# Let $\psi(\kappa) = \frac{\Gamma'(\kappa)}{\Gamma(\kappa)}$ denote the psi (digamma) function, and let $\tilde X = \big(\prod_{i=1}^{n} x_i\big)^{\frac{1}{n}}$ denote the geometric mean of the sample, so that $n \log(\tilde X) = \log\big(\prod_{i=1}^{n} x_i\big)$. Setting the derivatives equal to zero gives:
#
# $\frac{\partial\log(L(\theta, \kappa))}{\partial\theta} = -\frac{n \kappa}{\theta} + \frac{\sum_{i=1}^{n} x_i}{\theta^2} = 0 \Rightarrow \hat{\theta} = \frac{\sum_{i=1}^{n} x_i}{n \hat{\kappa}} = \frac{\bar x}{\hat{\kappa}}$.
#
# So $\hat{\theta}_{MLE}$ depends on $\hat{\kappa}_{MLE}$. Substituting $\hat{\theta} = \bar x / \hat{\kappa}$ into the second equation:
#
# $\frac{\partial\log(L(\theta, \kappa))}{\partial\kappa} = -n \log\big(\tfrac{\bar x}{\hat{\kappa}}\big) - n \psi(\hat{\kappa}) + n \log(\tilde X) = n\Big(\log(\hat{\kappa}) - \psi(\hat{\kappa}) + \log\big(\tfrac{\tilde X}{\bar x}\big)\Big) = 0$,
#
# i.e. $\log(\hat{\kappa}) - \psi(\hat{\kappa}) + \log\big(\tfrac{\tilde X}{\bar x}\big) = 0$.

# This last ML equation cannot be solved in closed form because $\hat{\kappa}$ enters non-linearly, so we need an iterative algorithm to find the root. İbrahim Talha suggests using the Newton-Raphson algorithm, which iterates (with iteration index $t$, to avoid confusion with the sample size $n$):
#
# $\hat{\kappa}_{t+1} = \hat{\kappa}_{t} - \frac{f(\hat{\kappa}_{t})}{f'(\hat{\kappa}_{t})}$,
#
# where $f(\hat{\kappa}_{t}) = \log(\hat{\kappa}_{t}) - \psi(\hat{\kappa}_{t}) + \log\big(\tfrac{\tilde X}{\bar x}\big)$ and $f'(\hat{\kappa}_{t}) = \frac{1}{\hat{\kappa}_{t}} - \psi'(\hat{\kappa}_{t})$, with $\psi(\hat{\kappa}_{t}) = \frac{\Gamma'(\hat{\kappa}_{t})}{\Gamma(\hat{\kappa}_{t})}$ and $\tilde X = \big(\prod_{i=1}^{n} x_i\big)^{\frac{1}{n}}$.
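# Before the full implementation below, here is a minimal sketch (an addition, not part of the original derivation) of a single Newton-Raphson step for $\hat{\kappa}$ on a synthetic sample. It assumes `scipy.special.digamma` for $\psi$ and `scipy.special.polygamma(1, x)` for $\psi'$; the seed and sample size are illustrative only.

# In[ ]:

import numpy as np
from scipy import special

rng = np.random.default_rng(0)  # assumed seed, only for reproducibility of the sketch
sample = rng.gamma(shape=2.0, scale=2.0, size=500)

# log(X_tilde / x_bar) = mean of log(x_i) minus log of the sample mean
log_ratio = np.mean(np.log(sample)) - np.log(np.mean(sample))

def f(k):
    # f(kappa) = log(kappa) - psi(kappa) + log(X_tilde / x_bar)
    return np.log(k) - special.digamma(k) + log_ratio

def f_prime(k):
    # f'(kappa) = 1/kappa - psi'(kappa); polygamma(1, .) is the trigamma function
    return 1.0 / k - special.polygamma(1, k)

k = 1.0  # any starting point with kappa > 0
print("one Newton-Raphson step: {} -> {}".format(k, k - f(k) / f_prime(k)))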
# In[1]:

# import required libraries
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as stats
from scipy import special
import math


# In[4]:

# Generate 500 observations from a Gamma distribution;
# np.random.gamma(shape=kappa, scale=theta, size=sample size).
# We will estimate both the shape and the scale parameter from this sample.
data = np.random.gamma(shape=2, scale=2, size=500)

# Geometric mean of the data (x_tilda), computed on the log scale
# to avoid overflow in the raw product
def geometric_mean(data):
    log_sum = 0.0
    for item in data:
        log_sum += math.log(item)
    return math.exp(log_sum / np.size(data))

x_tilda = geometric_mean(data)
x_bar = np.mean(data)

# scipy.special.polygamma(n, x) = nth derivative of the psi(x) function
def psi(k_hat):
    return special.polygamma(0, k_hat)

def fd_psi(k_hat):
    # first derivative of psi (the trigamma function)
    return special.polygamma(1, k_hat)

def kappa_function(k_hat, x_tilda, x_bar):
    # f(kappa_hat) = log(kappa_hat) - psi(kappa_hat) + log(x_tilda/x_bar); we seek its root
    return math.log(k_hat) - psi(k_hat) + math.log(x_tilda) - math.log(x_bar)

def fd_kappa_function(k_hat):
    # first derivative of kappa_function: f'(kappa_hat) = 1/kappa_hat - psi'(kappa_hat)
    return (1 / k_hat) - fd_psi(k_hat)

# Newton-Raphson method
def New_Rap(k_n, x_tilda, x_bar):
    # One Newton-Raphson step: returns a value closer to the root for the given input
    return k_n - kappa_function(k_n, x_tilda, x_bar) / fd_kappa_function(k_n)

k_n = 1  # take initial kappa value 1 (kappa > 0)
# The initial value can also be the method-of-moments estimate: np.mean(data)**2 / np.var(data)
# https://ocw.mit.edu/courses/mathematics/18-443-statistics-for-applications-spring-2015/lecture-notes/MIT18_443S15_LEC3.pdf
tolerance = 10**-5

print("\nInitial Kappa = {}".format(k_n))
print("Function value at initial kappa = {}\n".format(kappa_function(k_n, x_tilda, x_bar)))

while abs(kappa_function(k_n, x_tilda, x_bar)) > tolerance:
    k_n = New_Rap(k_n, x_tilda, x_bar)
    print("New Kappa = {}".format(k_n))
    print("Function value at new kappa = {}\n".format(kappa_function(k_n, x_tilda, x_bar)))

# Find theta_hat from the first ML equation: theta_hat = x_bar / kappa_hat
theta_hat = np.mean(data) / k_n
print("Theta Hat = {}".format(theta_hat))

# Optional plot of the true density over the sorted sample:
# sort_data = np.sort(data)
# plt.plot(sort_data, stats.gamma.pdf(sort_data, a=2, scale=2))
# plt.show()


# 🏆: ☕ + 🍰 at Espresso Lab, MED.

# In[ ]:
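# As an optional sanity check (not in the original answer), SciPy's built-in maximum likelihood fitter should agree closely with the Newton-Raphson estimates above once the location parameter is pinned to zero.

# stats.gamma.fit maximizes the likelihood numerically; floc=0 fixes the
# location at 0 so only the shape (kappa) and scale (theta) are estimated.
a_hat, loc_hat, scale_hat = stats.gamma.fit(data, floc=0)
print("scipy kappa_hat = {:.5f}, theta_hat = {:.5f}".format(a_hat, scale_hat))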