#!/usr/bin/env python
# coding: utf-8

# ### Hessians to compute variances
#
# - We have our point estimates, but now we want the variances of those estimates.
# - We can approximate them (under some asymptotic conditions) using the _Hessian of the negative log-likelihood at the MLE_.
# - Think of the Hessian as a matrix representing the curvature at the minimum point. If we move slightly away from that minimum, how much does the likelihood change?
#     - high curvature (think of a tight peak) => low variance of the estimate.
#     - low curvature (think of a broad hill) => high variance of the estimate.
#
# To extract the variance estimates, we invert the Hessian at the MLE, $\hat{\theta}$, and take the diagonal:
#
# $$\text{Var}(\hat{\theta}_i) = \text{diag}(H(\hat{\theta})^{-1})_i$$
#
# The Hessian is the matrix of all second partial derivatives. `autograd` has this built in:

# In[1]:


from autograd import numpy as np
from autograd import elementwise_grad, value_and_grad, hessian
from scipy.optimize import minimize


# In[2]:


# simulated durations T and event indicators E
T = (np.random.exponential(size=1000) / 1.5) ** 2.3
E = np.random.binomial(1, 0.95, size=1000)


# In[3]:


# we've seen all of this before...
def cumulative_hazard(params, t):
    lambda_, rho_ = params
    return (t / lambda_) ** rho_

# the hazard is the derivative of the cumulative hazard w.r.t. time
hazard = elementwise_grad(cumulative_hazard, argnum=1)

def log_hazard(params, t):
    return np.log(hazard(params, t))

def log_likelihood(params, t, e):
    return np.sum(e * log_hazard(params, t)) - np.sum(cumulative_hazard(params, t))

def negative_log_likelihood(params, t, e):
    return -log_likelihood(params, t, e)

# with bounds given and method=None, scipy selects L-BFGS-B
results = minimize(
    value_and_grad(negative_log_likelihood),
    x0=np.array([1.0, 1.0]),
    method=None,
    args=(T, E),
    jac=True,
    bounds=((0.00001, None), (0.00001, None)))

print(results)


# In[4]:


estimates_ = results.x

# Note: this will produce the full matrix of second partial derivatives
hessian(negative_log_likelihood)(estimates_, T, E)


# In[5]:


H = hessian(negative_log_likelihood)(estimates_, T, E)
variance_ = np.diag(np.linalg.inv(H))
print(variance_)


# In[6]:


std_error = np.sqrt(variance_)
print(std_error)


# In[7]:


print("lambda_ %.3f (±%.2f)" % (estimates_[0], std_error[0]))
print("rho_ %.3f (±%.2f)" % (estimates_[1], std_error[1]))


# From here you can construct confidence intervals (CIs) and the like; a couple of quick sketches follow. Then it's on to Part 6, where we connect these abstract parameters to business logic.
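# Here is a minimal sketch of that CI construction (my addition, not part of the
# original walkthrough), using the usual normal approximation for MLEs: a
# two-sided 95% interval is the estimate plus or minus $z_{0.975} \approx 1.96$
# standard errors.

# In[8]:


from scipy.stats import norm

# z-value for a two-sided 95% interval (≈ 1.96)
z = norm.ppf(0.975)

lower_ = estimates_ - z * std_error
upper_ = estimates_ + z * std_error

for name, est, lo, hi in zip(("lambda_", "rho_"), estimates_, lower_, upper_):
    print("%s %.3f (95%% CI: %.3f to %.3f)" % (name, est, lo, hi))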
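# One optional sanity check before trusting these numbers (an aside of mine, not
# required by the method): at a genuine minimum of the negative log-likelihood
# the Hessian should be positive definite, i.e. all eigenvalues strictly
# positive. If any eigenvalue is near zero or negative, the inverted-Hessian
# variances are suspect.

# In[9]:


# eigvalsh is appropriate here since the Hessian is symmetric
eigenvalues = np.linalg.eigvalsh(H)
print(eigenvalues)
assert np.all(eigenvalues > 0), "Hessian is not positive definite at the optimum"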