#!/usr/bin/env python
# coding: utf-8

# # Introduction to iMinuit / Probfit
# Notebook-style script: fit a straight line (this section) and Gaussians
# (below), comparing probfit cost functions against hand-written chi^2.

# In[1]:

# Jupyter magic: interactive matplotlib backend (only works inside IPython).
get_ipython().run_line_magic('matplotlib', 'notebook')

import numpy as np
import matplotlib.pyplot as plt
import iminuit
import probfit

# # straight line

# In[2]:

# Let's make a straight line with gaussian(mu=0, sigma=1) noise
np.random.seed(0)
x = np.linspace(0, 10, 20)
y = 3.3 * x + 15.5 + np.random.randn(len(x))
err = 2 * np.ones(len(x)) + 1.7 * np.random.randn(len(x))
fig, ax = plt.subplots()
ax.errorbar(x, y, err, fmt='.')

# In[3]:

def line(x, m, c):
    """Straight-line model.

    The first argument is the independent variable; the arguments after
    that (m, c) are the shape parameters. Define it to be parabolic or
    whatever you like.
    """
    return m * x + c

# In[4]:

iminuit.describe(line)

# In[5]:

# Define a chi^2 cost function.
# Chi2Regression is just a callable object; nothing special about it.
chi2 = probfit.Chi2Regression(line, x, y)
iminuit.describe(chi2)

# In[6]:

# Minimize it. iminuit gives a heads-up that no initial value was given;
# we can ignore that for now.
firstFit = iminuit.Minuit(chi2, m=1)
firstFit.migrad()

# In[7]:

def cust_chi2(m, c):
    """Hand-written chi^2 for the line model.

    The arrays x, y and err are module globals and must be defined
    before this function is called.
    """
    expected = line(x, m, c)
    observed = y
    # Per-point chi^2 contributions; nansum ignores NaN entries.
    contributions = (observed - expected) ** 2 / err ** 2
    return np.nansum(contributions)

# In[8]:

# NOTE: the error_* / limit_* keyword arguments are the iminuit 1.x API
# (initial step sizes and parameter limits).
secondFit = iminuit.Minuit(cust_chi2,
                           m=3.0, error_m=0.01, limit_m=(2.5, 4.),
                           c=15., error_c=.01, limit_c=(12., 37.))
secondFit.migrad()

# In[9]:

# The output above is a pretty-printed summary of the fit results from
# minuit.print_fmin(), which iminuit.Minuit.migrad() calls automatically
# after running MIGRAD.
# Let's see our results as Python dictionaries ...
# Print the fitted parameter values and errors of both fits.
print("First fit")
print(firstFit.values)
print(firstFit.errors)
print("Second fit")
print(secondFit.values)
print(secondFit.errors)

# In[10]:

# Overlay the two fitted lines on the data.
fig, ax = plt.subplots()
ax.errorbar(x, y, err, fmt='.')
ax.plot(x, line(x, firstFit.values['m'], firstFit.values['c']), label="probfit")
ax.plot(x, line(x, secondFit.values['m'], secondFit.values['c']), label="custom fit")
legend = ax.legend(loc='lower right', ncol=1, shadow=False, fontsize=14)

# # Gaussian

# In[11]:

# First let's make some example data: sigma = 4 and mean = 1.
np.random.seed(0)
data = np.random.randn(10000) * 4 + 1
fig, ax = plt.subplots()
# NOTE(review): `normed` was removed in matplotlib >= 3.1; on a modern
# stack use density=True instead — confirm against the installed version.
hist, bins, _ = ax.hist(data, bins=50, histtype='step', normed=True)

# In[12]:

# Define your PDF / model.
def gauss_pdf(x, mu, sigma):
    """Normalized Gaussian"""
    return 1 / np.sqrt(2 * np.pi) / sigma * np.exp(-(x - mu) ** 2 / 2. / sigma ** 2)

# In[13]:

# Build your cost function. Here we use a binned likelihood.
binned_likelihood = probfit.BinnedLH(gauss_pdf, data)

# In[14]:

# Create the minuit and give an initial value for the sigma parameter.
# Remember: minuit.errordef is automatically set to 0.5
# as required for likelihood fits (this was explained above).
gaus_1st = iminuit.Minuit(binned_likelihood, sigma=3)

# In[15]:

gaus_1st.migrad()
# Like in all binned fits with a long zero tail,
# it will have to do something about the zero bins;
# probfit.BinnedLH does handle them gracefully but will give you a warning.

# In[16]:

# Bin centres of the histogram drawn above.
bins_x = (bins[:-1] + bins[1:]) / 2

def cust_gauss(mu, sigma):
    """Hand-written chi^2 between the normalised histogram `hist`
    (module global, filled above) and the Gaussian model evaluated
    at the bin centres `bins_x` (also a module global)."""
    expected = gauss_pdf(bins_x, mu, sigma)
    observed = hist
    # Pearson-style contributions; nansum drops NaN bins (e.g. 0/0),
    # though an exact zero `expected` would still yield inf.
    contributions = (observed - expected) ** 2 / expected
    return np.nansum(contributions)

# iminuit 1.x API: error_* are initial step sizes, limit_* are bounds.
gaus_2st = iminuit.Minuit(cust_gauss,
                          mu=.5, error_mu=0.01, limit_mu=(-4., 4.),
                          sigma=4., error_sigma=.01, limit_sigma=(1., 10.))
gaus_2st.migrad()

# In[17]:

# generate canvas
fig, ax = plt.subplots()
# plot data
# NOTE(review): `normed` is deprecated/removed in recent numpy; use
# density=True on a modern stack — confirm against the installed version.
hist, bins = np.histogram(data, bins=50, normed=True)
ax.errorbar(bins_x, hist, yerr=np.sqrt(hist / len(data)), fmt='o',
            capsize=3, markersize=4)
# plot the two fits
ax.plot(bins_x, gauss_pdf(bins_x, gaus_1st.values['mu'], gaus_1st.values['sigma']),
        lw=2, label="binned likelihood\n(probfit)")
ax.plot(bins_x, gauss_pdf(bins_x, gaus_2st.values['mu'], gaus_2st.values['sigma']),
        'r--', lw=2, label="general minimization")
# generate legend
ax.legend(loc='upper right', ncol=1, shadow=False, fontsize=14)

# In[18]:

# drawing it nicely: mu-sigma confidence contours from MINOS.
fig, ax = plt.subplots()
gaus_1st.draw_mncontour('mu', 'sigma', nsigma=4)

# In[19]:

# Toy dataset: two Gaussian peaks on top of a polynomial background.
np.random.seed(0)
data_peak1 = np.random.randn(3000) * 0.2 + 2
data_peak2 = np.random.randn(5000) * 0.1 + 4
data_range = (-2, 5)
data_bg = probfit.gen_toy(lambda x: 4 + 4 * x + x ** 2, 20000, data_range)
data_all = np.concatenate([data_peak1, data_peak2, data_bg])
fig, ax = plt.subplots()
ax.hist((data_peak1, data_peak2, data_bg, data_all),
        label=['Signal 1', 'Signal 2', 'Background', 'Total'],
        bins=200, histtype='step', range=data_range)
ax.legend(loc='upper left')

# In[20]:

# Using a polynomial to fit a distribution is problematic, because the
# polynomial can assume negative values, which results in NaN (not a number)
# values in the likelihood function.
# To avoid this problem we restrict the fit to the range (0, 5) where
# the polynomial is clearly positive.
fit_range = (0, 5)

# Background: a 2nd-order polynomial, normalised over the fit range and
# then extended with a yield parameter 'NBkg'.
normalized_poly = probfit.Normalized(probfit.Polynomial(2), fit_range)
normalized_poly = probfit.Extended(normalized_poly, extname='NBkg')

# Two Gaussian signals; rename gives each its own parameter names so they
# do not collide, and Extended adds the yields 'N1' / 'N2'.
gauss1 = probfit.Extended(probfit.rename(probfit.gaussian, ['x', 'mu1', 'sigma1']),
                          extname='N1')
gauss2 = probfit.Extended(probfit.rename(probfit.gaussian, ['x', 'mu2', 'sigma2']),
                          extname='N2')

# Define an extended PDF consisting of three components.
pdf = probfit.AddPdf(normalized_poly, gauss1, gauss2)

print('normalized_poly: {}'.format(probfit.describe(normalized_poly)))
print('gauss1: {}'.format(probfit.describe(gauss1)))
print('gauss2: {}'.format(probfit.describe(gauss2)))
print('pdf: {}'.format(probfit.describe(pdf)))

# In[21]:

# Define the cost function in the usual way ...
binned_likelihood = probfit.BinnedLH(pdf, data_all, bins=200,
                                     extended=True, bound=fit_range)

# This is a quite complex fit (11 free parameters!), so we need good
# starting values. Actually we even need to set an initial parameter error
# for 'mu1' and 'mu2' to make MIGRAD converge.
# The initial parameter error is used as the initial step size in the
# minimization.
pars = dict(mu1=1.9, error_mu1=0.1, sigma1=0.2, N1=3000,
            mu2=4.1, error_mu2=0.1, sigma2=0.1, N2=5000,
            c_0=4, c_1=4, c_2=1, NBkg=20000)

# define iminuit object (pedantic=False silences warnings about parameters
# without explicit initial settings; iminuit 1.x API)
two_gaussians = iminuit.Minuit(binned_likelihood, pedantic=False,
                               print_level=0, **pars)

# You can see that the model already roughly matches the data.
fig, ax = plt.subplots()
binLik = binned_likelihood.draw(two_gaussians, parts=True)

# In[22]:

# This can take a while ... the likelihood is evaluated a few 100 times
# (and each time the distributions are evaluated, including the
# numerical computation of the normalizing integrals).
two_gaussians.migrad()

# In[23]:

fig, ax = plt.subplots()
binLik = binned_likelihood.show(two_gaussians, parts=True)

# In[24]:

# You should copy & paste the return tuple from the `draw` docstring ...
# Unpack the draw results: data histogram, asymmetric errors, the total
# model curve and the per-component curves.
((data_edges, datay),
 (errorp, errorm),
 (total_pdf_x, total_pdf_y),
 parts) = binned_likelihood.draw(two_gaussians, parts=True)

# ... now we have everything to make our own plot

# In[25]:

# Now make the plot as pretty as you like, e.g. with matplotlib.
plt.figure(figsize=(8, 5))
plt.errorbar(probfit.mid(data_edges), datay, errorp,
             fmt='.', capsize=0, color='Gray', label='Data')
plt.plot(total_pdf_x, total_pdf_y, color='blue', lw=2, label='Total Model')
colors = ['orange', 'purple', 'DarkGreen']
labels = ['Background', 'Signal 1', 'Signal 2']
for color, label, part in zip(colors, labels, parts):
    x, y = part
    plt.plot(x, y, ls='--', color=color, label=label)
plt.grid(True)
plt.legend(loc='upper left')

# In[ ]: