#!/usr/bin/env python
# coding: utf-8

# # Introduction to iMinuit / Probfit
# Notebook-style script: fit a straight line (this section) and Gaussians
# (below), comparing probfit cost functions against hand-written chi^2.

# In[1]:

# Jupyter magic: interactive matplotlib backend (only works inside IPython).
get_ipython().run_line_magic('matplotlib', 'notebook')

import numpy as np
import matplotlib.pyplot as plt
import iminuit
import probfit

# # straight line

# In[2]:

# Let's make a straight line with gaussian(mu=0, sigma=1) noise
np.random.seed(0)
x = np.linspace(0, 10, 20)
y = 3.3 * x + 15.5 + np.random.randn(len(x))
err = 2 * np.ones(len(x)) + 1.7 * np.random.randn(len(x))
fig, ax = plt.subplots()
ax.errorbar(x, y, err, fmt='.')

# In[3]:

def line(x, m, c):
    """Straight-line model.

    The first argument is the independent variable; the arguments after
    that (m, c) are the shape parameters. Define it to be parabolic or
    whatever you like.
    """
    return m * x + c

# In[4]:

iminuit.describe(line)

# In[5]:

# Define a chi^2 cost function.
# Chi2Regression is just a callable object; nothing special about it.
chi2 = probfit.Chi2Regression(line, x, y)
iminuit.describe(chi2)

# In[6]:

# Minimize it. iminuit gives a heads-up that no initial value was given;
# we can ignore that for now.
firstFit = iminuit.Minuit(chi2, m=1)
firstFit.migrad()

# In[7]:

def cust_chi2(m, c):
    """Hand-written chi^2 for the line model.

    The arrays x, y and err are module globals and must be defined
    before this function is called.
    """
    expected = line(x, m, c)
    observed = y
    # Per-point chi^2 contributions; nansum ignores NaN entries.
    contributions = (observed - expected) ** 2 / err ** 2
    return np.nansum(contributions)

# In[8]:

# NOTE: the error_* / limit_* keyword arguments are the iminuit 1.x API
# (initial step sizes and parameter limits).
secondFit = iminuit.Minuit(cust_chi2,
                           m=3.0, error_m=0.01, limit_m=(2.5, 4.),
                           c=15., error_c=.01, limit_c=(12., 37.))
secondFit.migrad()

# In[9]:

# The output above is a pretty-printed summary of the fit results from
# minuit.print_fmin(), which iminuit.Minuit.migrad() calls automatically
# after running MIGRAD.
# Let's see our results as Python dictionaries ...
# Print the fitted parameter values and errors of both fits.
print("First fit")
print(firstFit.values)
print(firstFit.errors)
print("Second fit")
print(secondFit.values)
print(secondFit.errors)

# In[10]:

# Overlay the two fitted lines on the data.
fig, ax = plt.subplots()
ax.errorbar(x, y, err, fmt='.')
ax.plot(x, line(x, firstFit.values['m'], firstFit.values['c']), label="probfit")
ax.plot(x, line(x, secondFit.values['m'], secondFit.values['c']), label="custom fit")
legend = ax.legend(loc='lower right', ncol=1, shadow=False, fontsize=14)

# # Gaussian

# In[11]:

# First let's make some example data: sigma = 4 and mean = 1.
np.random.seed(0)
data = np.random.randn(10000) * 4 + 1
fig, ax = plt.subplots()
# NOTE(review): `normed` was removed in matplotlib >= 3.1; on a modern
# stack use density=True instead — confirm against the installed version.
hist, bins, _ = ax.hist(data, bins=50, histtype='step', normed=True)

# In[12]:

# Define your PDF / model.
def gauss_pdf(x, mu, sigma):
    """Normalized Gaussian"""
    return 1 / np.sqrt(2 * np.pi) / sigma * np.exp(-(x - mu) ** 2 / 2. / sigma ** 2)

# In[13]:

# Build your cost function. Here we use a binned likelihood.
binned_likelihood = probfit.BinnedLH(gauss_pdf, data)

# In[14]:

# Create the minuit and give an initial value for the sigma parameter.
# Remember: minuit.errordef is automatically set to 0.5
# as required for likelihood fits (this was explained above).
gaus_1st = iminuit.Minuit(binned_likelihood, sigma=3)

# In[15]:

gaus_1st.migrad()
# Like in all binned fits with a long zero tail,
# it will have to do something about the zero bins;
# probfit.BinnedLH does handle them gracefully but will give you a warning.

# In[16]:

# Bin centres of the histogram drawn above.
bins_x = (bins[:-1] + bins[1:]) / 2

def cust_gauss(mu, sigma):
    """Hand-written chi^2 between the normalised histogram `hist`
    (module global, filled above) and the Gaussian model evaluated
    at the bin centres `bins_x` (also a module global)."""
    expected = gauss_pdf(bins_x, mu, sigma)
    observed = hist
    # Pearson-style contributions; nansum drops NaN bins (e.g. 0/0),
    # though an exact zero `expected` would still yield inf.
    contributions = (observed - expected) ** 2 / expected
    return np.nansum(contributions)

# iminuit 1.x API: error_* are initial step sizes, limit_* are bounds.
gaus_2st = iminuit.Minuit(cust_gauss,
                          mu=.5, error_mu=0.01, limit_mu=(-4., 4.),
                          sigma=4., error_sigma=.01, limit_sigma=(1., 10.))
gaus_2st.migrad()

# In[17]:

# generate canvas
fig, ax = plt.subplots()
# plot data
# NOTE(review): `normed` is deprecated/removed in recent numpy; use
# density=True on a modern stack — confirm against the installed version.
hist, bins = np.histogram(data, bins=50, normed=True)
ax.errorbar(bins_x, hist, yerr=np.sqrt(hist / len(data)), fmt='o',
            capsize=3, markersize=4)
# plot the two fits
ax.plot(bins_x, gauss_pdf(bins_x, gaus_1st.values['mu'], gaus_1st.values['sigma']),
        lw=2, label="binned likelihood\n(probfit)")
ax.plot(bins_x, gauss_pdf(bins_x, gaus_2st.values['mu'], gaus_2st.values['sigma']),
        'r--', lw=2, label="general minimization")
# generate legend
ax.legend(loc='upper right', ncol=1, shadow=False, fontsize=14)

# In[18]:

# drawing it nicely: mu-sigma confidence contours from MINOS.
fig, ax = plt.subplots()
gaus_1st.draw_mncontour('mu', 'sigma', nsigma=4)

# In[19]:

# Toy dataset: two Gaussian peaks on top of a polynomial background.
np.random.seed(0)
data_peak1 = np.random.randn(3000) * 0.2 + 2
data_peak2 = np.random.randn(5000) * 0.1 + 4
data_range = (-2, 5)
data_bg = probfit.gen_toy(lambda x: 4 + 4 * x + x ** 2, 20000, data_range)
data_all = np.concatenate([data_peak1, data_peak2, data_bg])
fig, ax = plt.subplots()
ax.hist((data_peak1, data_peak2, data_bg, data_all),
        label=['Signal 1', 'Signal 2', 'Background', 'Total'],
        bins=200, histtype='step', range=data_range)
ax.legend(loc='upper left')

# In[20]:

# Using a polynomial to fit a distribution is problematic, because the
# polynomial can assume negative values, which results in NaN (not a number)
# values in the likelihood function.
# To avoid this problem we restrict the fit to the range (0, 5) where
# the polynomial is clearly positive.
fit_range = (0, 5)

# Background: a 2nd-order polynomial, normalised over the fit range and
# then extended with a yield parameter 'NBkg'.
normalized_poly = probfit.Normalized(probfit.Polynomial(2), fit_range)
normalized_poly = probfit.Extended(normalized_poly, extname='NBkg')

# Two Gaussian signals; rename gives each its own parameter names so they
# do not collide, and Extended adds the yields 'N1' / 'N2'.
gauss1 = probfit.Extended(probfit.rename(probfit.gaussian, ['x', 'mu1', 'sigma1']),
                          extname='N1')
gauss2 = probfit.Extended(probfit.rename(probfit.gaussian, ['x', 'mu2', 'sigma2']),
                          extname='N2')

# Define an extended PDF consisting of three components.
pdf = probfit.AddPdf(normalized_poly, gauss1, gauss2)

print('normalized_poly: {}'.format(probfit.describe(normalized_poly)))
print('gauss1: {}'.format(probfit.describe(gauss1)))
print('gauss2: {}'.format(probfit.describe(gauss2)))
print('pdf: {}'.format(probfit.describe(pdf)))

# In[21]:

# Define the cost function in the usual way ...
binned_likelihood = probfit.BinnedLH(pdf, data_all, bins=200,
                                     extended=True, bound=fit_range)

# This is a quite complex fit (11 free parameters!), so we need good
# starting values. Actually we even need to set an initial parameter error
# for 'mu1' and 'mu2' to make MIGRAD converge.
# The initial parameter error is used as the initial step size in the
# minimization.
pars = dict(mu1=1.9, error_mu1=0.1, sigma1=0.2, N1=3000,
            mu2=4.1, error_mu2=0.1, sigma2=0.1, N2=5000,
            c_0=4, c_1=4, c_2=1, NBkg=20000)

# define iminuit object (pedantic=False silences warnings about parameters
# without explicit initial settings; iminuit 1.x API)
two_gaussians = iminuit.Minuit(binned_likelihood, pedantic=False,
                               print_level=0, **pars)

# You can see that the model already roughly matches the data.
fig, ax = plt.subplots()
binLik = binned_likelihood.draw(two_gaussians, parts=True)

# In[22]:

# This can take a while ... the likelihood is evaluated a few 100 times
# (and each time the distributions are evaluated, including the
# numerical computation of the normalizing integrals).
two_gaussians.migrad()

# In[23]:

fig, ax = plt.subplots()
binLik = binned_likelihood.show(two_gaussians, parts=True)

# In[24]:

# You should copy & paste the return tuple from the `draw` docstring ...
# Unpack the draw results: data histogram, asymmetric errors, the total
# model curve and the per-component curves.
((data_edges, datay),
 (errorp, errorm),
 (total_pdf_x, total_pdf_y),
 parts) = binned_likelihood.draw(two_gaussians, parts=True)

# ... now we have everything to make our own plot

# In[25]:

# Now make the plot as pretty as you like, e.g. with matplotlib.
plt.figure(figsize=(8, 5))
plt.errorbar(probfit.mid(data_edges), datay, errorp,
             fmt='.', capsize=0, color='Gray', label='Data')
plt.plot(total_pdf_x, total_pdf_y, color='blue', lw=2, label='Total Model')
colors = ['orange', 'purple', 'DarkGreen']
labels = ['Background', 'Signal 1', 'Signal 2']
for color, label, part in zip(colors, labels, parts):
    x, y = part
    plt.plot(x, y, ls='--', color=color, label=label)
plt.grid(True)
plt.legend(loc='upper left')

# In[ ]: