Set some useful LaTeX commands for later in the document: $ \newcommand{\Int}[2] {\displaystyle \int\limits_{#1}^{#2}} $ $ \newcommand{\Prod}[2] {\displaystyle \prod\limits_{#1}^{#2}} $ $ \newcommand{\Sum}[2] {\displaystyle \sum\limits_{#1}^{#2}} $
$ \newcommand{subNsubR}[3] {{}_{#1} #2_{#3}} $ $ \newcommand{rust}[1] {{\require{color}\color{Bittersweet}#1}} $ $ \newcommand{sky}[1] {{\require{color}\color{SkyBlue}#1}} $
'''
debinningWithNewStatistics.ipynb
This file is an ipython notebook designed to be converted to reveal.js
slides via this command:
ipython nbconvert debinningWithNewStatistics.ipynb --to=slides --post=serve [--config slides_config.py]
This file also contains the main body of debinning work in the code blocks below.
Copyright (C) 2015 Abram Krislock
Talks given from ~= this version of the slides:
"Debinning: Data Smoothing with a New Probability Calculus"
- Fysisk institutt, Universitetet i Oslo, February 25, 2015
- Mitchell Institute for Fundamental Physics and Astronomy, Texas A&M University, March 13, 2015
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/gpl-3.0.txt.
'''
import sys
sys.path[0] = '..'
from __future__ import division
import numpy as np
from time import time
import bisect
import operator as op
from utilities import settings
import utilities.sampleFunctions as funcs
import utilities.probCalc as pc
import utilities.plotting as debinPlot
import utilities.minimizers as miniz
reload(funcs)
reload(pc)
reload(debinPlot)
reload(miniz)
%matplotlib inline
from matplotlib import pyplot as plt
from IPython.display import HTML
funcs.randomGenerator.setSeed('seedOne')
settings['simpleSortedOutput'] = True
nPulls = 400
basePars = (0.00006, 10., 200., 110., 1.5, 50., 140.)
baseGuess = (140., -2., -0.1, 10.)
def fitTwoHistograms(percentile):
data = funcs.functionToData(funcs.easyEndpoint, nPulls, 0.5, (0,200), basePars)
percentieth = data[data > np.percentile(data, percentile)]
# The histograms are filled with the same data, only different binning.
binContentsOne, binEdgesOne = np.histogram(percentieth, np.arange(3., 204., 10.))
binContentsTwo, binEdgesTwo = np.histogram(percentieth, np.arange(0., 201., 5.))
# The fit ranges are defined to be from the value of the 70th percentile data point
# to "the last non-zero bin", blindly.
fitBinsOne = np.flatnonzero(binContentsOne[binContentsOne.argmax():]) + binContentsOne.argmax()
fitBinsTwo = np.flatnonzero(binContentsTwo[binContentsTwo.argmax():]) + binContentsTwo.argmax()
### FITTING METHOD: Binned Maximum Likelihood ###
fitGuessOne = np.array([value * funcs.randomGenerator.normal(1., 0.1) for value in baseGuess])
fitGuessTwo = fitGuessOne.copy()
def chiSqOne(pars):
# return funcs.chiSqSimple(binContentsOne, binEdgesOne, fitBinsOne, funcs.theKink, pars)
return -2. * funcs.theKink_BinnedLikelihood(binContentsOne, binEdgesOne, fitBinsOne, pars)
def chiSqTwo(pars):
# return funcs.chiSqSimple(binContentsTwo, binEdgesTwo, fitBinsTwo, funcs.theKink, pars)
return -2. * funcs.theKink_BinnedLikelihood(binContentsTwo, binEdgesTwo, fitBinsTwo, pars)
fitOne = miniz.nelderMead(chiSqOne, fitGuessOne, xtol=0.01, ftol=0.05, disp=0)
fitTwo = miniz.nelderMead(chiSqTwo, fitGuessTwo, xtol=0.01, ftol=0.05, disp=0)
return (data, fitOne, fitTwo)
xKinkOne = []
xKinkTwo = []
xKinkDiff = []
percentile = 60
nTrials = 1000
for i in xrange(nTrials):
data, fitOne, fitTwo = fitTwoHistograms(percentile)
xKinkOne.append(fitOne[0])
xKinkTwo.append(fitTwo[0])
xKinkDiff.append(baseGuess[0] + fitTwo[0] - fitOne[0])
def plotHistoFitCompare():
fig, axes = plt.subplots(figsize=(11,7), facecolor='#ffffff')
# plt.hist(xKinkOne, range=(80,180), color='#66a61e', bins=np.arange(80, 181, 2), alpha=0.6, zorder=2)
# plt.hist(xKinkTwo, range=(80,180), color='#0088ff', bins=np.arange(80, 181, 2), alpha=0.6, zorder=2)
plt.hist(xKinkDiff, range=(80,180), color='#0088ff', bins=np.arange(80, 181, 2), alpha=0.8, zorder=1)
axes.set_xticks(np.arange(80, 181, 10))
axes.set_xticks(np.arange(80, 181, 2), minor=True)
axes.set_xticklabels([r'$%i$' % int(num) for num in np.arange(80., 181., 10.)], fontsize=20)
axes.set_yticks(np.arange(0, 250, 50))
axes.set_yticks(np.arange(0, 250, 10), minor=True)
axes.set_yticklabels([r'$%i$' % num for num in np.arange(0, 250, 50)], fontsize=20)
axes.tick_params(length=10, width=1.2, labelsize=22, zorder=10, pad=10)
axes.tick_params(which='minor', length=5, width=1.2, zorder=11)
axes.minorticks_on()
axes.set_xlabel('True Kink (140) + Kink Fit Difference', labelpad=5, fontsize=22)
axes.set_ylabel('Number of Counts (400 total)', labelpad=5, fontsize=22)
axes.annotate(r'larger $x_{\rm kink2}$', xy=(140.5,200), xycoords='data', xytext=(50,-5),
textcoords='offset points', size=22,
arrowprops=dict(arrowstyle='<-', color='#e7298a', linewidth=3))
axes.annotate(r'larger $x_{\rm kink1}$', xy=(139.5,200), xycoords='data', xytext=(-180,-5),
textcoords='offset points', size=22,
arrowprops=dict(arrowstyle='<-', color='#66a61e', linewidth=3))
axes.annotate('', xy=(140.15,250), xycoords='data', xytext=(0,-100),
textcoords='offset points', arrowprops=dict(arrowstyle='-'))
print
plotHistoFitCompare()
$ {\Large ({\rm HW}-1)}$ $${\large {\rm Prove\ the}\ \subNsubR{n}{f}{r}\ {\rm Theorem\ by\ computing\ the\ chain\ integral.} }$$
Proof:
$$ \subNsubR{n}{f}{r}(x_r) dx_r = \Int{\{x_{i \neq r}\}_n}{} dP(\{x_i\}_n) = n! \Int{-\infty}{x_r} f(x_{r-1}) \Int{-\infty}{x_{r-1}} f(x_{r-2}) \cdots \Int{-\infty}{x_2} f(x_1) \Prod{i=1}{r-1} dx_i $$$$ \times\ f(x_r) dx_r \Int{x_r}{\infty} f(x_{r+1}) \Int{x_{r+1}}{\infty} f(x_{r+2}) \cdots \Int{x_{n-1}}{\infty} f(x_n) \Prod{j=r+1}{n} dx_j $$Integration by parts:
$$ \Int{-\infty}{x} f(\widetilde{x}) F^{a-1}(\widetilde{x}) d\widetilde{x} = \frac{1}{a} F^a(x) \qquad {\rm and} \qquad \Int{x}{\infty} f(\widetilde{x}) \left(1 - F(\widetilde{x})\right)^{a-1} d\widetilde{x} = \frac{1}{a} \left(1 - F(x)\right)^a $$reload(funcs)
def quickData():
''' Shorthand for getting easyEndpoint data (384 x values in [8.5,200])
- set nPulls before calling
- see utilities.sampleFunctions.functionToData for more details
'''
return funcs.functionToData(funcs.easyEndpoint, nPulls, 0.5, np.arange(8.5, 200.2, 0.5))
def quickBG():
''' Shorthand for getting endpointBG data (384 x values in [8.5,200])
- set nPulls before calling
- see utilities.sampleFunctions.functionToData for more details
'''
return funcs.functionToData(funcs.endpointBG, nPulls, 0.5, np.arange(8.5, 200.2, 0.5))
nPulls = 10
pullData = {index:[] for index in xrange(nPulls)}
settings['simpleSortedOutput'] = True
for iteration in xrange(2000):
for i,v in enumerate(quickData()):
pullData[i].append(v)
settings['simpleSortedOutput'] = False
x, f, F, data = quickData()
bgx, bgf, bgF, bgdata = quickBG()
f /= F[-1]
bgf /= F[-1]
bgF /= bgF[-1]
F /= F[-1]
ithPDF = {}
for i in xrange(1, nPulls+1):
ithPDF[i-1] = np.array([f[j] * F[j]**(i-1) * (1 - F[j])**(nPulls-i) * pc.nKr(nPulls, i) for j in xrange(len(x))])
def makeHist(i=0):
fig, axes = plt.subplots(figsize=(9,5), facecolor='#ffffff')
plt.plot(x, f, '--', color='#0088ff', alpha=0.6, linewidth=3)
plt.plot(x, bgf, '-.', color='#0088ff', alpha=0.6, linewidth=3)
plt.hist(pullData[i], bins=np.arange(0, 201, 5), alpha=0.4, color='#66a61e', normed=True)
plt.plot(x, ithPDF[i], '#964a01', linewidth=3)
axes.set_xticks(np.arange(0, 201, 50))
axes.set_xticks(np.arange(0, 201, 10), minor=True)
axes.set_xticklabels([r'$%i$' % int(num) for num in np.arange(0, 201, 50)], fontsize=20)
axes.set_ylim(bottom=-0.0001, top=0.0401)
axes.set_yticks(np.arange(0, 0.04, 0.01))
axes.set_yticklabels([r'$%0.3f$' % num for num in np.arange(0, 0.04, 0.01)], fontsize=20)
axes.set_yticks(np.arange(0, 0.04, 0.01/5), minor=True)
axes.set_xlabel('Physical Observable', labelpad=5, fontsize=22)
axes.set_ylabel('Probability', labelpad=5, fontsize=22)
axes.tick_params(length=10, width=1.2, labelsize=22, zorder=10, pad=10)
axes.tick_params(which='minor', length=5, width=1.2, zorder=11)
axes.minorticks_on()
makeHist(0)
makeHist(1)
makeHist(2)
makeHist(3)
makeHist(4)
makeHist(5)
makeHist(6)
makeHist(7)