#!/usr/bin/env python
# coding: utf-8

# Evidence of gluten sensitivity
# ------------------------------
# 
# This notebook contains an exploration of results from this paper:
# 
# http://onlinelibrary.wiley.com/doi/10.1111/apt.13372/epdf
# 
# which reports results of a study of gluten sensitivity.
# 
# Copyright 2015 Allen Downey
# 
# MIT License: http://opensource.org/licenses/MIT

# In[1]:


from __future__ import print_function, division

import thinkbayes2
import thinkplot

from scipy import stats

get_ipython().run_line_magic('matplotlib', 'inline')


# The following class defines the function that computes the likelihood of the data given the hypothetical number of subjects who are gluten sensitive.
# 
# I assume that a subject who is gluten sensitive has a 95% chance of successfully identifying gluten flour based on resumption of symptoms, and that a subject who is not gluten sensitive has only a 40% chance of identifying gluten flour (and a 20% chance of detecting no difference).
# 
# The function works by computing the PMF of the number of gluten identifications conditioned on gs, and then selecting the actual number of identifications from the PMF.

# In[2]:


class Gluten(thinkbayes2.Suite):
    
    def Likelihood(self, data, hypo):
        """
        hypo: int hypothetical # who are gluten sensitive
        data: (int, int) # who correctly identify gluten, others
        """
        gs = hypo
        yes, no = data
        n = yes + no
        ngs = n - gs
        
        pmf1 = thinkbayes2.MakeBinomialPmf(gs, 0.95)
        pmf2 = thinkbayes2.MakeBinomialPmf(ngs, 0.1)
        pmf = pmf1 + pmf2
        return pmf[yes]


# The prior is uniform from 0 to 35; that is, there are equally likely to be any number of GS subjects in the study.

# In[3]:


data = 12, 23
prior = Gluten(range(0, 35+1))
thinkplot.Pdf(prior)

Here's the update.
# In[4]:


posterior = prior.Copy()
posterior.Update(data)


# Here's what the posterior looks like.

# In[5]:


thinkplot.PrePlot(1)
thinkplot.Pdf(posterior)
thinkplot.Config(xlabel='# who are gluten sensitive', 
                 ylabel='PMF', legend=False)
#thinkplot.Save(root='gluten1', formats=['png'])


# And the posterior credible intervals.

# In[6]:


posterior.CredibleInterval(90)


# 

# In[7]:


posterior.CredibleInterval(95)


# We can also compare to a null hypothesis in which none of the respondents are gluten sensitive.

# In[8]:


prior2 = Gluten([0])
prior2.Update(data)


# The Bayes factor in favor of the null hypothesis is about 8, which is moderate evidence.  If you started thinking the probability of the null was 50%, you should now believe it is about 90%.

# In[9]:


0.11 / 0.013


# In[10]:


8.5/9.5


# In[10]: