#!/usr/bin/env python
# coding: utf-8

# In[1]:


# The three kinds of entity a referring expression can pick out; every
# referent set built later in this file is a combination of these labels.
entities = {
    'self',
    'addressee',
    'other',
}


# ### 1 entity referent
# * self ("me")
# * addressee ("you here")
# * other ("somebody else")
# 
# ### 2+ entity referent
# * self, addressee ("me and you here" / inclusive we)
# * self, other ("me and somebody else" / exclusive we)
# * addressee, addressee ("the two or more of you here")
# * addressee, other ("one of you here and somebody else")
# * other, other ("the two or more of them")
# 
# ### 3+ entity referent
# * self, addressee, addressee ("me and the two or more of you here")
# * self, addressee, other ("me, one of you here, and somebody else")
# * self, other, other ("me and two or more other people")
# * addressee, addressee, other ("the two or more of you and somebody else")
# * addressee, other, other ("one of you and two or more other people")
# 
# ### 4+ entity referent
# * self, addressee, addressee, other ("me, the two or more of you here, and somebody else")
# * self, addressee, other, other ("me, one of you here, and two or more other people")
# * addressee, addressee, other, other ("the two or more of you here and two or more other people")
# 
# ### 5+ entity referent
# * self, addressee, addressee, other, other ("me, the two or more of you here, and two or more other people")

# There are 17 possible markers when two entities of the same type are not distinguished from three or more entities of that type.
# * a dual or trial entity number could be added to have a 3-way distinction between e.g. [other, other] and [other, other, other]
# * another entity category besides self, addressee, and other could be added (invisible/divine entities)
# * multiple self referents could be included (choral we)
# 
# Open question: how should the model handle a learner who mis-identifies the cue as "self" rather than "addressee" (e.g. kids calling themselves "you")?

# In[2]:


from itertools import combinations, combinations_with_replacement

# Enumerate every admissible referent set. Each entry of `referents` is a list
# holding the individual entity labels followed by the "compound" cues (tuples
# of two or more of those labels taken together).
referents = []

# Iterate the entity labels in sorted order: iterating the set directly makes
# the enumeration order depend on string hashing, which Python 3 randomizes
# per process, so the rows would otherwise change from run to run.
entity_order = sorted(entities)

# 'self' may appear at most once and every other entity type at most twice
# (singular vs plural), so the largest admissible set has
# 2 * len(entities) - 1 members -- exactly the upper bound of this range.
# `range` (rather than the Python 2-only `xrange`) runs on Python 2 and 3.
for i in range(1, len(entities) * 2):
    for combo in combinations_with_replacement(entity_order, i):

        # choral we is impossible
        if combo.count('self') > 1:
            continue

        # only singular vs plural: a third entity of the same type adds nothing
        if combo.count('addressee') > 2 or combo.count('other') > 2:
            continue

        # compound cues: start from the individual labels...
        referent = list(combo)

        # ...then add every sub-combination of two or more labels as a tuple.
        for j in range(2, len(combo) + 1):
            for compound in combinations(combo, j):

                # repeated labels produce identical tuples; keep each once
                if compound not in referent:
                    referent.append(compound)

        referents.append(referent)


# In[3]:


# Number of referent sets that survived the filters in the enumeration loop;
# this should equal the 17 markers listed in the notes near the top of the file.
len(referents)


# In[4]:


# Display every referent set: the individual labels followed by compound tuples.
referents


# Spoken English collapses these to 6 possibilities: I, you, s/he, we, you guys, they

# In[5]:


def english(referents):
    """Return the spoken-English pronoun used for a set of referents.

    `referents` is a sequence of cue labels. Only the plain string labels
    'self', 'addressee' and 'other' are inspected; any other members (such
    as compound tuples) are ignored. The sequence must support ``in`` and
    ``.count``.

    Person is decided by priority: first person if the speaker is included,
    otherwise second person if an addressee is included, otherwise third
    person. Returns one of 'I', 'we', 'you', 'you guys', 's/he', 'they',
    or None when none of the three labels is present.
    """
    # First person: anyone accompanying the speaker turns 'I' into 'we',
    # whether inclusive (addressee) or exclusive (other).
    if 'self' in referents:
        accompanied = 'addressee' in referents or 'other' in referents
        return 'we' if accompanied else 'I'

    # Second person (speaker not included): plural when there are several
    # addressees or when somebody else is referred to as well.
    if 'addressee' in referents:
        plural = referents.count('addressee') > 1 or 'other' in referents
        return 'you guys' if plural else 'you'

    # Third person (neither speaker nor addressee included).
    if 'other' in referents:
        return 'they' if referents.count('other') > 1 else 's/he'

    # No recognised entity label at all.
    return None


# In[6]:


# Inclusive we: speaker plus addressee -> 'we'
english(['self', 'addressee'])


# In[7]:


# Exclusive we: speaker plus somebody else -> 'we'
english(['self', 'other'])


# In[8]:


# Addressee plus somebody else, speaker excluded -> 'you guys'
english(['addressee', 'other'])


# In[9]:


# Two or more addressees -> 'you guys'
english(['addressee', 'addressee']) # also ('addressee', 'addressee') compound


# In[10]:


import pandas

# One row per referent set: the cues available to the learner and the
# English pronoun (outcome) that `english` assigns to that set.
data = pandas.DataFrame()

data['Cues'] = referents
data['Outcomes'] = list(map(english, referents))
data


# Assume that the distribution of referent sets is uniform, which is probably not true.

# In[11]:


import numpy

def sampler(p):
    """Build a zero-argument function that draws one random value from `p`.

    `p` is handed unchanged to ``numpy.random.choice`` on every draw, so an
    integer means "pick from 0 .. p - 1" and a 1-D sequence means "pick one
    of its elements". No weights are supplied, so each draw is uniform.
    """

    def uniform():
        # A fresh, independent draw each time the returned function is called.
        drawn = numpy.random.choice(p)
        return drawn

    return uniform

# Uniform sampler over the integers 0 .. len(data) - 1, one per row of `data`
# (presumably used by ndl/sim as row positions -- confirm against those modules).
referent_sampler = sampler(len(data))


# In[12]:


import ndl

def activation(W):
    """Return a DataFrame with one row of ``ndl.activation`` output per cue set.

    `W` is passed straight through to ``ndl.activation`` together with each
    entry of the module-level ``data['Cues']`` column; the resulting rows are
    indexed like `data`.
    NOTE(review): `W` is presumably the weight matrix returned by ``ndl.rw``
    -- confirm against the ndl module.
    """
    per_referent = [ndl.activation(cues, W) for cues in data['Cues']]
    return pandas.DataFrame(per_referent, index=data.index)


# In[13]:


# Run ndl.rw on the cue/outcome table with M=100, drawing referent sets through
# `referent_sampler`, then compute the activation rows for every referent set.
# NOTE(review): presumably `rw` is a Rescorla-Wagner learner, M the number of
# trials and W the learned cue-outcome weights -- confirm against the ndl module.
W = ndl.rw(data, M=100, distribution=referent_sampler)
A = activation(W)
A


# In[14]:


# Side-by-side comparison, one row per referent set: the true pronoun, the
# column label with the highest activation in that row (A.idxmax(1)), and
# whether the two agree.
pandas.DataFrame([data['Outcomes'], A.idxmax(1), A.idxmax(1) == data['Outcomes']], 
                 index = ['Truth', 'Prediction', 'Accurate?']).T


# With 100 trials, the learner is getting a lot of them right, but just by predicting 'you guys' or 'we' (if self is a referent) all of the time, since those cover most of the referent sets.

# In[15]:


import sim


# In[16]:


# Build a simulation from the pronoun function, the cue/outcome table and the
# uniform referent sampler.
# NOTE(review): the final argument (2000) is presumably the maximum number of
# learning trials, exposed later as MAX_M -- confirm against sim.Simulation.
english_learning = sim.Simulation(english, data, referent_sampler, 2000)


# In[17]:


import matplotlib.pyplot as plt
# Notebook-only: render matplotlib figures inline (requires an IPython session).
get_ipython().run_line_magic('matplotlib', 'inline')


# In[18]:


# Learning curve: one `english_learning.accuracy(i)` value per i from 1 up to
# (but not including) MAX_M. `range` replaces the Python 2-only `xrange`, so
# the cell runs on both Python 2 and 3 and matches the `range` call below.
# NOTE(review): presumably accuracy(i) is the learner's accuracy after i
# trials, and stopping at MAX_M - 1 is intentional -- confirm against sim.
trajectory = [english_learning.accuracy(i) for i in range(1, english_learning.MAX_M)]

# Plot the values against their 1-based position in the trajectory.
plt.plot(range(1, len(trajectory) + 1), trajectory, '-')
plt.xlabel('Trial Number')


# In[19]:


# Notebook-only: load the rpy2 IPython extension so the R magics used below
# are available.
get_ipython().run_line_magic('load_ext', 'rpy2.ipython')

# Copy the Python `trajectory` list into the R session under the same name.
get_ipython().run_line_magic('Rpush', 'trajectory')


# In[20]:


# R cell: wrap the trajectory in a data frame (trial number, learned value) and
# draw it with ggplot2 as translucent points plus a smoothed trend line, with
# the y axis limited to [0, 1].
get_ipython().run_cell_magic('R', '', "\ntrajectory = data.frame(trial=1:length(trajectory), learned=trajectory)\n\nlibrary('ggplot2')\n\nggplot(trajectory, aes(trial, learned)) + \n    geom_point(alpha=0.25) + \n    stat_smooth() +\n    coord_cartesian(ylim=c(0,1))\n")


# In[ ]: