#!/usr/bin/env python
# coding: utf-8

# In[1]:


# The three kinds of entity a pronoun-like marker can refer to.
entities = {'self', 'addressee', 'other'}


# ### 1 entity referent
# * self ("me")
# * addressee ("you here")
# * other ("somebody else")
#
# ### 2+ entity referent
# * self, addressee ("me and you here" / inclusive we)
# * self, other ("me and somebody else" / exclusive we)
# * addressee, addressee ("the two or more of you here")
# * addressee, other ("one of you here and somebody else")
# * other, other ("the two or more of them")
#
# ### 3+ entity referent
# * self, addressee, addressee ("me and the two or more of you here")
# * self, addressee, other ("me, one of you here, and somebody else")
# * self, other, other ("me and two or more other people")
# * addressee, addressee, other ("the two or more of you and somebody else")
# * addressee, other, other ("one of you and two or more other people")
#
# ### 4+ entity referent
# * self, addressee, addressee, other ("me, the two or more of you here, and somebody else")
# * self, addressee, other, other ("me, one of you here, and two or more other people")
# * addressee, addressee, other, other ("the two or more of you here and two or more other people")
#
# ### 5+ entity referent
# * self, addressee, addressee, other, other ("me, the two or more of you here, and two or more other people")

# There are 17 possible markers if there's no distinction between 2 entities of the same type and 3+ entities of the same type.
# * a dual or trial entity number could be added to have a 3-way distinction between e.g. [other, other] and [other, other, other]
# * another entity category besides self, addressee, and other could be added (invisible/divine entities)
# * multiple self referents could be included (choral we)
#
# Also, what about the issue of mis-identifying the cue as "self" rather than "addressee" (kids calling themselves "you")?

# In[2]:


from itertools import combinations, combinations_with_replacement

# Build one cue list per possible referent set.  Each cue list holds the
# individual entities followed by every distinct compound (tuple) of two or
# more of them.
referents = []
# `entities` is a set, so its iteration order depends on string hashing;
# sorting gives the same row order and the same tuple orientation for the
# compound cues on every run.
ordered_entities = sorted(entities)
# `range` (not the Python-2-only `xrange`) keeps this runnable on Python 2 and 3.
for i in range(1, len(entities) * 2):
    for combo in combinations_with_replacement(ordered_entities, i):
        # choral we is impossible
        if combo.count('self') > 1:
            continue
        # only singular vs plural
        if combo.count('addressee') > 2:
            continue
        if combo.count('other') > 2:
            continue
        # compound cues
        referent = list(combo)
        for j in range(2, len(combo) + 1):
            for compound in combinations(combo, j):
                # repeated entities produce identical tuples; keep each once
                if compound not in referent:
                    referent.append(compound)
        referents.append(referent)


# In[3]:


len(referents)


# In[4]:


referents


# Spoken English collapses these to 6 possibilities: I, you, s/he, we, you guys, they

# In[5]:


def english(referents):
    """Return the spoken-English pronoun for a referent cue list.

    Parameters
    ----------
    referents : list or tuple
        Cues for one referent set: the strings 'self', 'addressee' and
        'other' (repeated to mark plurality).  Compound tuple cues may be
        present too; they never equal one of the plain strings, so they do
        not affect the result.

    Returns
    -------
    str or None
        One of 'I', 'we', 'you', 'you guys', 's/he', 'they', or None when
        none of the three entity strings is present.
    """
    # first-person
    if 'self' in referents:
        if 'addressee' in referents:
            # inclusive we
            # doesn't matter who else is being referred to
            return 'we'
        if 'other' in referents:
            # exclusive we
            # doesn't matter who else is being referred to
            return 'we'
        return 'I'
    # second-person, if the speaker isn't included
    if 'addressee' in referents:
        if referents.count('addressee') > 1:
            # inclusive you
            return 'you guys'
        if 'other' in referents:
            # exclusive you
            return 'you guys'
        return 'you'
    # third-person, if the addressee isn't included either
    if 'other' in referents:
        if referents.count('other') > 1:
            return 'they'
        return 's/he'
    # no recognised entity in the cue list
    return None


# In[6]:


english(['self', 'addressee'])


# In[7]:


english(['self', 'other'])


# In[8]:


english(['addressee', 'other'])


# In[9]:


english(['addressee', 'addressee']) # also ('addressee', 'addressee') compound


# In[10]:


import pandas

# One row per referent set: the cue list and the English pronoun it maps to.
data = pandas.DataFrame()
data['Cues'] = referents
data['Outcomes'] = [english(referent) for referent in referents]
data


# Assume that the distribution of referent sets is uniform, which is probably not true.
# In[11]:


import numpy

def sampler(p):
    """Return a zero-argument callable that draws one sample via numpy.random.choice(p).

    With an integer `p` (as used below, the number of rows in `data`),
    numpy.random.choice draws uniformly from range(p).
    """
    def uniform():
        return numpy.random.choice(p)
    return uniform

referent_sampler = sampler(len(data))


# In[12]:


import ndl

def activation(W):
    """Return a DataFrame with one row per entry of data.Cues.

    Each row is the result of ndl.activation(c, W) for that row's cues; the
    frame shares data's index.
    NOTE(review): W is presumably the weight matrix returned by ndl.rw --
    confirm against the ndl module.
    """
    return pandas.DataFrame([ndl.activation(c, W) for c in data.Cues], index=data.index)


# In[13]:


W = ndl.rw(data, M=100, distribution=referent_sampler)
A = activation(W)
A


# In[14]:


# Column label with the highest activation in each row, compared with the true outcome.
prediction = A.idxmax(1)
pandas.DataFrame([data['Outcomes'], prediction, prediction == data['Outcomes']],
                 index = ['Truth', 'Prediction', 'Accurate?']).T


# With 100 trials, the learner is getting a lot of them right, but just by predicting 'you guys' or 'we' (if self is a referent) all of the time, since those cover most of the referent sets.

# In[15]:


import sim


# In[16]:


english_learning = sim.Simulation(english, data, referent_sampler, 2000)


# In[17]:


import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[18]:


# `range` (not the Python-2-only `xrange`) keeps this runnable on Python 2 and 3.
trajectory = [english_learning.accuracy(i) for i in range(1, english_learning.MAX_M)]
plt.plot(range(1, len(trajectory) + 1), trajectory, '-')
plt.xlabel('Trial Number')


# In[19]:


get_ipython().run_line_magic('load_ext', 'rpy2.ipython')
get_ipython().run_line_magic('Rpush', 'trajectory')


# In[20]:


get_ipython().run_cell_magic('R', '', "\ntrajectory = data.frame(trial=1:length(trajectory), learned=trajectory)\n\nlibrary('ggplot2')\n\nggplot(trajectory, aes(trial, learned)) + \n geom_point(alpha=0.25) + \n stat_smooth() +\n coord_cartesian(ylim=c(0,1))\n")


# In[ ]: