#!/usr/bin/env python
# coding: utf-8

# # Activity 9 - Recommender Systems
#
# This notebook illustrates the recommender system example used in the
# lecture notes for Text Analytics.

# In[1]:

import pandas as pd
import numpy as np


# In[3]:

def rand_array(size: int = 10) -> list:
    """Return ``size`` random like/dislike scores as a list of floats.

    Each score is a uniform draw from ``np.random.random`` rounded with
    ``np.round``, so every entry is either 0.0 (dislike) or 1.0 (like).
    The global NumPy generator is used and is not seeded here, so the
    result differs from run to run.

    Args:
        size: Number of scores to generate. Defaults to 10, the number of
            items used in this notebook.

    Returns:
        A list of ``size`` floats, each 0.0 or 1.0.
    """
    return list(np.round(np.random.random([size,])))


rand_array()


# In[4]:

items = ['Apple', 'Banana', 'Pear', 'Chicken', 'Beef',
         'Lamb', 'Pizza', 'Pasta', 'Rice', 'Cake']
people = ['Alice', 'Bob', 'Charlie', 'Daisy', 'Edward',
          'Faye', 'George', 'Harriet', 'Imogen', 'John']

# One column of item names followed by one random like/dislike column per
# person, generated in the order the people are listed.
data = {'Items': items}
data.update({name: rand_array(len(items)) for name in people})

df = pd.DataFrame(data)
df


# Above we have a table, where we can see whether each person likes a
# particular item.
#
# * If the item is scored 1.0 then the person likes the item.
# * If the item is scored 0.0 then the person does not like the item.

# In[5]:

# Cold start: average each item over the known people. Selecting the person
# columns by name keeps the text column 'Items' out of the arithmetic, so the
# result is a float column rather than an object column.
df['Kyle'] = df[people].mean(axis=1)
df


# Above we observe a new person - Kyle. At this stage we do not know anything
# about Kyle.
#
# * We can generate an initial profile for this new user by taking the
#   average popularity of each item across the users in our current
#   knowledge base.

# In[11]:

# Replace the cold-start profile with a one-hot profile: 1 for Beef, 0 for
# every other item. Deriving it from the 'Items' column avoids hard-coding
# Beef's row position.
df['Kyle'] = (df['Items'] == 'Beef').astype('int64')
df


# Suppose we now know that Kyle likes Beef. We could initialise his profile
# with this information.

# In[14]:

# Row position of the single item Kyle is known to like.
beef_row = df['Kyle'].argmax()
beef_row

# Scores for that item, restricted to the person columns so the item name is
# never compared against 1.
p = df[people + ['Kyle']].iloc[beef_row]
p = p[p == 1]

# Number of people who like the item, Kyle included.
v = len(p)

# Full like/dislike columns of everyone who likes the item (Kyle included).
p = df[p.index]
p


# The above shows us who else also likes Beef, allowing us to see who is more
# similar to Kyle.
# In[121]:

# Re-estimate Kyle's profile from the people who share his known preference.
# ``p`` holds one column per person who likes Beef (Kyle included) and ``v``
# is the number of those columns, so ``v - 1`` is the number of peers left
# once Kyle's own column is removed.
p = p.drop(columns=['Kyle'])
p = p.assign(
    # Fraction of the peers who like each item.
    Kyle=p.to_numpy().sum(axis=1) / (v - 1),
    # Re-attach the item names so the table is readable.
    Items=df['Items'],
)
p


# Since we know for sure that Kyle likes Beef, we can use this subset of users
# to get a more precise initialisation of Kyle's preferences. As a simple
# observation, previously we were only 0.6 confident that Kyle liked Pasta,
# when we had a "cold" initialisation of his profile. Knowing that he likes
# Beef, we are now very confident that he likes Pasta, since everyone in our
# dataset who likes Beef also likes Pasta, and hence his probability for this
# now increases to 1.0.
#
# NOTE(review): the figures quoted above come from one particular run; the
# ratings are drawn at random without a fixed seed, so re-running produces
# different values.

# In[ ]: