#!/usr/bin/env python
# coding: utf-8
"""Music recommendation with GraphLab Create.

Notebook export. Loads a tab-separated table whose three columns are renamed
to ``user_id``, ``music_id`` and ``rating``, splits it with
``random_split(0.8, seed=1)``, fits popularity, item-similarity and
factorization recommenders on the training part, compares them on the test
part, and asks the item-similarity model for top-K recommendations for up to
100 users.
"""

# In[2]:

import os

import graphlab as gl
import matplotlib.pyplot as plt

# Set canvas to show SFrames and SGraphs in the IPython notebook.
gl.canvas.set_target('ipynb')

# `get_ipython` is only defined inside an IPython session; skip the inline
# plotting magic (instead of crashing) when this file runs as a plain script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# # Download the data
# http://s3.amazonaws.com/dato-datasets/millionsong/10000.txt

# In[3]:

REMOTE_TRAIN_FILE = 'http://s3.amazonaws.com/dato-datasets/millionsong/10000.txt'
LOCAL_TRAIN_FILE = '/Users/chengjun/bigdata/millionsong/song_usage_10000.txt'

# Prefer the local copy; fall back to the public URL when it is missing so
# the script is not tied to one machine's directory layout.
# NOTE(review): presumably the local file is a download of the URL above
# under a different name -- confirm before relying on the fallback.
if os.path.exists(LOCAL_TRAIN_FILE):
    train_file = LOCAL_TRAIN_FILE
else:
    train_file = REMOTE_TRAIN_FILE

sf = gl.SFrame.read_csv(train_file, header=False, delimiter='\t',
                        verbose=False)

# The file has no header row, so the columns arrive as X1..X3. Rebind `sf`
# to the returned frame so the later column lookups do not depend on
# `rename` mutating in place.
sf = sf.rename({'X1': 'user_id', 'X2': 'music_id', 'X3': 'rating'})
sf.show()


# In[4]:

# Fixed seed keeps the train/test split reproducible between runs.
(train_set, test_set) = sf.random_split(0.8, seed=1)


# In[5]:

popularity_model = gl.popularity_recommender.create(
    train_set, 'user_id', 'music_id', target='rating')


# In[7]:

item_sim_model = gl.item_similarity_recommender.create(
    train_set, 'user_id', 'music_id', target='rating',
    similarity_type='cosine')


# In[8]:

factorization_machine_model = gl.recommender.factorization_recommender.create(
    train_set, 'user_id', 'music_id', target='rating')


# In[9]:

# Compare the three models on the test rows, using a 10% sample of users;
# the training rows are passed as `skip_set`.
result = gl.recommender.util.compare_models(
    test_set,
    [popularity_model, item_sim_model, factorization_machine_model],
    user_sample=.1,
    skip_set=train_set)


# In[10]:

K = 10
# Up to 100 distinct user ids to generate recommendations for.
users = gl.SArray(sf['user_id'].unique().head(100))


# In[11]:

recs = item_sim_model.recommend(users=users, k=K)
# Bare expression: rendered as cell output in a notebook, no effect when run
# as a script.
recs.head()


# In[ ]: