import foursquare
import pandas as pd

# Foursquare API credentials (fill in before running).
# Userless access via client id/secret; an OAuth access token works too:
# ACCESS_TOKEN = ""
# client = foursquare.Foursquare(access_token=ACCESS_TOKEN)
CLIENT_ID = ""
CLIENT_SECRET = ""
client = foursquare.Foursquare(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)

# Bounding boxes as [min_lng, min_lat, max_lng, max_lat].
# bbox = [11.109872, 47.815652, 12.068588, 48.397136]  # Munich
# bbox = [13.088400, 52.338120, 13.761340, 52.675499]  # Berlin
bbox = [5.866240, 47.270210, 15.042050, 55.058140]  # Germany

new_crawl = []           # venue ids discovered during the current BFS step
done = []                # venue ids already crawled
links = []               # (from_id, to_id) tuples: observed "next venue" transitions
venues = pd.DataFrame()  # one row per venue id with name/stats/coordinates

# Seed venue ids the breadth-first crawl starts from.
to_crawl = ["4ade0ccef964a520246921e3",
            "4cbd1bfaf50e224b160503fc",
            "4b0674e2f964a520f4eb22e3"]
depth = 8  # number of BFS expansion steps


def _venue_row(venue_id, name, stats, location):
    """Build a one-row DataFrame (indexed by venue id) with venue metadata."""
    return pd.DataFrame({"name": name,
                         "users": stats["usersCount"],
                         "checkins": stats["checkinsCount"],
                         "lat": location["lat"],
                         "lng": location["lng"]},
                        index=[venue_id])


def _in_bbox(location, box):
    """True when `location` lies strictly inside box = [min_lng, min_lat, max_lng, max_lat]."""
    return (box[1] < location["lat"] < box[3]) and (box[0] < location["lng"] < box[2])


for i in range(depth):
    new_crawl = []
    print(f"Step {i}: {len(venues)} locations and {len(links)} links. "
          f"{len(to_crawl)} venues to go.")
    for v in to_crawl:
        # BUG FIX: the original tested `v not in venues`, which checks the
        # DataFrame's *columns*, not its index — so every venue was re-fetched
        # and appended as a duplicate row. Membership must be tested on .index.
        if v not in venues.index:
            res = client.venues(v)
            venues = pd.concat([venues,
                                _venue_row(v,
                                           res["venue"]["name"],
                                           res["venue"]["stats"],
                                           res["venue"]["location"])])
        next_venues = client.venues.nextvenues(v)
        for nv in next_venues['nextVenues']['items']:
            if _in_bbox(nv["location"], bbox):
                # Same column-vs-index fix as above.
                if nv["id"] not in venues.index:
                    venues = pd.concat([venues,
                                        _venue_row(nv["id"], nv["name"],
                                                   nv["stats"], nv["location"])])
                if (nv["id"] not in done and nv["id"] not in to_crawl
                        and nv["id"] not in new_crawl):
                    new_crawl.append(nv["id"])
                links.append((v, nv["id"]))
        done.append(v)
    to_crawl = new_crawl
# Drop duplicate venue rows, keeping the most recently appended one.
# BUG FIX: `drop_duplicates(cols=..., take_last=...)` is the pre-0.17 pandas
# signature (long removed); the current parameters are subset= / keep=.
venues = (venues.reset_index()
                .drop_duplicates(subset='index', keep='last')
                .set_index('index'))
venues.head()

# Map venue id -> display name, used to label graph nodes below.
labels = venues["name"].to_dict()

import networkx as nx

# Directed graph: nodes are venue ids, edges are "next venue" transitions.
G = nx.DiGraph()
G.add_nodes_from(venues.index)
for f, t in links:
    G.add_edge(f, t)
# BUG FIX: nx.info() was removed in networkx 3.0; printing the graph gives
# the same node/edge summary.
print(G)

# Centrality scores per venue, attached as DataFrame columns.
pagerank = nx.pagerank(G, alpha=0.9)
betweenness = nx.betweenness_centrality(G)
venues['pagerank'] = [pagerank[n] for n in venues.index]
venues['betweenness'] = [betweenness[n] for n in venues.index]

import matplotlib.pyplot as plt


def _plot_top20(frame, column, xlabel, title):
    """Horizontal bar chart of the 20 venues ranked highest by `column`."""
    fig = plt.figure(figsize=(8, 6), dpi=150)
    ax = fig.add_subplot(111)
    # BUG FIX: DataFrame.sort() was removed in pandas 0.20 -> sort_values().
    ordered = frame.sort_values(column)
    ordered.set_index('name')[column][-20:].plot(kind='barh', ax=ax)
    ax.set_ylabel('Location')
    ax.set_xlabel(xlabel)
    ax.set_title(title)
    plt.show()


_plot_top20(venues, 'users', 'Users', 'Top 20 Locations by Users')
_plot_top20(venues, 'checkins', 'Checkins', 'Top 20 Locations by Checkins')
_plot_top20(venues, 'pagerank', 'Pagerank', 'Top 20 Locations by Pagerank')
# BUG FIX: the betweenness chart's x-axis was mislabelled 'Pagerank'.
_plot_top20(venues, 'betweenness', 'Betweenness Centrality',
            'Top 20 Locations by Betweenness Centrality')

# Spring-layout drawing of the full graph; node size scales with pagerank.
fig = plt.figure(figsize=(16, 9), dpi=150)
graph_pos = nx.spring_layout(G)
node_sizes = [10000 * score for score in pagerank.values()]
nx.draw_networkx_nodes(G, graph_pos, node_size=node_sizes,
                       alpha=0.5, node_color='blue')
nx.draw_networkx_edges(G, graph_pos, width=1, alpha=0.3, edge_color='blue')
nx.draw_networkx_labels(G, graph_pos, labels=labels,
                        font_size=10, font_family='Arial')
plt.axis('off')
plt.show()

# Persist the graph for external tools (e.g. Gephi).
nx.write_graphml(G, "./location_graph.graphml")