#!/usr/bin/env python
# coding: utf-8

# # CARTOframes with Dask
#
# This notebook recreates the workflow from an earlier blog post
# (TODO: restore the link, which is missing from this export), where the
# author explores [`dask`](http://dask.pydata.org/en/latest/) for splitting up
# the computations across multiple cores in a machine to complete tasks more
# quickly.
#
# ## Basics
#
# You'll need the following for this:
#
# 1. Your CARTO username
# 2. Your API key
#
# Paste these values in the quotes (`''`) below.

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import cartoframes

username = ''  # <-- insert your username here
api_key = ''   # <-- insert your API key here

cc = cartoframes.CartoContext('https://{}.carto.com/'.format(username),
                              api_key)


# In[2]:


from dask import dataframe as dd

# Only load the columns this notebook uses; the read is lazy until
# `.compute()` is called on a derived frame.
columns = ["name", "amenity", "Longitude", "Latitude"]
data = dd.read_csv('scratch/POIWorld.csv', usecols=columns)


# In[3]:


# Drop rows with missing values first so the string matching below only
# sees real strings.
with_name = data[data['name'].notnull()]
with_amenity = data[data['amenity'].notnull()]

# Boolean masks matching either capitalisation of the first letter.
is_starbucks = with_name['name'].str.contains(r'[Ss]tarbucks')
is_dunkin = with_name['name'].str.contains(r'[Dd]unkin')

# `.compute()` runs the Dask graph and returns in-memory pandas DataFrames.
starbucks = with_name[is_starbucks].compute()
dunkin = with_name[is_dunkin].compute()


# In[4]:


# Tag each row with its brand so the combined table can be coloured by it.
starbucks['type'] = 'starbucks'
dunkin['type'] = 'dunkin'
coffee_places = pd.concat([starbucks, dunkin])
coffee_places.head(20)


# ## Write DataFrame to CARTO

# In[5]:


# specify columns for lng/lat so carto will create a geometry
# NOTE(review): the CSV columns are 'Longitude'/'Latitude'; the lowercase
# names here presumably rely on cartoframes normalizing column names on
# write -- confirm against the cartoframes version in use.
cc.write(coffee_places,
         table_name='coffee_places',
         lnglat=('longitude', 'latitude'),
         overwrite=True)


# ### Let's visualize this DataFrame
#
# Category map on Dunkin' Donuts vs. Starbucks (aka, color by 'type')

# In[6]:


from cartoframes import Layer
cc.map(layers=Layer('coffee_places', color='type', size=3))


# ## Fast Food

# In[7]:


is_fastfood = with_amenity['amenity'].str.contains(r'fast_food')
fastfood = with_amenity[is_fastfood]
fastfood['name'].value_counts().head(12)


# In[8]:


# Materialize the fast-food subset as a pandas DataFrame before uploading.
ff = fastfood.compute()
cc.write(ff,
         table_name='fastfood_dask',
         lnglat=('longitude', 'latitude'),
         overwrite=True)


# ### Number of Fast Food places in this OSM dump

# In[9]:


len(ff)


# ### OSM Fast Food POIs

# In[11]:


from cartoframes import BaseMap
cc.map(layers=Layer('fastfood_dask', size=2))


# ### Adding measures from the Data Observatory
#
# We can augment our dataset with demographics for the area around each
# coffee place, for example to add features for building a model.

# In[12]:


# DO measures: Total Population,
#              Children under 18 years of age
#              Median income
# NOTE(review): 'us.census.acs.B17001001' looks like the ACS poverty-status
# universe rather than "children under 18" -- confirm the measure ID
# matches the label above.
data_obs_measures = [{'numer_id': 'us.census.acs.B01003001'},
                     {'numer_id': 'us.census.acs.B17001001'},
                     {'numer_id': 'us.census.acs.B19013001'}]
coffee_augmented = cc.data('coffee_places', data_obs_measures)
coffee_augmented.head()