#!/usr/bin/env python
# coding: utf-8

# # CARTOframes with Dask
# 
# This notebook recreates the workflow from <https://jakevdp.github.io/blog/2015/08/14/out-of-core-dataframes-in-python/>, where the author explores [`dask`](http://dask.pydata.org/en/latest/) for splitting up computations across multiple cores of a machine to complete tasks more quickly.
# 
# ## Basics
# 
# You'll need the following for this:
# 
# 1. Your CARTO username
# 2. Your API key
# 
# Paste these values in the quotes (`''`) below.

# In[1]:


# Render matplotlib figures inline in the notebook output.
get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import cartoframes

# Credentials for the CARTO account used by every cell below.
username = ''   # <-- insert your username here
api_key = ''    # <-- insert your API key here

# The account's base URL is derived from the username; `cc` is the single
# CARTO connection object reused for all writes, maps and data queries.
cc = cartoframes.CartoContext(
    base_url='https://{}.carto.com/'.format(username),
    api_key=api_key,
)


# In[2]:


from dask import dataframe as dd
import pandas as pd

# Restrict the read to the four columns the rest of the notebook uses.
columns = ['name', 'amenity', 'Longitude', 'Latitude']

# Load the POI dump through dask rather than pandas; the results are
# materialized later with `.compute()`.
data = dd.read_csv(
    'scratch/POIWorld.csv',
    usecols=columns,
)


# In[3]:


# Drop rows whose `name` / `amenity` is missing so the string matching
# below (and in the fast-food section) only sees populated values.
with_name = data[data['name'].notnull()]
with_amenity = data[data['amenity'].notnull()]

# Boolean masks over the named POIs; each pattern accepts an upper- or
# lower-case first letter of the chain name.
is_starbucks = with_name['name'].str.contains('[Ss]tarbucks')
is_dunkin = with_name['name'].str.contains('[Dd]unkin')

# Materialize each selection with `.compute()` so it can be handled with
# pandas below.
starbucks = with_name[is_starbucks].compute()
dunkin = with_name[is_dunkin].compute()


# In[4]:


# Tag every row with the chain it belongs to, then stack the two frames
# into one table; the `type` column is what the category map colors by.
starbucks = starbucks.assign(type='starbucks')
dunkin = dunkin.assign(type='dunkin')
coffee_places = pd.concat([starbucks, dunkin])

# Preview the first 20 rows of the combined table.
coffee_places.head(20)


# ## Write DataFrame to CARTO

# In[5]:


# Upload the combined frame to CARTO as the `coffee_places` table,
# replacing any existing table of that name. Passing the lng/lat column
# pair lets CARTO create a geometry for each row.
# NOTE(review): the CSV columns are read as 'Longitude'/'Latitude' but are
# referenced in lower case here -- presumably cartoframes normalizes column
# names on upload; confirm against the cartoframes version in use.
cc.write(
    coffee_places,
    table_name='coffee_places',
    overwrite=True,
    lnglat=('longitude', 'latitude'),
)


# ### Let's visualize this DataFrame
# 
# Category map of Dunkin' Donuts vs. Starbucks (i.e., colored by 'type')

# In[6]:


from cartoframes import Layer

# Draw the uploaded `coffee_places` table, coloring each point by its
# `type` value (starbucks vs. dunkin).
cc.map(
    layers=Layer(
        'coffee_places',
        size=3,
        color='type',
    ),
)


# ## Fast Food

# In[7]:


# Select POIs whose amenity tag mentions `fast_food`.
is_fastfood = with_amenity['amenity'].str.contains('fast_food')
fastfood = with_amenity[is_fastfood]

# Show the first 12 entries of the per-name frequency table.
fastfood['name'].value_counts().head(12)


# In[8]:


# Materialize the fast-food selection, then upload it to CARTO as the
# `fastfood_dask` table (replacing any existing one) with a geometry built
# from the lng/lat columns.
ff = fastfood.compute()
cc.write(
    ff,
    table_name='fastfood_dask',
    overwrite=True,
    lnglat=('longitude', 'latitude'),
)


# ### Number of Fast Food places in this OSM dump

# In[9]:


# Row count of the materialized fast-food table.
ff.shape[0]


# ### OSM Fast Food POIs

# In[11]:


# NOTE(review): BaseMap is imported but not used by the map call below.
from cartoframes import BaseMap

# Plot every fast-food POI from the uploaded `fastfood_dask` table as a
# small point.
cc.map(
    layers=Layer(
        'fastfood_dask',
        size=2,
    ),
)


# ### Adding measures from the Data Observatory
# 
# We can augment our dataset with demographic measures for the area around each coffee place, which is useful if you want to add features for building a model.

# In[12]:


# Data Observatory measures to attach to each coffee place, as labelled by
# the original author:
#   B01003001 -- Total Population
#   B17001001 -- Children under 18 years of age
#   B19013001 -- Median income
# NOTE(review): B17001 looks like the ACS poverty-status table, so the
# second label may not match its id -- confirm against the Data
# Observatory catalog.
data_obs_measures = [
    {'numer_id': numer_id}
    for numer_id in ('us.census.acs.B01003001',
                     'us.census.acs.B17001001',
                     'us.census.acs.B19013001')
]

# Fetch the `coffee_places` table augmented with the measures above and
# preview the first rows.
coffee_augmented = cc.data('coffee_places', data_obs_measures)
coffee_augmented.head()