Let's download the datasets used on this example (if needed)
import requests
import os.path
def download(url,local_filename):
if os.path.exists(local_filename):
print('File already exists, quiting')
else:
print('Downloading {}...'.format(local_filename))
r = requests.get(url, stream=True)
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
# Downloading from a shared Drive folder
download('https://drive.google.com/uc?export=download&id=0B392y-77KML9dVZLZzc2aXJNVlE', 'brooklyn_poverty.gpkg')
download('https://drive.google.com/uc?export=download&id=0B392y-77KML9dTFYZ052aExTTTA', 'taxi_sample.csv')
Downloading brooklyn_poverty.gpkg... Downloading taxi_sample.csv...
Now we can start with cartoframes
, yay!
import cartoframes
from cartoframes.credentials import set_credentials
from cartoframes.credentials import credentials
import pandas as pd
import os
Get the credentials from a previous version, from environment variables or directly here
try:
credentials()
print('Getting the credentials from a previous session')
except Exception as e:
print('Getting the credentials from your environment or here')
# BASEURL are in the format:
# https://USERNAME.carto.com -- for all cloud users, even if in a multiuser organization
# for on premise installs, contact your admin
BASEURL = os.environ.get('CARTO_API_URL','https://jsanz.carto.com') # <-- replace with your username or set up the envvar
APIKEY = os.environ.get('CARTO_API_KEY',False) # <-- replace False with your CARTO API key or set up the envvar
if BASEURL and APIKEY:
set_credentials(base_url=BASEURL, api_key=APIKEY, overwrite=True)
else:
print('Set up your environment!')
if credentials():
cc = cartoframes.CartoContext()
Getting the credentials from a previous session
# Asuming you've already uploaded the brooklyn poverty geopackage to your account
# Get a CARTO table as a pandas DataFrame
df = cc.read('brooklyn_poverty')
df.head()
--------------------------------------------------------------------------- BadRequestException Traceback (most recent call last) ~/src/projects/geocampes17/env/lib/python3.5/site-packages/carto/sql.py in send(self, sql, parse_json, do_post, format, **request_args) 83 ---> 84 return self.auth_client.get_response_data(resp, parse_json) 85 except Exception as e: ~/src/projects/geocampes17/env/lib/python3.5/site-packages/pyrestcli/auth.py in get_response_data(self, response, parse_json) 51 raise BadRequestException(response_json.get("error", False) or response_json.get("errors", ---> 52 _("Bad Request: {text}").format(text=response.text))) 53 elif response.status_code == requests.codes.not_found: BadRequestException: ['relation "brooklyn_poverty" does not exist'] During handling of the above exception, another exception occurred: CartoException Traceback (most recent call last) <ipython-input-4-cab858c569f5> in <module>() 1 # Asuming you've already uploaded the brooklyn poverty geopackage to your account 2 # Get a CARTO table as a pandas DataFrame ----> 3 df = cc.read('brooklyn_poverty') 4 df.head() ~/src/projects/geocampes17/env/lib/python3.5/site-packages/cartoframes/context.py in read(self, table_name, limit, index, decode_geom) 127 raise ValueError("`limit` parameter must an integer >= 0") 128 --> 129 return self.query(query, decode_geom=decode_geom) 130 131 def write(self, df, table_name, temp_dir='/tmp', overwrite=False, ~/src/projects/geocampes17/env/lib/python3.5/site-packages/cartoframes/context.py in query(self, query, table_name, decode_geom) 493 'SELECT * FROM {table_name}'.format(table_name=new_table_name)) 494 else: --> 495 select_res = self.sql_client.send(query) 496 497 self._debug_print(select_res=select_res) ~/src/projects/geocampes17/env/lib/python3.5/site-packages/carto/sql.py in send(self, sql, parse_json, do_post, format, **request_args) 84 return self.auth_client.get_response_data(resp, parse_json) 85 except Exception as e: ---> 86 raise CartoException(e) 87 88 CartoException: ['relation "brooklyn_poverty" does not exist']
from cartoframes import Layer, styling
l = Layer('brooklyn_poverty', color={'column': 'poverty_per_pop', 'scheme': styling.sunset(7)})
cc.map(layers=l, interactive=False)
# read in a CSV of NYC taxi data
df = pd.read_csv('taxi_sample.csv')
df.head()
unnamed_0 | vendorid | tpep_pickup_datetime | tpep_dropoff_datetime | passenger_count | trip_distance | pickup_longitude | pickup_latitude | ratecodeid | store_and_fwd_flag | dropoff_longitude | dropoff_latitude | payment_type | fare_amount | extra | mta_tax | tip_amount | tolls_amount | improvement_surcharge | total_amount | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 303227 | 2 | 2016-05-01 14:52:11+00 | 2016-05-01 15:00:36+00 | 2 | 2.08 | -74.006706 | 40.730461 | 1 | False | -74.012383 | 40.706779 | 1 | 8.5 | 0.0 | 0.5 | 1.00 | 0.0 | 0.3 | 10.30 |
1 | 183993 | 1 | 2016-05-01 08:34:08+00 | 2016-05-01 08:49:02+00 | 1 | 3.00 | -73.924957 | 40.744125 | 1 | False | -73.973824 | 40.762779 | 1 | 13.5 | 0.0 | 0.5 | 2.00 | 0.0 | 0.3 | 16.30 |
2 | 1434785 | 1 | 2016-05-04 09:44:40+00 | 2016-05-04 10:07:09+00 | 1 | 2.10 | -73.973488 | 40.748501 | 1 | False | -73.998955 | 40.740833 | 2 | 14.5 | 0.0 | 0.5 | 0.00 | 0.0 | 0.3 | 15.30 |
3 | 421142 | 2 | 2016-05-01 20:50:11+00 | 2016-05-01 21:05:24+00 | 1 | 4.41 | -73.999786 | 40.743267 | 1 | False | -73.966362 | 40.792370 | 2 | 15.0 | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 16.30 |
4 | 547655 | 2 | 2016-05-02 07:26:56+00 | 2016-05-02 07:53:53+00 | 2 | 4.01 | -73.963631 | 40.803360 | 1 | False | -73.956963 | 40.784939 | 1 | 19.5 | 0.0 | 0.5 | 4.06 | 0.0 | 0.3 | 24.36 |
# send it to carto so we can map it
cc.write(df, 'taxi_sample',
lnglat=('pickup_longitude', 'pickup_latitude'),
overwrite=True)
df.head()
Creating geometry out of columns `pickup_longitude`/`pickup_latitude` Table successfully written to CARTO: https://jsanz.carto.com/dataset/taxi_sample
unnamed_0 | vendorid | tpep_pickup_datetime | tpep_dropoff_datetime | passenger_count | trip_distance | pickup_longitude | pickup_latitude | ratecodeid | store_and_fwd_flag | dropoff_longitude | dropoff_latitude | payment_type | fare_amount | extra | mta_tax | tip_amount | tolls_amount | improvement_surcharge | total_amount | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 303227 | 2 | 2016-05-01 14:52:11+00 | 2016-05-01 15:00:36+00 | 2 | 2.08 | -74.006706 | 40.730461 | 1 | False | -74.012383 | 40.706779 | 1 | 8.5 | 0.0 | 0.5 | 1.00 | 0.0 | 0.3 | 10.30 |
1 | 183993 | 1 | 2016-05-01 08:34:08+00 | 2016-05-01 08:49:02+00 | 1 | 3.00 | -73.924957 | 40.744125 | 1 | False | -73.973824 | 40.762779 | 1 | 13.5 | 0.0 | 0.5 | 2.00 | 0.0 | 0.3 | 16.30 |
2 | 1434785 | 1 | 2016-05-04 09:44:40+00 | 2016-05-04 10:07:09+00 | 1 | 2.10 | -73.973488 | 40.748501 | 1 | False | -73.998955 | 40.740833 | 2 | 14.5 | 0.0 | 0.5 | 0.00 | 0.0 | 0.3 | 15.30 |
3 | 421142 | 2 | 2016-05-01 20:50:11+00 | 2016-05-01 21:05:24+00 | 1 | 4.41 | -73.999786 | 40.743267 | 1 | False | -73.966362 | 40.792370 | 2 | 15.0 | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 16.30 |
4 | 547655 | 2 | 2016-05-02 07:26:56+00 | 2016-05-02 07:53:53+00 | 2 | 4.01 | -73.963631 | 40.803360 | 1 | False | -73.956963 | 40.784939 | 1 | 19.5 | 0.0 | 0.5 | 4.06 | 0.0 | 0.3 | 24.36 |
from cartoframes import Layer
cc.map(layers=Layer('taxi_sample'),
interactive=False)
Oops, there are some zero-valued long/lats in there, so the results are going to null island. Let's remove them.
df = df[(df['pickup_longitude'] != 0) | (df['pickup_latitude'] != 0)]
cc.write(df, 'taxi_sample', overwrite=True,
lnglat=('pickup_longitude', 'pickup_latitude'))
Creating geometry out of columns `pickup_longitude`/`pickup_latitude` Table successfully written to CARTO: https://jsanz.carto.com/dataset/taxi_sample
from cartoframes import Layer
cc.map(layers=Layer('taxi_sample'),
interactive=False)
# Let's take a look at what's going on at JFK airport, styled by the fare amount
cc.map(layers=Layer('taxi_sample',
size=4,
color={'column': 'fare_amount',
'scheme': styling.sunset(7)}),
zoom=12, lng=-73.7880, lat=40.6629,
interactive=False)
Remove the dataset
cc.delete('taxi_sample')