import json
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests
import yaml
from geopandas.tools import sjoin
import plotly.graph_objects as go
Load parameters to populate the iNaturalist API request from the YAML configuration file params.yml
# API docs in https://www.inaturalist.org/pages/api+reference
# Read the notebook configuration. The context manager closes the file handle
# as soon as parsing is done instead of leaving it to the garbage collector.
with open('params.yml', encoding='utf-8') as config_file:
    params = yaml.safe_load(config_file)

# Query parameters for the API request.
# NOTE(review): this is the whole 'api' section, so it still contains the
# 'url' key and that key is sent along as a query parameter too -- confirm
# the API ignores it.
api_params = params['api']
# URL of the polygon layer that the observations are joined against.
poly_endpoint = params['join']['url']
# Base URL of the API endpoint that is requested page by page.
api_baseurl = params['api']['url']
Load polygons to be intersected with iNaturalist point data (ideally endpoint serves GeoJSON)
# Read the polygon layer from the URL configured under params['join']['url'].
# The result is the right-hand side of the spatial join further down.
dfpoly = gpd.read_file(poly_endpoint)
Define the method to fetch the data from the iNaturalist REST API
def get_data(api_params=None):
    """Fetch every page of observations from the iNaturalist REST API.

    Requests ``api_baseurl`` with ``api_params`` as the query string, starting
    at ``api_params['page']`` and moving on to the next page for as long as a
    page comes back full, i.e. with exactly ``api_params['per_page']`` rows.

    Args:
        api_params: Mapping of query parameters. It must contain the integer
            entries ``'page'`` and ``'per_page'``. The mapping is copied, so
            the caller's page counter is not advanced by this call.

    Returns:
        A pandas DataFrame with one row per observation and the columns
        ``latitude``, ``longitude``, ``taxon.id``, ``taxon.name`` and
        ``taxon.preferred_common_name``. ``latitude`` and ``longitude`` are
        the two halves of the API's comma-separated ``location`` string.
        Rows are renumbered 0..n-1 across all pages. An empty frame with
        these columns is returned when the API yields no results.

    Raises:
        KeyError: If ``'page'`` or ``'per_page'`` is missing.
        Exception: If the API answers with a status code other than 200.
    """
    columns = ['latitude', 'longitude', 'taxon.id', 'taxon.name',
               'taxon.preferred_common_name']
    # Work on a private copy: the page counter is advanced below and must not
    # leak into the caller's dict (a second call would start at the last page).
    query = dict(api_params or {})
    per_page = query['per_page']
    pages = []

    # Iterate instead of recursing so the number of pages is not bounded by
    # the interpreter's recursion limit.
    while True:
        print("⏳ Fetch iNaturalist data from API... page %d" % query['page'])
        r = requests.get(
            api_baseurl,
            params=query,
            headers={'Content-Type': 'application/json'},
            timeout=60,  # do not hang forever on a stalled connection
        )
        if r.status_code != 200:
            raise Exception(
                'Yikes - iNaturalist API error: {}'.format(r.status_code)
            )

        page = pd.json_normalize(r.json(), record_path=['results'])
        if page.empty:
            # Happens when the total is an exact multiple of per_page: the
            # previous page was full, but there is nothing left to fetch.
            break

        # Extract lat-lon and keep critical columns only to speed things up.
        page[['latitude', 'longitude']] = page['location'].str.split(
            ',', n=1, expand=True)
        # reindex (rather than plain selection) tolerates a page in which an
        # optional field is absent from every record; it is filled with NaN.
        pages.append(page.reindex(columns=columns))

        if len(page.index) != per_page:
            break  # short page: this was the last one
        query['page'] += 1

    if not pages:
        return pd.DataFrame(columns=columns)
    return pd.concat(pages, ignore_index=True)
# Download all observation pages, then report how many rows were retrieved.
df = get_data(api_params)
print("✔ %d records downloaded from the iNaturalist API" % df.shape[0])
⏳ Fetch iNaturalist data from API... page 1 ⏳ Fetch iNaturalist data from API... page 2 ⏳ Fetch iNaturalist data from API... page 3 ⏳ Fetch iNaturalist data from API... page 4 ⏳ Fetch iNaturalist data from API... page 5 ⏳ Fetch iNaturalist data from API... page 6 ⏳ Fetch iNaturalist data from API... page 7 ⏳ Fetch iNaturalist data from API... page 8 ⏳ Fetch iNaturalist data from API... page 9 ⏳ Fetch iNaturalist data from API... page 10 ⏳ Fetch iNaturalist data from API... page 11 ⏳ Fetch iNaturalist data from API... page 12 ⏳ Fetch iNaturalist data from API... page 13 ⏳ Fetch iNaturalist data from API... page 14 ⏳ Fetch iNaturalist data from API... page 15 ⏳ Fetch iNaturalist data from API... page 16 ⏳ Fetch iNaturalist data from API... page 17 ⏳ Fetch iNaturalist data from API... page 18 ✔ 3480 records downloaded from the iNaturalist API
Calculate a left spatial join (every observation is kept; polygon attributes are null where no polygon matches) and manipulate the data to be used in the plots below
# Turn the observation table into point geometries. The coordinates are
# declared as EPSG:4326 directly at construction time.
observation_points = gpd.points_from_xy(df.longitude, df.latitude)
gdf = gpd.GeoDataFrame(df, geometry=observation_points, crs='EPSG:4326')

# Left join: every observation is kept, and the polygon columns are filled in
# only for the observations that match a polygon.
pointInPolys = sjoin(gdf, dfpoly, how='left')
def calc_within_area(row):
    """Label one joined row as lying inside or outside the polygons.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) that has an
            ``'OBJECTID'`` entry.

    Returns:
        ``'Outside'`` when ``row['OBJECTID']`` is null, otherwise ``'Inside'``.
    """
    # NOTE(review): 'OBJECTID' is presumably a polygon attribute that the left
    # join leaves empty for unmatched points -- confirm against the layer.
    if pd.isnull(row['OBJECTID']):
        return 'Outside'
    return 'Inside'
# Tag every observation as 'Inside' or 'Outside' the polygon layer.
pointInPolys['state'] = pointInPolys.apply(calc_within_area, axis=1)

# Number of observations per taxon and state, as a flat table with a
# 'counts' column.
dftaxon = pointInPolys.groupby(['taxon.id', 'state']).size().reset_index(name='counts')

# Keep only the rows whose 'Name' is present. A comparison with pd.NA
# (`!= pd.NA`) is not a missing-value test, so it did not drop the rows with
# a missing 'Name'; notna() is the explicit check.
# NOTE(review): 'Name' is presumably a polygon attribute brought in by the
# join -- confirm against the polygon layer.
subdf = pointInPolys.loc[pointInPolys['Name'].notna()]
%matplotlib inline
pd.options.plotting.backend = "plotly"
fig = pd.value_counts(pointInPolys['state']).plot.bar(title='Observations in and outside MRs', labels=dict(index="", value="# Observations", variable=""))
fig.update_layout(showlegend=False)
fig.show()