#!/usr/bin/env python
# coding: utf-8

# In[1]:

import json

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import requests
import yaml
from geopandas.tools import sjoin

# Load parameters to populate the iNaturalist API request from the YAML
# configuration file `params.yml`

# In[2]:

# API docs in https://www.inaturalist.org/pages/api+reference
with open('params.yml') as config_file:
    params = yaml.load(config_file, yaml.SafeLoader)
api_params = params['api']
poly_endpoint = params['join']['url']
api_baseurl = params['api']['url']
# NOTE(review): `api_params` still contains the 'url' key, so it is also sent
# to the API as a query parameter (unchanged from the original behaviour).


# Load polygons to be intersected with iNaturalist point data
# (ideally the endpoint serves GeoJSON)

# In[3]:

dfpoly = gpd.read_file(poly_endpoint)


# Define the method to fetch the data from the iNaturalist REST API

# In[4]:

# Columns kept from every API page; everything else is dropped to speed
# things up.
OBSERVATION_COLUMNS = [
    'latitude',
    'longitude',
    'taxon.id',
    'taxon.name',
    'taxon.preferred_common_name',
]


def get_data(api_params=None):
    """Download all result pages from the iNaturalist API as one DataFrame.

    Pages are requested from ``api_baseurl`` starting at ``api_params['page']``
    (1 when absent) until a page comes back with a number of records different
    from ``api_params['per_page']``, or with no records at all.

    Args:
        api_params: Mapping of query parameters for the request. The mapping
            is copied, so the caller's object is never modified. When
            ``per_page`` is missing, only a single page is fetched because a
            full page cannot be recognised.

    Returns:
        A DataFrame with the columns listed in ``OBSERVATION_COLUMNS`` and a
        fresh 0..n-1 index. Columns absent from the API response are filled
        with missing values. The frame is empty when the API returns no
        records.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    # Work on a copy: the page counter is advanced below and must not leak
    # into the caller's configuration.
    query = dict(api_params or {})
    query.setdefault('page', 1)
    per_page = query.get('per_page')

    pages = []
    while True:
        print("⏳ Fetch iNaturalist data from API... page %d" % query['page'])
        r = requests.get(
            api_baseurl,
            params=query,
            headers={'Content-Type': 'application/json'},
        )
        if r.status_code != 200:
            raise Exception(
                'Yikes - iNaturalist API error: {}'.format(r.status_code)
            )

        page_df = pd.json_normalize(r.json(), record_path=['results'])
        if page_df.empty:
            # Happens when the total record count is an exact multiple of
            # `per_page`: the extra page has no 'location' column to split.
            break

        n_records = len(page_df.index)
        # Extract lat-lon; `location` is a "lat,lon" string.
        page_df[['latitude', 'longitude']] = (
            page_df['location'].str.split(',', n=1, expand=True)
        )
        # reindex (rather than plain selection) tolerates pages in which no
        # record carries one of the optional taxon fields.
        pages.append(page_df.reindex(columns=OBSERVATION_COLUMNS))

        if n_records != per_page:
            break  # short page: nothing more to fetch
        query['page'] += 1

    if not pages:
        return pd.DataFrame(columns=OBSERVATION_COLUMNS)
    return pd.concat(pages, ignore_index=True)


# In[5]:

df = get_data(api_params)
print("✔ %d records downloaded from the iNaturalist API" % len(df.index))


# Calculate inner join and manipulate data to be used in the plots below

# In[6]:

gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.longitude, df.latitude))
gdf.set_crs(crs='EPSG:4326', inplace=True)
pointInPolys = sjoin(gdf, dfpoly, how='left')


def calc_within_area(row):
    """Return 'Outside' when the row has no ``OBJECTID``, else 'Inside'.

    After the left spatial join, ``OBJECTID`` is missing for points that
    matched no polygon (assumes ``OBJECTID`` is a column of the polygon
    layer - confirm against the endpoint).
    """
    return ('Outside' if pd.isnull(row['OBJECTID']) else 'Inside')


pointInPolys['state'] = pointInPolys.apply(calc_within_area, axis=1)
dftaxon = (
    pointInPolys.groupby(['taxon.id', 'state'])
    .size()
    .reset_index(name='counts')
)
# Keep only the observations that matched a polygon. `!= pd.NA` does not
# filter missing values, `notna()` does.
subdf = pointInPolys.loc[pointInPolys['Name'].notna()]


# In[7]:

get_ipython().run_line_magic('matplotlib', 'inline')
pd.options.plotting.backend = "plotly"


# In[8]:

fig = pointInPolys['state'].value_counts().plot.bar(
    title='Observations in and outside MRs',
    labels=dict(index="", value="# Observations", variable=""))
fig.update_layout(showlegend=False)
fig.show()


# In[9]:

fig = dftaxon['state'].value_counts().plot.bar(
    title='Taxon richness in and outside MRs',
    labels=dict(index="", value="# Taxon", variable=""))
fig.update_layout(showlegend=False)
fig.show()


# In[10]:

fig = subdf['Name'].value_counts().plot.bar(
    title='Observations inside MRs',
    labels=dict(index="", value="# Observations", variable="MR"))
fig.update_layout(showlegend=False)
fig.show()


# In[11]:

dftaxon = (
    pointInPolys.groupby(['Name', 'taxon.id'])
    .size()
    .reset_index(name='counts')
)
fig = dftaxon['Name'].value_counts().plot.bar(
    title='Taxon richness inside MRs',
    labels=dict(index="", value="# Taxon", variable=""))
fig.update_layout(showlegend=False)
fig.show()


# In[12]:

subdff = (
    subdf.groupby(['taxon.name', 'Name'])
    .size()
    .reset_index(name='counts')
    .sort_values(by=['Name', 'counts'])
)
# One explicit column order for both header and cells, so every header sits
# above the values it names.
table_columns = ['Name', 'taxon.name', 'counts']
fig = go.Figure(data=[go.Table(
    header=dict(values=table_columns,
                fill_color='paleturquoise',
                align='left'),
    cells=dict(values=[subdff[column] for column in table_columns],
               fill_color='lavender',
               align='left'))
])
fig.show()