#!/usr/bin/env python
# coding: utf-8

# # Accessing OOI time series via ERDDAP

# Most IOOS data are available online via user applications and data services for machine-to-machine access. **ERDDAP** (see [here](https://coastwatch.pfeg.noaa.gov/erddapinfo/) and [here](https://coastwatch.pfeg.noaa.gov/erddap/index.html)) is one of those applications. "ERDDAP is a data server that gives you a simple, consistent way to download data in the format and the spatial and temporal coverage that you want. ERDDAP is a web application with an interface for people to use. It is also a RESTful web service that allows data access directly from any computer program (e.g. Matlab, R, or webpages)."

# ERDDAP has a RESTful API that is very convenient for creating web apps, data portals, etc. However, writing those URLs manually can be tedious and error-prone. This notebook walks through an easy way to build ERDDAP RESTful URLs with the Python client `erddapy`: https://pyoceans.github.io/erddapy/
#
# A typical ERDDAP RESTful URL looks like:
#
# [https://data.ioos.us/gliders/erddap/tabledap/whoi_406-20160902T1700.mat?depth,latitude,longitude,salinity,temperature,time&time>=2016-07-10T00:00:00Z&time<=2017-02-10T00:00:00Z&latitude>=38.0&latitude<=41.0&longitude>=-72.0&longitude<=-69.0](https://data.ioos.us/gliders/erddap/tabledap/whoi_406-20160902T1700.mat?depth,latitude,longitude,salinity,temperature,time&time>=2016-07-10T00:00:00Z&time<=2017-02-10T00:00:00Z&latitude>=38.0&latitude<=41.0&longitude>=-72.0&longitude<=-69.0)
#
# Let's break it down into smaller parts:
#
# - **server**: https://data.ioos.us/gliders/erddap/
# - **protocol**: tabledap
# - **dataset_id**: whoi_406-20160902T1700
# - **response**: .mat
# - **variables**: depth, latitude, longitude, salinity, temperature, time
# - **constraints**:
#     - time>=2016-07-10T00:00:00Z
#     - time<=2017-02-10T00:00:00Z
#     - latitude>=38.0
#     - latitude<=41.0
#     - longitude>=-72.0
#     - longitude<=-69.0

# Note: This Jupyter notebook originated from [an erddapy notebook in the IOOS gallery](https://ioos.github.io/notebooks_demos/notebooks/2018-03-01-erddapy).

# In[1]:


import pandas as pd
from erddapy import ERDDAP
from erddapy.utilities import urlopen
import hvplot.xarray
import holoviews as hv


# ## 1. Search the ERDDAP "catalog"

# In[2]:


# New Axiom ERDDAP for OOI
server = 'http://erddap.dataexplorer.oceanobservatories.org/erddap'
protocol = 'tabledap'


# In[3]:


e = ERDDAP(server=server, protocol=protocol)


# A search for everything looks like this. The only effective filtering parameters being passed are `protocol=tabledap` and `response=csv`.

# In[4]:


df = pd.read_csv(urlopen(e.get_search_url(response='csv', search_for='all')))
len(df)


# Now we'll refine our search by adding temporal, bounding-box, and variable constraints.

# In[5]:


min_time = '2018-07-01T00:00:00Z'
max_time = '2018-07-15T00:00:00Z'
min_lon, max_lon = -127, -123.75
min_lat, max_lat = 43, 48
standard_name = 'sea_water_practical_salinity'

kw = {
    'standard_name': standard_name,
    'min_lon': min_lon, 'max_lon': max_lon,
    'min_lat': min_lat, 'max_lat': max_lat,
    'min_time': min_time, 'max_time': max_time,
}


# In[6]:


search_url = e.get_search_url(response='csv', **kw)
search_df = pd.read_csv(urlopen(search_url))
search_df = search_df[['Institution', 'Dataset ID', 'tabledap']]
search_df
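# Before committing to a single dataset, it can help to look at its metadata. The cell below is an optional aside that is not part of the original notebook: a minimal sketch using erddapy's `get_info_url` to pull the "info" table for the first search hit into pandas, assuming the search above returned at least one dataset. The info table lists the dataset's global attributes, variables, and variable attributes.

# In[ ]:


# Optional: inspect metadata for the first dataset returned by the search.
info_url = e.get_info_url(dataset_id=search_df['Dataset ID'].iloc[0], response='csv')
info_df = pd.read_csv(urlopen(info_url))
# Each row describes either a global attribute, a variable, or a variable attribute.
info_df.head(12)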
# ## 2. Read data from one dataset, manually

# Let's inspect a specific `dataset_id`.

# In[7]:


dataset_id = 'ooi-ce06issm-sbd17-06-ctdbpc000'


# Construct the ERDDAP URL to get the data.

# In[8]:


e.dataset_id = dataset_id
e.constraints = {'time>=': min_time, 'time<=': max_time}
e.response = 'csv'
e.variables = [
    'time',
    e.get_var_by_attr(dataset_id=dataset_id, standard_name=standard_name)[0],
]
print(e.get_download_url())


# Read the data into xarray.

# In[9]:


ds = e.to_xarray(decode_times=True)
# ds = ds.swap_dims({'row': 'time'})
# [ds[var].plot() for var in ds.data_vars];


# In[10]:


ds


# ## 3. Read data from all datasets, automatically

# Let's narrow this down by taking only the "CTDBP" datasets.

# In[11]:


ctdbp = search_df[search_df['Dataset ID'].str.contains("ctdbp")].reset_index()
ctdbp


# Let's take just five datasets to speed up the demo.

# In[15]:


ndatasets = 5

df_list = []
hv_list = []
for dataset_id in ctdbp['Dataset ID'].values:
    e.dataset_id = dataset_id
    e.variables = [
        'time',
        e.get_var_by_attr(dataset_id=dataset_id, standard_name=standard_name)[0],
    ]
    try:
        ds = e.to_xarray(decode_times=True)
        # ds = ds.swap_dims({'row': 'time'})
        df_list.append(ds)
        print(dataset_id)
        # [ds[var].plot() for var in ds.data_vars];
        hv_list.append(ds[e.variables[-1]].hvplot(label=dataset_id))
    except Exception:
        # Skip datasets that fail to download or lack the requested variable.
        pass
    if len(df_list) == ndatasets:
        break


# In[13]:


hv.Overlay(hv_list)
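# As a quick follow-on (not part of the original notebook), the datasets collected in `df_list` can also be summarized in a single pandas DataFrame. This is a minimal sketch assuming the loop above succeeded for at least one dataset, that each dataset holds only the requested `time` variable plus one salinity variable (whose exact name differs between datasets), and that ERDDAP's `title` global attribute is present; if it is not, a placeholder name is used.

# In[ ]:


summary = []
for ds in df_list:
    # ERDDAP usually records the dataset title in the global attributes.
    name = ds.attrs.get('title', 'unknown')
    # Pick the non-time data variable, i.e. the salinity variable we requested.
    salinity_var = [v for v in ds.data_vars if v != 'time'][-1]
    summary.append({
        'dataset': name,
        'mean_salinity': float(ds[salinity_var].mean()),
        'n_obs': int(ds[salinity_var].size),
    })

pd.DataFrame(summary)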