This is the first of a series of notebooks for teasing out statistics on AWRA and related data for Silvereye.
The current notebook is for initial trial and error.
Merge capabilities from:
Plan is:
import xarray as xr
import os
import sys
import pandas as pd
from functools import wraps
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  # noqa; pandas-aware plotting library
# Resolve source and data directories, overridable via environment variables.
if 'SP_SRC' in os.environ:
    root_src_dir = os.environ['SP_SRC']
elif sys.platform == 'win32':
    root_src_dir = r'C:\src\csiro\stash\silverpieces'
else:
    root_src_dir = '/home/per202/src/csiro/stash/silverpieces'
pkg_src_dir = root_src_dir
sys.path.append(pkg_src_dir)

if 'SP_DATA' in os.environ:
    root_data_dir = os.environ['SP_DATA']
elif sys.platform == 'win32':
    root_data_dir = r'C:\data\silverpieces'
else:
    root_data_dir = '/home/per202/data/silverpieces'
#from silverpieces.blah import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from siphon.catalog import TDSCatalog
import requests
# from distributed import Client, LocalCluster
# from flask import Flask, redirect, url_for, request, render_template, jsonify, abort
# from flask_cors import CORS
# import intake
# import regionmask
# import random
# from flask import jsonify, make_response
# import json
# import uuid
# from flask.logging import default_handler
# import geopandas as gpd
# from owslib.wps import WebProcessingService
# from owslib.wps import printInputOutput
# from birdy import WPSClient
# import birdy
# import tarfile
# import tempfile
# import shutil
# import urllib
import urllib.request
# from urllib.parse import urlparse
# from datetime import datetime
# from dateutil.relativedelta import relativedelta
I tried to adapt xarray-tips-and-tricks by similarity, and while this looked OK, retrieving values ended in a RuntimeError: NetCDF: Access failure. Parking this; it may be an issue with the THREDDS server serving the AWRA data.
# #base_url = 'http://www.esrl.noaa.gov/psd/thredds/dodsC/Datasets/ncep.reanalysis/surface/air.sig995'
# base_url = 'http://data-mel.it.csiro.au/thredds/dodsC/catch_all/lw_oznome/AWRA-L_historical_data/qtot/qtot_avg_'
# f = ['http://data-mel.it.csiro.au/thredds/dodsC/catch_all/lw_oznome/AWRA-L_historical_data/qtot/qtot_avg_' + str(yr) + '.nc' for yr in [2011, 2012]]
# files = [f'{base_url}{year}.nc' for year in range(2010, 2015)]
# files
# ds = xr.open_mfdataset(files)
# ds
# x = ds.qtot_avg
# x[1,1,1].values
cat_url = 'http://data-mel.it.csiro.au/thredds/catalog/catch_all/lw_oznome/AWRA-L_historical_data/qtot/catalog.xml'
cat = TDSCatalog(cat_url)
ds_names = [str(x) for x in cat.datasets]
m_indices = [i for i in range(len(ds_names)) if ds_names[i].startswith('qtot') and ds_names[i].endswith('nc')]
# the catalog listing happens to be sorted, but in reverse; flip to ascending order
m_indices.reverse()
ds = [cat.datasets[i] for i in m_indices]
ds[0].access_urls
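The ordering above is fragile. Since the year is embedded in each filename, it is safer to parse and sort on it explicitly; a minimal sketch, assuming names like qtot_avg_2011.nc:

import re

def year_of(name):
    # pull the four-digit year out of names like 'qtot_avg_2011.nc'
    match = re.search(r'(\d{4})\.nc$', name)
    return int(match.group(1)) if match else -1

sorted_names = sorted((n for n in ds_names if n.startswith('qtot') and n.endswith('nc')),
                      key=year_of)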
# chunk each year spatially; full dimensions are time: 731, latitude: 681, longitude: 841
dsets = [cds.remote_access(use_xarray=True).reset_coords(drop=True).chunk({'latitude': 681, 'longitude': 841})
         for cds in ds[:3]]  # eventually want to use the whole catalog here
# unchunked variant, for comparison:
# dsets = [cds.remote_access(use_xarray=True).reset_coords(drop=True)
#          for cds in ds[:3]]
len(dsets)
blah = xr.auto_combine(dsets)  # NB: auto_combine is deprecated in newer xarray; combine_by_coords is its successor
blah
x = blah.qtot_avg
x['time'].values
This also frequently fails with a RuntimeError: NetCDF: Access failure. That said, it worked about one time in ten, so this is probably more a problem with the THREDDS server.
# Intermittent: frequently ends in RuntimeError: NetCDF: Access failure
m = x[1,:,:].values
plt.imshow(m)
%time b_box = x.isel(latitude=slice(600,700), longitude=slice(650,750))
%time b_box.isel(time=310).plot()
%time plt.show()
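Since the failures are intermittent, a small retry helper makes the exploration less painful; a minimal sketch (the helper is mine, not from any library):

import time

def retry(fn, attempts=5, delay=2.0):
    # retry fn a few times; the flaky OPENDAP reads surface as RuntimeError
    for i in range(attempts):
        try:
            return fn()
        except RuntimeError:
            if i == attempts - 1:
                raise
            time.sleep(delay)

m = retry(lambda: x[1, :, :].values)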
Starting from some code J Yu had authored:
ds_desc = 'http://data-cbr.it.csiro.au/thredds/catalog/catch_all/Digiscape_Climate_Data_Portal/silo/climate/catalog.xml?dataset=allDatasetScan/Digiscape_Climate_Data_Portal/silo/climate/daily_rain.nc'
catalog = TDSCatalog(ds_desc)
catalog.datasets
dataset = catalog.datasets[0]
type(dataset)
ncss = dataset.subset()
type(ncss)
query = ncss.query()
import pandas as pd
tt = pd.to_datetime("2014-01-01")
query.lonlat_box(north=-40, south=-44, east=149, west=144).time(tt)
query.accept('netcdf4')
query.variables('daily_rain')
# With the following I may have bumped into https://github.com/Unidata/thredds-docker/issues/216
data = ncss.get_data(query)
Try another approach:
# Load the dataset
cat = catalog
dataset_name = sorted(cat.datasets.keys())[-1]
dataset_name
dataset = cat.datasets[dataset_name]
ds = dataset.remote_access(service='OPENDAP')
from xarray.backends import NetCDF4DataStore
# wrap the netCDF4 handle so xarray can open it as a lazy dataset
ds = NetCDF4DataStore(ds)
ds = xr.open_dataset(ds)
x = ds.daily_rain
x
x.isel(time=22000).plot()
%time b_box = x.isel(lat=slice(600,700), lon=slice(650,750))
%time b_box.isel(time=22000).plot()
%time plt.show()
Below are attempts at NumPy-ish ways to calculate quantile classes. No joy.
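For reference, the computation we are after on plain NumPy arrays; a minimal sketch on synthetic data (all names below are mine):

rng = np.random.default_rng(0)
sample = rng.gamma(2.0, 100.0, size=(30, 5, 5))  # 30 synthetic 'years' on a 5x5 grid
thresholds = np.quantile(sample, [0.1, 0.25, 0.75, 0.9], axis=0)  # per-cell thresholds
# quantile class per cell: how many thresholds the latest value exceeds
classes = (sample[-1][None, :, :] > thresholds).sum(axis=0)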
convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)')
convolve(np.eye(2), [1, 2])
convolve = np.vectorize(np.convolve)
convolve(np.eye(2), [1, 2])
# three_years_rains and y carry over from earlier experimentation (not shown here);
# presumably stacked multi-year totals and per-cell quantile thresholds, respectively
z = three_years_rains[-1]
z.searchsorted(y)
def searchsorted2d(a, b):
    # offset each row by a constant larger than any data range so one ravel'd
    # searchsorted call handles all rows at once
    m, n = a.shape
    max_num = np.maximum(a.max() - a.min(), b.max() - b.min()) + 1
    r = max_num * np.arange(a.shape[0])[:, None]
    p = np.searchsorted((a + r).ravel(), (b + r).ravel()).reshape(m, -1)
    return p - n * (np.arange(m)[:, None])
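A quick sanity check of the row-offset trick on small sorted rows (example data is mine):

a = np.array([[0., 1., 2., 3.],
              [10., 11., 12., 13.]])
b = np.array([[1.5], [12.5]])
searchsorted2d(a, b)  # expected: array([[2], [3]])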
def searchsorted2d(mat, ensembles):
    # same offset trick as above, with arguments renamed for readability
    m, n = mat.shape
    max_num = np.maximum(mat.max() - mat.min(), ensembles.max() - ensembles.min()) + 1
    r = max_num * np.arange(mat.shape[0])[:, None]
    p = np.searchsorted((mat + r).ravel(), (ensembles + r).ravel()).reshape(m, -1)
    return p - n * (np.arange(m)[:, None])
np.random.seed(123)
mat = np.sort(np.arange(12)).reshape(3, 4)  # arange is already sorted, so each row of mat is sorted
mat
mat[0]
v_searchsorted = np.vectorize(np.searchsorted, signature='(n),(m)->(k)')
v_searchsorted(mat, np.array([0.5, 5.5, 10.1]))
# this signature requires two arrays of the same 2-D shape, which the call below
# does not supply; kept as a record of a failed attempt
v_searchsorted = np.vectorize(np.searchsorted, signature='(n,m),(n,m)->(n)')
v_searchsorted(mat, np.array([0.5, 5.5]))
np.searchsorted(np.array([-1, 0, 1, 2]), np.array([-1, 0, 1.1, 2]))
# for reference, the xarray signature:
# DataArray.groupby_bins(group, bins, right=True, labels=None, precision=3,
#                        include_lowest=False, squeeze=True, restore_coord_dims=None)
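A minimal groupby_bins sketch on synthetic data (names and bins are mine):

da = xr.DataArray(np.random.default_rng(1).random(100), dims='time', name='rain')
# count how many values land in each bin
da.groupby_bins(da, bins=[0.0, 0.25, 0.5, 0.75, 1.0]).count()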
z.shape, y.shape
very_drier = z <= y[0,:,:]
very_drier.plot()
z[50,50], y[:,50, 50]
np.argmin( (200 > y[:,50, 50].values) )
3000 > y[:,50, 50].values
np.argmin(z[50,50].values > y[:,50, 50].values)
np.argmin(z[50,50].values < y[:,50, 50].values)
(z[50,50].values > y[:,50, 50].values) , (z[50,50].values < y[:,50, 50].values)
np.eye(4)
np.argmax(z[50,50].values > y[:,50, 50].values)
np.argmax(z[50,50].values < y[:,50, 50].values)
np.searchsorted(y[:,50, 50].values, z[50,50].values)
np.searchsorted(np.arange(10), 23)
def searchsorted(a, b):
    # apply_ufunc hands plain numpy arrays to func, so no .values indexing is needed;
    # without input_core_dims this will not broadcast per cell (see the next attempt)
    func = lambda x, y: np.searchsorted(x, y)
    return xr.apply_ufunc(func, a, b)
xr.apply_ufunc
dim = 'time'
xr.apply_ufunc(np.searchsorted,
               y, z,
               input_core_dims=[[dim], []])
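np.searchsorted only accepts a 1-D sorted array, so the call above needs vectorize=True to loop over each cell. A sketch of what should work, assuming y is sorted along dim per cell and z is the matching 2-D field:

classes = xr.apply_ufunc(np.searchsorted,
                         y, z,
                         input_core_dims=[[dim], []],
                         vectorize=True)
classes  # per-cell insertion index of z within y along dim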
import scipy.stats

def earth_mover_distance(first_samples,
                         second_samples,
                         dim='ensemble'):
    # wasserstein_distance works on 1-D samples, so vectorize over all other dims
    return xr.apply_ufunc(scipy.stats.wasserstein_distance,
                          first_samples, second_samples,
                          input_core_dims=[[dim], [dim]],
                          vectorize=True)
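A usage sketch on synthetic ensembles (shapes and distributions are mine):

rng = np.random.default_rng(42)
first = xr.DataArray(rng.normal(0.0, 1.0, (100, 10, 10)),
                     dims=('ensemble', 'lat', 'lon'))
second = xr.DataArray(rng.normal(0.5, 1.0, (100, 10, 10)),
                      dims=('ensemble', 'lat', 'lon'))
earth_mover_distance(first, second)  # DataArray over ('lat', 'lon'), roughly 0.5 everywhere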