import xarray as xr
import os
import sys
import pandas as pd
from functools import wraps
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns # noqa, pandas aware plotting library
if 'SP_SRC' in os.environ:
    root_src_dir = os.environ['SP_SRC']
elif sys.platform == 'win32':
    root_src_dir = r'C:\src\csiro\stash\silverpieces'
else:
    root_src_dir = '/home/per202/src/csiro/stash/silverpieces'
pkg_src_dir = root_src_dir
sys.path.append(pkg_src_dir)
if 'SP_DATA' in os.environ:
    root_data_dir = os.environ['SP_DATA']
elif sys.platform == 'win32':
    root_data_dir = r'C:\data\silverpieces'
else:
    root_data_dir = '/home/per202/data/silverpieces'
#from silverpieces.blah import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from siphon.catalog import TDSCatalog
I did not manage to get the AWRA data served by THREDDS, so let's try another approach: read the SILO daily rainfall grid from the THREDDS catalog instead.
ds_desc = 'http://data-cbr.it.csiro.au/thredds/catalog/catch_all/Digiscape_Climate_Data_Portal/silo/climate/catalog.xml?dataset=allDatasetScan/Digiscape_Climate_Data_Portal/silo/climate/daily_rain.nc'
catalog = TDSCatalog(ds_desc)
# Load the dataset
cat = catalog
dataset_name = sorted(cat.datasets.keys())[-1]
dataset_name
dataset = cat.datasets[dataset_name]
ds = dataset.remote_access(service='OPENDAP')
from xarray.backends import NetCDF4DataStore
# Wrap the OPeNDAP handle in an xarray-compatible store, then open it as a Dataset
ds = NetCDF4DataStore(ds)
ds = xr.open_dataset(ds)
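An alternative worth trying is to point xarray straight at the OPeNDAP endpoint. This is a sketch only: the URL below is inferred from the catalog URL above, assuming the server exposes the usual THREDDS dodsC service path, and has not been verified.
# Hypothetical direct OPeNDAP access; adjust the path to the server's actual layout.
# opendap_url = 'http://data-cbr.it.csiro.au/thredds/dodsC/catch_all/Digiscape_Climate_Data_Portal/silo/climate/daily_rain.nc'
# ds = xr.open_dataset(opendap_url)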
x = ds.daily_rain
x
x.isel(time=22000).plot()
%time b_box = x.isel(lat=slice(600,700), lon=slice(650,750))
%time b_box.isel(time=22000).plot()
%time plt.show()
b_box
lc = x.coords['lon']
la = x.coords['lat']
lt = x.coords['time']
lt
start_time = pd.to_datetime('2007-01-01')
end_time = pd.to_datetime('2018-12-31')
#query.lonlat_box(north=-40, south=-44, east=149, west=144).time(tt)
tassie = x.loc[dict(lon=lc[(lc > 144) & (lc < 149)],
                    lat=la[(la > -44) & (la < -40)],
                    time=slice(start_time, end_time))]
tassie
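As a side note, the same subset can be expressed with label slices via .sel. A sketch, assuming the lon/lat coordinates are stored in increasing order (swap the lat bounds if the grid runs north to south):
# Equivalent label-based selection, assuming ascending lon/lat coordinates
tassie_alt = x.sel(lon=slice(144, 149), lat=slice(-44, -40),
                   time=slice(start_time, end_time))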
%time tassie.isel(time=4300).plot()
fn = os.path.join(root_data_dir, 'tassie_silo_rain.nc')
if not os.path.exists(fn):
    tassie.to_netcdf(fn)
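Optionally, the file can be written compressed. A sketch using standard netCDF4 zlib encoding for the daily_rain variable (assumes the netCDF4 engine is available):
# Optional: compressed output, often much smaller for rainfall grids
# tassie.to_netcdf(fn, encoding={'daily_rain': {'zlib': True, 'complevel': 4}})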
del tassie
tassie = xr.open_dataset(fn)
tassie
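For larger extracts it can help to reopen the file lazily with dask-backed chunks; a sketch only (requires dask, and the chunk size of roughly one year of daily steps is an arbitrary choice):
# Lazy, chunked reopen; downstream reductions then stream through dask
# rather than loading the whole array into memory.
# tassie = xr.open_dataset(fn, chunks={'time': 366})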
dr = tassie.daily_rain
dr.isel(time=4300).plot()
We want to be able to compare a grid of statistics for a given period against all other periods of the same length. The start and end of the period should be as arbitrary as possible. The sliding window could, however, be limited or fixed to whole years: it is probably moot to compare windows with shifted seasonality.
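The cells below work through this step by step; wrapped into a reusable function, the idea might look like the following sketch (the name windowed_stat and its arguments are illustrative, not an existing API):
from dateutil.relativedelta import relativedelta

def windowed_stat(da, start, end, n_windows, stat='sum', time_dim='time'):
    """Apply `stat` over [start, end] shifted forward by 0..n_windows-1 whole years."""
    out = []
    for year in range(n_windows):
        window = da.loc[{time_dim: slice(start + relativedelta(years=year),
                                         end + relativedelta(years=year))}]
        out.append(getattr(window, stat)(dim=time_dim, skipna=False))
    return out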
start_time = pd.to_datetime('2016-01-01')
end_time = pd.to_datetime('2018-12-31')
dr.isel(time = 1)
blah = dr.loc[dict(time=slice(start_time, end_time))].sum(dim='time', skipna=False)
blah.plot()
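For year-by-year totals rather than a single multi-year sum, grouping on the time components is a handy alternative; a sketch using xarray's standard 'time.year' datetime accessor:
# Annual rainfall totals per grid cell, one 2D field per calendar year
annual_totals = dr.groupby('time.year').sum(dim='time', skipna=False)
# annual_totals.plot(x='lon', y='lat', col='year', col_wrap=4)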
TIME_DIMNAME = 'time'
blah = dr.loc[dict(time=slice(start_time, end_time))].sum(dim=TIME_DIMNAME, skipna=False)
start_time = pd.to_datetime('2007-01-01')
end_time = pd.to_datetime('2009-12-31')
from datetime import date
from dateutil.relativedelta import relativedelta # $ pip install python-dateutil
print(start_time + relativedelta(years=+1))
cumulated = [dr.loc[dict(time=slice(start_time + relativedelta(years=year),
                                    end_time + relativedelta(years=year)))].sum(dim=TIME_DIMNAME, skipna=False)
             for year in range(10)]
cumulated[0].plot()
# Stack the ten 3-year totals along a 'time' axis labelled by each window's start date
window_starts = pd.Index([start_time + relativedelta(years=year) for year in range(10)], name=TIME_DIMNAME)
blah = xr.concat(cumulated, dim=window_starts)
blah
# The same result combined as a Dataset via combine_nested (xr.auto_combine is deprecated)
blah_ds = xr.combine_nested([cumulated[i].to_dataset() for i in range(len(cumulated))], concat_dim=window_starts)
g_simple = blah.plot(x='lon', y='lat', col='time', col_wrap=3)
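With the windows stacked, one way to express "how does the most recent period compare to all periods of similar length" is the empirical fraction of windows at or below it, per grid cell. A minimal sketch, assuming the last element of cumulated is the period of interest; the names stacked and frac_at_or_below are illustrative:
# Fraction of 3-year windows whose total is at or below the most recent window's total.
# Low values mean the recent window was unusually dry relative to the other windows.
# NaN cells (e.g. ocean) compare as False throughout and come out as 0 here.
stacked = xr.concat(cumulated, dim='window')
frac_at_or_below = (stacked <= stacked.isel(window=-1)).mean(dim='window')
frac_at_or_below.plot()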