%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import xarray as xr
from skdownscale.pointwise_models import BcsdPrecipitation, BcsdTemperature
# utilities for plotting cdfs
def plot_cdf(ax=None, **kwargs):
if ax:
plt.sca(ax)
else:
ax = plt.gca()
for label, X in kwargs.items():
vals = np.sort(X, axis=0)
pp = scipy.stats.mstats.plotting_positions(vals)
ax.plot(pp, vals, label=label)
ax.legend()
return ax
def plot_cdf_by_month(ax=None, **kwargs):
fig, axes = plt.subplots(4, 3, sharex=True, sharey=False, figsize=(12, 8))
for label, X in kwargs.items():
for month, ax in zip(range(1, 13), axes.flat):
vals = np.sort(X[X.index.month == month], axis=0)
pp = scipy.stats.mstats.plotting_positions(vals)
ax.plot(pp, vals, label=label)
ax.set_title(month)
ax.legend()
return ax
# open a small dataset for training
training = xr.open_zarr("../data/downscale_test_data.zarr.zip", group="training")
training
# open a small dataset of observations (targets)
targets = xr.open_zarr("../data/downscale_test_data.zarr.zip", group="targets")
targets
# extract 1 point of training data for precipitation and temperature
X_temp = training.isel(point=0).to_dataframe()[["T2max"]].resample("MS").mean() - 273.13
X_pcp = training.isel(point=0).to_dataframe()[["PREC_TOT"]].resample("MS").sum() * 24
display(X_temp.head(), X_pcp.head())
# extract 1 point of target data for precipitation and temperature
y_temp = targets.isel(point=0).to_dataframe()[["Tmax"]].resample("MS").mean()
y_pcp = targets.isel(point=0).to_dataframe()[["Prec"]].resample("MS").sum()
display(y_temp.head(), y_pcp.head())
# Fit/predict the BCSD Temperature model
bcsd_temp = BcsdTemperature()
bcsd_temp.fit(X_temp, y_temp)
out = bcsd_temp.predict(X_temp) + X_temp
plot_cdf(X=X_temp, y=y_temp, out=out)
out.plot()
plot_cdf_by_month(X=X_temp, y=y_temp, out=out)
# Fit/predict the BCSD Precipitation model
bcsd_pcp = BcsdPrecipitation()
bcsd_pcp.fit(X_pcp, y_pcp)
out = bcsd_pcp.predict(X_pcp) * X_pcp
plot_cdf(X=X_pcp, y=y_pcp, out=out)
plot_cdf_by_month(X=X_pcp, y=y_pcp, out=out)