#!/usr/bin/env python
# coding: utf-8

# ### Odd behaviour in `rasterio.warp.reproject`
# 
# We are observing significant difference in results of loaded data based on the size of the destination window used to load those pixels. This behaviour only happens for some resampling methods and not for others. Given how sampling works it is expected for `bilinear` resampling to produce different results based on the read window, however effect we are observing is more than just that. Difference is more pronounced for thiner read windows. This notebook demonstrates the effect.
# 
# We use `odc-geo` for some visualizations, but actual load is with `rasterio.warp.reproject`.
# 
# ```
# pip install odc-geo
# ```
# 

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import rasterio
import rasterio.warp
from affine import Affine
from IPython.display import Image, display
from matplotlib import pyplot as plt
from odc.geo._compress import compress
from odc.geo.geobox import GeoBox
from odc.geo.xr import xr_zeros
from rasterio.warp import Resampling


def load_into(uri, dst, resampling, band=1):
    with rasterio.Env(AWS_NO_SIGN_REQUEST="YES"):
        with rasterio.DatasetReader(uri) as src:
            if isinstance(dst, GeoBox):
                dst = xr_zeros(dst, dtype=src.dtypes[band - 1])

            rasterio.warp.reproject(
                rasterio.band(src, band),
                dst.data,
                dst_crs=str(dst.odc.crs),
                dst_transform=dst.odc.affine,
                resampling=resampling,
            )

    return dst


def load_part(xx_whole, roi, resampling):
    """
    Replace `xx_whole[roi]` with separately loaded pixels for the same region.
    """
    xx = xx_whole.copy()
    xx[roi] = load_into(uri, xx_whole.odc.geobox[roi], resampling)
    return xx


uri = (
    "s3://dea-public-data/baseline/ga_s2am_ard_3/49/JGN/2020/01/05/20200105T035212/"
    "ga_s2am_nbart_3-2-1_49JGN_2020-01-05_final_band04.tif"
)
d_gbox = GeoBox.from_bbox(
    (-1827140, -2819940, -1824000, -2817250),
    "epsg:3577",
    resolution=10,
)

strips_to_test = [5, 10, 15, 20, 25, 30, 35, 40, 50, 60, 70, 80, 90, 100]
rois = [np.s_[:, -s:] for s in strips_to_test]

experiment = {}
for resampling in [
    Resampling.bilinear,
    Resampling.cubic,
    Resampling.nearest,
    Resampling.average,
]:
    print(f"Processing {resampling.name}")
    xx_whole = load_into(uri, d_gbox, resampling)

    xx = [load_part(xx_whole, roi, resampling) for roi in rois]
    diffs = [x - xx_whole for x in xx]
    stats = np.asarray(
        [
            (strip, d[roi].mean().data.item(), d[roi].std().data.item())
            for strip, roi, d in zip(strips_to_test, rois, diffs)
        ]
    )

    experiment[resampling.name] = {
        "reference": xx_whole,
        "images": xx,
        "diffs": diffs,
        "stats": stats,
    }
print("..done")


# In[2]:


exp = experiment["cubic"]
imgs = [exp["reference"], *exp['images'][2:4], *exp['images'][-1:]]
ref = imgs[0]
for xx in imgs:
    diff = ref - xx
    p1 = xx.odc.colorize(cmap="bone", vmin=500, vmax=3000)
    p2 = diff.odc.colorize(cmap="RdBu", vmin=-50, vmax=50)
    display(Image(data=compress(np.hstack([p1, p2]))))


# Notice how the intensity of the difference image is much higher for narrower strips compared to wider ones.
# 
# Below we visualize average difference between reference and a strip. Images narrower than 20 pixels (200m in world units) have significantly higher error rates when using `bilinear,cubic`.

# In[3]:


fig, axs = plt.subplots(len(experiment), figsize=(8, 8))

for ax, (resampling, exp) in zip(axs, experiment.items()):
    x, y, std = exp['stats'].T
    ci = np.sqrt(std) * 3

    ax.plot(x, y, ".-")
    ax.fill_between(x, (y - ci), (y + ci), color="b", alpha=0.1)
    ax.set_ylabel("Pixel difference")
    ax.set_title(f"{resampling}")
    ax.axis([5, 100, -60, 60])

axs[-1].set_xlabel("Width of the load window in pixels");


# ----------