#!/usr/bin/env python # coding: utf-8 # ### Odd behaviour in `rasterio.warp.reproject` # # We are observing significant difference in results of loaded data based on the size of the destination window used to load those pixels. This behaviour only happens for some resampling methods and not for others. Given how sampling works it is expected for `bilinear` resampling to produce different results based on the read window, however effect we are observing is more than just that. Difference is more pronounced for thiner read windows. This notebook demonstrates the effect. # # We use `odc-geo` for some visualizations, but actual load is with `rasterio.warp.reproject`. # # ``` # pip install odc-geo # ``` # # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import rasterio import rasterio.warp from affine import Affine from IPython.display import Image, display from matplotlib import pyplot as plt from odc.geo._compress import compress from odc.geo.geobox import GeoBox from odc.geo.xr import xr_zeros from rasterio.warp import Resampling def load_into(uri, dst, resampling, band=1): with rasterio.Env(AWS_NO_SIGN_REQUEST="YES"): with rasterio.DatasetReader(uri) as src: if isinstance(dst, GeoBox): dst = xr_zeros(dst, dtype=src.dtypes[band - 1]) rasterio.warp.reproject( rasterio.band(src, band), dst.data, dst_crs=str(dst.odc.crs), dst_transform=dst.odc.affine, resampling=resampling, ) return dst def load_part(xx_whole, roi, resampling): """ Replace `xx_whole[roi]` with separately loaded pixels for the same region. """ xx = xx_whole.copy() xx[roi] = load_into(uri, xx_whole.odc.geobox[roi], resampling) return xx uri = ( "s3://dea-public-data/baseline/ga_s2am_ard_3/49/JGN/2020/01/05/20200105T035212/" "ga_s2am_nbart_3-2-1_49JGN_2020-01-05_final_band04.tif" ) d_gbox = GeoBox.from_bbox( (-1827140, -2819940, -1824000, -2817250), "epsg:3577", resolution=10, ) strips_to_test = [5, 10, 15, 20, 25, 30, 35, 40, 50, 60, 70, 80, 90, 100] rois = [np.s_[:, -s:] for s in strips_to_test] experiment = {} for resampling in [ Resampling.bilinear, Resampling.cubic, Resampling.nearest, Resampling.average, ]: print(f"Processing {resampling.name}") xx_whole = load_into(uri, d_gbox, resampling) xx = [load_part(xx_whole, roi, resampling) for roi in rois] diffs = [x - xx_whole for x in xx] stats = np.asarray( [ (strip, d[roi].mean().data.item(), d[roi].std().data.item()) for strip, roi, d in zip(strips_to_test, rois, diffs) ] ) experiment[resampling.name] = { "reference": xx_whole, "images": xx, "diffs": diffs, "stats": stats, } print("..done") # In[2]: exp = experiment["cubic"] imgs = [exp["reference"], *exp['images'][2:4], *exp['images'][-1:]] ref = imgs[0] for xx in imgs: diff = ref - xx p1 = xx.odc.colorize(cmap="bone", vmin=500, vmax=3000) p2 = diff.odc.colorize(cmap="RdBu", vmin=-50, vmax=50) display(Image(data=compress(np.hstack([p1, p2])))) # Notice how the intensity of the difference image is much higher for narrower strips compared to wider ones. # # Below we visualize average difference between reference and a strip. Images narrower than 20 pixels (200m in world units) have significantly higher error rates when using `bilinear,cubic`. # In[3]: fig, axs = plt.subplots(len(experiment), figsize=(8, 8)) for ax, (resampling, exp) in zip(axs, experiment.items()): x, y, std = exp['stats'].T ci = np.sqrt(std) * 3 ax.plot(x, y, ".-") ax.fill_between(x, (y - ci), (y + ci), color="b", alpha=0.1) ax.set_ylabel("Pixel difference") ax.set_title(f"{resampling}") ax.axis([5, 100, -60, 60]) axs[-1].set_xlabel("Width of the load window in pixels"); # ----------