rasterio.warp.reproject
¶We are observing significant difference in results of loaded data based on the size of the destination window used to load those pixels. This behaviour only happens for some resampling methods and not for others. Given how sampling works it is expected for bilinear
resampling to produce different results based on the read window, however effect we are observing is more than just that. Difference is more pronounced for thiner read windows. This notebook demonstrates the effect.
We use odc-geo
for some visualizations, but actual load is with rasterio.warp.reproject
.
pip install odc-geo
%matplotlib inline
import numpy as np
import rasterio
import rasterio.warp
from affine import Affine
from IPython.display import Image, display
from matplotlib import pyplot as plt
from odc.geo._compress import compress
from odc.geo.geobox import GeoBox
from odc.geo.xr import xr_zeros
from rasterio.warp import Resampling
def load_into(uri, dst, resampling, band=1):
with rasterio.Env(AWS_NO_SIGN_REQUEST="YES"):
with rasterio.DatasetReader(uri) as src:
if isinstance(dst, GeoBox):
dst = xr_zeros(dst, dtype=src.dtypes[band - 1])
rasterio.warp.reproject(
rasterio.band(src, band),
dst.data,
dst_crs=str(dst.odc.crs),
dst_transform=dst.odc.affine,
resampling=resampling,
)
return dst
def load_part(xx_whole, roi, resampling):
"""
Replace `xx_whole[roi]` with separately loaded pixels for the same region.
"""
xx = xx_whole.copy()
xx[roi] = load_into(uri, xx_whole.odc.geobox[roi], resampling)
return xx
uri = (
"s3://dea-public-data/baseline/ga_s2am_ard_3/49/JGN/2020/01/05/20200105T035212/"
"ga_s2am_nbart_3-2-1_49JGN_2020-01-05_final_band04.tif"
)
d_gbox = GeoBox.from_bbox(
(-1827140, -2819940, -1824000, -2817250),
"epsg:3577",
resolution=10,
)
strips_to_test = [5, 10, 15, 20, 25, 30, 35, 40, 50, 60, 70, 80, 90, 100]
rois = [np.s_[:, -s:] for s in strips_to_test]
experiment = {}
for resampling in [
Resampling.bilinear,
Resampling.cubic,
Resampling.nearest,
Resampling.average,
]:
print(f"Processing {resampling.name}")
xx_whole = load_into(uri, d_gbox, resampling)
xx = [load_part(xx_whole, roi, resampling) for roi in rois]
diffs = [x - xx_whole for x in xx]
stats = np.asarray(
[
(strip, d[roi].mean().data.item(), d[roi].std().data.item())
for strip, roi, d in zip(strips_to_test, rois, diffs)
]
)
experiment[resampling.name] = {
"reference": xx_whole,
"images": xx,
"diffs": diffs,
"stats": stats,
}
print("..done")
Processing bilinear Processing cubic Processing nearest Processing average ..done
exp = experiment["cubic"]
imgs = [exp["reference"], *exp['images'][2:4], *exp['images'][-1:]]
ref = imgs[0]
for xx in imgs:
diff = ref - xx
p1 = xx.odc.colorize(cmap="bone", vmin=500, vmax=3000)
p2 = diff.odc.colorize(cmap="RdBu", vmin=-50, vmax=50)
display(Image(data=compress(np.hstack([p1, p2]))))
Notice how the intensity of the difference image is much higher for narrower strips compared to wider ones.
Below we visualize average difference between reference and a strip. Images narrower than 20 pixels (200m in world units) have significantly higher error rates when using bilinear,cubic
.
fig, axs = plt.subplots(len(experiment), figsize=(8, 8))
for ax, (resampling, exp) in zip(axs, experiment.items()):
x, y, std = exp['stats'].T
ci = np.sqrt(std) * 3
ax.plot(x, y, ".-")
ax.fill_between(x, (y - ci), (y + ci), color="b", alpha=0.1)
ax.set_ylabel("Pixel difference")
ax.set_title(f"{resampling}")
ax.axis([5, 100, -60, 60])
axs[-1].set_xlabel("Width of the load window in pixels");