Adapted from https://github.com/suzanne64/ATL11/blob/master/intro_to_ATL11.ipynb
import glob
import dask
import dask.array
import datashader
import geopandas as gpd
import holoviews as hv
import hvplot.dask
import hvplot.pandas
import hvplot.xarray
import intake
import matplotlib.cm
import numpy as np
import pandas as pd
import pygmt
import shapely
import xarray as xr
import deepicedrain
client = dask.distributed.Client(n_workers=32, threads_per_worker=1)
client
Client
|
Cluster
|
Let's start by getting our data and running some preprocessing steps:
# Xarray open_dataset preprocessor to add fields based on input filename.
# Adapted from the intake.open_netcdf._add_path_to_ds function.
add_path_to_ds = lambda ds: ds.assign_coords(
coords=intake.source.utils.reverse_format(
format_string="ATL11.001z123/ATL11_{referencegroundtrack:04d}1x_{}_{}_{}.zarr",
resolved_string=ds.encoding["source"],
)
)
# Load dataset from all Zarr stores
# Aligning chunks spatially along cycle_number (i.e. time)
ds: xr.Dataset = xr.open_mfdataset(
paths="ATL11.001z123/ATL11_*_003_01.zarr",
chunks="auto",
engine="zarr",
combine="nested",
concat_dim="ref_pt",
parallel="True",
preprocess=add_path_to_ds,
backend_kwargs={"consolidated": True},
)
# ds = ds.unify_chunks().compute()
# TODO use intake, wait for https://github.com/intake/intake-xarray/issues/70
# source = intake.open_ndzarr(url="ATL11.001z123/ATL11_0*.zarr")
To center our plot on the South Pole, we'll reproject the original longitude/latitude coordinates to the Antarctic Polar Stereographic (EPSG:3031) projection.
ds["x"], ds["y"] = deepicedrain.lonlat_to_xy(
longitude=ds.longitude, latitude=ds.latitude
)
# Also set x/y as coordinates in xarray.Dataset instead of longitude/latitude
ds: xr.Dataset = ds.set_coords(names=["x", "y"])
ds: xr.Dataset = ds.reset_coords(names=["longitude", "latitude"])
To get more human-readable datetimes, we'll convert the delta_time attribute from the original GPS time format (nanoseconds since the beginning of ICESat-2 starting epoch) to Coordinated Universal Time (UTC). The reference date for the ICESat-2 Epoch is 2018 January 1st according to https://github.com/SmithB/pointCollection/blob/master/is2_calendar.py#L11-L15
TODO: Account for leap seconds in the future.
# ds["utc_time"] = ds.delta_time.rename(new_name_or_name_dict="utc_time")
ds["utc_time"] = deepicedrain.deltatime_to_utctime(dataarray=ds.delta_time)
ds["h_corr"] = ds.h_corr.where(cond=ds.fit_quality == 0)
Take a geographical subset and save to a NetCDF/Zarr format for distribution.
# Antarctic bounding box locations with EPSG:3031 coordinates
regions = gpd.read_file(filename="deepicedrain/deepicedrain_regions.geojson")
regions: gpd.GeoDataFrame = regions.set_index(keys="placename")
# Subset to essential columns
essential_columns: list = [
"x",
"y",
"utc_time",
"h_corr",
"longitude",
"latitude",
"delta_time",
"cycle_number",
]
# Do the actual computation to find data points within region of interest
placename: str = "kamb" # Select Kamb Ice Stream region
region: deepicedrain.Region = deepicedrain.Region.from_gdf(gdf=regions.loc[placename])
ds_subset: xr.Dataset = region.subset(data=ds)
ds_subset = ds_subset.unify_chunks()
ds_subset = ds_subset.compute()
# Save to Zarr/NetCDF formats for distribution
ds_subset.to_zarr(
store=f"ATLXI/ds_subset_{placename}.zarr", mode="w", consolidated=True
)
ds_subset.to_netcdf(path=f"ATLXI/ds_subset_{placename}.nc", engine="h5netcdf")
# Look at Cycle Number 7 only for plotting
points_subset = hv.Points(
data=ds_subset.sel(cycle_number=7)[[*essential_columns]],
label="Cycle_7",
kdims=["x", "y"],
vdims=["utc_time", "h_corr", "cycle_number", "referencegroundtrack"],
datatype=["xarray"],
)
df_subset = points_subset.dframe()
# Plot our subset of points on an interactive map
df_subset.hvplot.points(
title=f"Elevation (metres) at Cycle 7",
x="x",
y="y",
c="referencegroundtrack",
cmap="Set3",
# rasterize=True,
hover=True,
datashade=True,
dynspread=True,
)
To make data analysis and plotting easier,
let's flatten our n-dimensional xarray.Dataset
to a 2-dimensiontal pandas.DataFrame
table format.
There are currently 8 cycles (as of July 2020),
and by selecting just one cycle at a time,
we can see what the height (h_corr
)
of the ice is like at that time.
cycle_number: int = 7
dss = ds.sel(cycle_number=cycle_number)[[*essential_columns]]
print(dss)
<xarray.Dataset> Dimensions: (ref_pt: 229035648) Coordinates: x (ref_pt) float64 -1.748e+05 -1.748e+05 ... 8.298e+04 y (ref_pt) float64 1.185e+06 1.185e+06 ... -1.197e+06 delta_time (ref_pt) timedelta64[ns] dask.array<chunksize=(26339,), meta=np.ndarray> cycle_number int64 7 referencegroundtrack (ref_pt) int64 1 1 1 1 1 ... 1387 1387 1387 1387 1387 * ref_pt (ref_pt) int64 1565331 1565334 ... 1443582 1443585 Data variables: utc_time (ref_pt) datetime64[ns] dask.array<chunksize=(26339,), meta=np.ndarray> h_corr (ref_pt) float32 dask.array<chunksize=(52678,), meta=np.ndarray> longitude (ref_pt) float64 dask.array<chunksize=(52678,), meta=np.ndarray> latitude (ref_pt) float64 dask.array<chunksize=(52678,), meta=np.ndarray> Attributes: ATL06_xover_field_list: ['delta_time', 'h_li', 'h_li_sigma', 'latitude'... L_search_AT: 60 L_search_XT: 65 N_coeffs: 8 N_poly_coeffs: 8 N_search: 3.0 ReferenceGroundTrack: 1.0 beam_pair: 1 beam_spacing: 90 equatorial_radius: 6378137 first_cycle: 2 last_cycle: 8 max_fit_iterations: 20 pair_yatc_ctr_tol: 1000 polar_radius: 6356752.3 poly_exponent_list: [[1, 0], [0, 1], [2, 0], [1, 1], [0, 2], [3, 0]... poly_exponent_x: [1, 0, 2, 1, 0, 3, 2, 1] poly_exponent_y: [0, 1, 0, 1, 2, 0, 1, 2] poly_max_degree_AT: 3 poly_max_degree_XT: 2 seg_atc_spacing: 100 seg_number_skip: 3.0 seg_sigma_threshold_min: 0.05 slope_change_t0: 29548800 t_scale: 31557600.0 xy_scale: 100.0
points = hv.Points(
data=dss,
label=f"Cycle_{cycle_number}",
kdims=["x", "y"],
vdims=["utc_time", "h_corr", "cycle_number"],
datatype=["xarray"],
)
df = points.dframe() # convert to pandas.DataFrame, slow
df = df.dropna() # drop empty rows
print(len(df))
df.head()
171089724
x | y | utc_time | h_corr | cycle_number | |
---|---|---|---|---|---|
0 | -174761.761971 | 1.185317e+06 | 2020-03-26 15:32:15.957044095 | 2021.187378 | 7 |
1 | -174781.279702 | 1.185372e+06 | 2020-03-26 15:32:15.964079395 | 2021.272949 | 7 |
2 | -174800.797361 | 1.185428e+06 | 2020-03-26 15:32:15.972532213 | 2021.388062 | 7 |
3 | -174820.314262 | 1.185483e+06 | 2020-03-26 15:32:15.980990678 | 2021.455933 | 7 |
4 | -174839.832243 | 1.185538e+06 | 2020-03-26 15:32:15.989444897 | 2021.541260 | 7 |
Let's take a look at an interactive map of the ICESat-2 ATL11 height for Cycle 6! We'll plot a random sample (n=5 million) of the points instead of the whole dataset, it should give a good enough picture.
df.sample(n=5_000_000).hvplot.points(
title=f"Elevation (metres) at Cycle {cycle_number}",
x="x",
y="y",
c="h_corr",
cmap="Blues",
rasterize=True,
hover=True,
)
Let's take a look at the change in elevation over a year (4 ICESat-2 cycles). From our loaded dataset (ds), we'll select Cycles 3 and 7, and subtract the height (h_corr) between them to get a height difference (dh).
dh: xr.DataArray = deepicedrain.calculate_delta(
dataset=ds, oldcyclenum=3, newcyclenum=7, variable="h_corr"
)
# Persist data in memory
dh = dh.persist()
delta_h: xr.Dataset = dh.dropna(dim="ref_pt").to_dataset(name="delta_height")
print(delta_h)
<xarray.Dataset> Dimensions: (ref_pt: 136029524) Coordinates: * ref_pt (ref_pt) int64 1421511 1421514 ... 1443594 1443597 referencegroundtrack (ref_pt) int64 1 1 1 1 1 ... 1364 1364 1364 1364 1364 x (ref_pt) float64 6.678e+05 6.677e+05 ... -1.495e+04 y (ref_pt) float64 -1.481e+06 -1.481e+06 ... 1.199e+06 Data variables: delta_height (ref_pt) float32 dask.array<chunksize=(45,), meta=np.ndarray>
df_dh: pd.DataFrame = delta_h.to_dataframe()
print(df_dh.head())
referencegroundtrack x y delta_height ref_pt 1421511 1 667763.459799 -1.480881e+06 -0.219727 1421514 1 667747.547896 -1.480824e+06 -0.194458 1421517 1 667731.636741 -1.480767e+06 -0.521118 1421520 1 667716.200759 -1.480710e+06 -0.485229 1421523 1 667700.285641 -1.480653e+06 -0.337769
# Save or Load delta height data
# df_dh.to_parquet(f"ATLXI/df_dh_{placename}.parquet")
# df_dh: pd.DataFrame = pd.read_parquet(f"ATLXI/df_dh_{placename}.parquet")
# df_dh = df_dh.sample(n=1_000_000)
Using datashader to make the plotting real fast, it actually rasterizes the vector points into a raster grid, since our eyes can't see millions of points that well anyway. You can choose any region, but we'll focus on the Siple Coast Ice Streams. Using PyGMT, we'll plot the Antarctic grounding line as well as the ATL11 height changes overlaid with Subglacial Lake outlines from Smith et al., 2009.
# Select region here, see also geodataframe of regions at top
placename: str = "antarctica"
region: deepicedrain.Region = deepicedrain.Region.from_gdf(gdf=regions.loc[placename])
# Find subglacial lakes (Smith et al., 2009) within region of interest
subglacial_lakes_gdf = gpd.read_file(
filename=r"Quantarctica3/Glaciology/Subglacial Lakes/SubglacialLakes_Smith.shp"
)
subglacial_lakes_gdf = subglacial_lakes_gdf.loc[
subglacial_lakes_gdf.within(
shapely.geometry.Polygon.from_bounds(*region.bounds(style="lbrt"))
)
]
subglacial_lakes_geom = [g for g in subglacial_lakes_gdf.geometry]
subglacial_lakes = [
np.dstack(g.exterior.coords.xy).squeeze().astype(np.float32)
for g in subglacial_lakes_geom
]
# Datashade our height values (vector points) onto a grid (raster image)
agg_grid: xr.DataArray = region.datashade(df=df_dh, z_dim="delta_height")
print(agg_grid)
<xarray.DataArray (y: 1145, x: 1400)> array([[nan, nan, nan, ..., nan, nan, nan], [nan, nan, nan, ..., nan, nan, nan], [nan, nan, nan, ..., nan, nan, nan], ..., [nan, nan, nan, ..., nan, nan, nan], [nan, nan, nan, ..., nan, nan, nan], [nan, nan, nan, ..., nan, nan, nan]]) Coordinates: * x (x) float64 -2.698e+06 -2.694e+06 -2.69e+06 ... 2.794e+06 2.798e+06 * y (y) float64 -2.198e+06 -2.194e+06 -2.19e+06 ... 2.294e+06 2.298e+06
# Plot our map!
scale: int = region.scale
fig = pygmt.Figure()
# fig.grdimage(
# grid="Quantarctica3/SatelliteImagery/MODIS/MODIS_Mosaic.tif",
# region=region,
# projection=f"x{scale}",
# I="+d",
# )
pygmt.makecpt(cmap="roma", series=[-2, 2])
fig.grdimage(
grid=agg_grid,
region=region.bounds(),
projection=f"x1:{scale}",
frame=["afg", f'WSne+t"ICESat-2 Ice Surface Change over {region.name}"'],
Q=True,
)
for subglacial_lake in subglacial_lakes:
fig.plot(data=subglacial_lake, L=True, pen="thinnest")
fig.colorbar(
position="JCR+e", frame=["af", 'x+l"Elevation Change from Cycle 3 to 7"', "y+lm"]
)
fig.coast(
region=region.bounds(),
projection=f"s0/-90/-71/1:{scale}",
area_thresh="+ag",
resolution="i",
shorelines="0.5p",
# land="snow4",
# water="snow3",
V="q",
)
fig.savefig(f"figures/plot_atl11_dh37_{placename}.png")
fig.show(width=600)
Meant to be a bit more interactive but slightly buggy, need to sort out python dependency issues.
shade_grid = datashader.transfer_functions.shade(
agg=agg_grid, cmap=matplotlib.cm.RdYlBu, how="linear", span=[-2, 2]
)
spread_grid = datashader.transfer_functions.dynspread(shade_grid)
spread_grid
df_dh.hvplot.points(
# title="Elevation Change (metres) from Cycle 5 to 6",
x="x",
y="y",
c="delta_height",
# cmap="RdYlBu",
# aggregator=datashader.mean("delta_height"),
rasterize=True,
# responsive=True,
# datashade=True,
# dynamic=True,
# dynspread=True,
hover=True,
height=400,
symmetric=True,
clim=(-20, 20),
)
points = hv.Points(
data=df_dh,
kdims=["x", "y"],
vdims=["delta_height"],
# datatype=["xarray"],
)
hv.operation.datashader.datashade(points)