A simple tool for checking CMR queries.
Jupyter widgets are used to set parameters for the SlideRule API.
Regions of interest for submitting to SlideRule are drawn on an ipyleaflet map.
import re
import time
import logging
import posixpath
import numpy as np
import geopandas as gpd
import concurrent.futures
import matplotlib.cm as cm
import matplotlib.colors as colors
import ipywidgets as widgets
from shapely.geometry import LineString
from sliderule import sliderule, icesat2, ipysliderule, earthdata, h5
import sliderule.io
# autoreload
%load_ext autoreload
%autoreload 2
# create logger
# root handler at INFO so the granule-count messages below are visible
logging.basicConfig(level=logging.INFO)
# Configure ICESat-2 API
# connect to the public SlideRule cluster; the sliderule client itself
# only reports WARNING and above
icesat2.init("slideruleearth.io", loglevel=logging.WARNING)
# show client/server versions (notebook cell output)
sliderule.get_version()
# Build the SlideRule parameter widgets and display them, together with
# the map-projection selector, in one vertical box.
SRwidgets = ipysliderule.widgets()
parameter_widgets = [
    SRwidgets.product,
    SRwidgets.release,
    SRwidgets.start_date,
    SRwidgets.end_date,
    SRwidgets.projection,
]
widgets.VBox(parameter_widgets)
# interactive ipyleaflet map in the projection chosen above
m = ipysliderule.leaflet(SRwidgets.projection.value)
m.map
%%time
# for each region of interest
granule_list = []
granule_polygons = []
for poly in m.regions:
# polygon from map
resources,metadata = earthdata.cmr(polygon=poly,
short_name=SRwidgets.product.value,
time_start=SRwidgets.time_start,
time_end=SRwidgets.time_end,
version=SRwidgets.release.value,
return_metadata=True)
# for each granule resource
for i,resource in enumerate(resources):
granule_list.append(resource)
granule_polygons.append(metadata[i].geometry)
# print list of granules
num_granules = len(granule_list)
logging.info('Number of Granules: {0:d}'.format(num_granules))
logging.debug(granule_list)
# selector widget for choosing which of the returned granules to process
granule_select = widgets.SelectMultiple(
    description='Granules:',
    options=granule_list,
    layout=widgets.Layout(width='35%', height='200px'),
    disabled=False,
)
display(granule_select)
# indices (into granule_list) of the currently highlighted granules
granule_indices = list(granule_select.index)
# draw each selected granule footprint on the map, colored along viridis
viridis_rgba = cm.viridis(np.linspace(0, 1, len(granule_indices)))
for g, rgba in zip(granule_indices, viridis_rgba):
    hex_color = colors.to_hex(rgba)
    layer = ipysliderule.ipyleaflet.GeoJSON(
        data=granule_polygons[g].__geo_interface__,
        style={
            'color': hex_color,
            'fill_color': hex_color,
            'opacity': 0.8,
            'weight': 1,
        },
    )
    m.map.add_layer(layer)
def s3_retrieve(granule, **kwargs):
    """Retrieve geolocation variables for an ICESat-2 granule from S3.

    Parameters
    ----------
    granule : str
        ICESat-2 granule filename (e.g. ``ATL06_..._.h5``)
    asset : str, default ``'icesat2'``
        SlideRule asset to read the HDF5 datasets from
    index_key : str, default ``'time'``
        column used as the sorted index of the output GeoDataFrame
    polygon : geopandas.GeoDataFrame or None, default None
        optional region to intersect with (in its own CRS)

    Returns
    -------
    geopandas.GeoDataFrame in EPSG:4326, or an empty frame if no
    beam could be read from the granule

    Raises
    ------
    ValueError
        if the granule is not an ATL03/ATL06/ATL08 product
    """
    # set default keyword arguments
    kwargs.setdefault('asset', 'icesat2')
    kwargs.setdefault('index_key', 'time')
    kwargs.setdefault('polygon', None)
    # regular expression operator for extracting information from files
    rx = re.compile(r'(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})(\d{2})'
        r'(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$')
    # extract parameters from ICESat-2 granule filename
    PRD,HEM,YY,MM,DD,HH,MN,SS,TRK,CYCL,GRN,RL,VRS,AUX = rx.findall(granule).pop()
    # per-product HDF5 group, segment/coordinate keys and variables of interest
    if (PRD == 'ATL03'):
        segment_group = "geolocation"
        segment_key = 'segment_id'
        lon_key = 'reference_photon_lon'
        lat_key = 'reference_photon_lat'
        vnames = ['segment_id','delta_time','reference_photon_lat',
            'reference_photon_lon']
    elif (PRD == 'ATL06'):
        segment_group = "land_ice_segments"
        segment_key = 'segment_id'
        lon_key = 'longitude'
        lat_key = 'latitude'
        vnames = ['segment_id','delta_time','latitude','longitude']
    elif (PRD == 'ATL08'):
        segment_group = "land_segments"
        segment_key = 'segment_id_beg'
        lon_key = 'longitude'
        lat_key = 'latitude'
        vnames = ['segment_id_beg','segment_id_end','delta_time',
            'latitude','longitude']
    else:
        # previously fell through to a NameError below; fail with a clear message
        raise ValueError(f'Unsupported ICESat-2 product: {PRD}')
    # for each valid beam within the HDF5 file
    frames = []
    # map beam name to numeric ground-track identifier
    gt = dict(gt1l=10,gt1r=20,gt2l=30,gt2r=40,gt3l=50,gt3r=60)
    # delta_time values are seconds since the ATLAS SDP epoch
    atlas_sdp_epoch = np.datetime64('2018-01-01T00:00:00')
    kwds = dict(startrow=0,numrows=-1)
    for gtx in ['gt1l','gt1r','gt2l','gt2r','gt3l','gt3r']:
        geodatasets = [dict(dataset=f'{gtx}/{segment_group}/{v}',**kwds) for v in vnames]
        try:
            # get datasets from s3
            hidatasets = h5.h5p(geodatasets, granule, kwargs['asset'])
            # copy to new "flattened" dictionary keyed by dataset basename
            data = {posixpath.basename(key):var for key,var in hidatasets.items()}
            # Generate Time Column
            delta_time = (data['delta_time']*1e9).astype('timedelta64[ns]')
            data['time'] = gpd.pd.to_datetime(atlas_sdp_epoch + delta_time)
        except Exception as exc:
            # best effort: skip beams that are missing or unreadable, but log why
            # (the original bare except silently discarded the failure)
            logging.debug('Skipping %s/%s: %s', granule, gtx, exc)
        else:
            # copy filename parameters, replicated per segment
            nseg = len(data['delta_time'])
            data['rgt'] = [int(TRK)]*nseg
            data['cycle'] = [int(CYCL)]*nseg
            data['gt'] = [gt[gtx]]*nseg
            # pandas dataframe from compiled dictionary
            frames.append(gpd.pd.DataFrame.from_dict(data))
    # no beam could be read: return an empty frame
    # (explicit guard replaces the bare except around pd.concat)
    if not frames:
        return sliderule.emptyframe()
    # concatenate pandas dataframe
    df = gpd.pd.concat(frames)
    # convert to a GeoDataFrame
    geometry = gpd.points_from_xy(df[lon_key], df[lat_key])
    gdf = gpd.GeoDataFrame(df.drop(columns=[lon_key,lat_key]),
        geometry=geometry,crs='EPSG:4326')
    # create global track variable
    track = 6*(gdf['rgt']-1) + (gdf['gt']/10)
    gdf = gdf.assign(track=track)
    # calculate global reference point
    global_ref_pt = 6*1387*gdf[segment_key] + track
    gdf = gdf.assign(global_ref_pt=global_ref_pt)
    # sort values for reproducible output despite async processing
    gdf.set_index(kwargs['index_key'], inplace=True)
    gdf.sort_index(inplace=True)
    # remove duplicate points
    gdf = gdf[~gdf.index.duplicated()]
    # intersect with geometry in projected reference system
    if kwargs['polygon'] is not None:
        gdf = gpd.overlay(gdf.to_crs(kwargs['polygon'].crs),
            kwargs['polygon'], how='intersection')
    # convert back to original coordinate reference system
    return gdf.to_crs('EPSG:4326')
%%time
results = []
# granule resources for selected segments
perf_start = time.perf_counter()
gdf = sliderule.emptyframe()
num_servers, _ = sliderule.update_available_servers()
with concurrent.futures.ThreadPoolExecutor(max_workers=num_servers) as executor:
futures = [executor.submit(s3_retrieve, granule_list[g]) for g in granule_indices]
# Wait for Results
for future in concurrent.futures.as_completed(futures):
# append to dataframe
results.append(future.result())
gdf = gpd.pd.concat(results)
# Display Statistics
print("Reference Ground Tracks: {}".format(gdf["rgt"].unique()))
print("Cycles: {}".format(gdf["cycle"].unique()))
print("Received {} segments".format(gdf.shape[0]))
# fix int columns that were converted in objects
fixed = gdf.drop(columns=['geometry'])
for column in fixed.select_dtypes(include='object').columns:
fixed[column] = fixed[column].astype("int32")
fixed = gpd.GeoDataFrame(fixed,geometry=gdf.geometry,crs='EPSG:4326')
# convert from points to linestrings grouping by track
grouped = fixed.groupby(['track'])['geometry'].apply(
lambda x: LineString(x.tolist()) if x.size > 1 else x.tolist())
geodata = ipysliderule.ipyleaflet.GeoData(geo_dataframe=gpd.GeoDataFrame(grouped),
style={'color': 'black', 'opacity':1, 'weight':0.1})
m.map.add_layer(geodata)