# We need to run this just once, then restart the kernel (and clear the output) after installing the library
!pip install git+https://github.com/nsidc/earthdata.git@main
from earthdata import Auth, Store, DataGranules, DataCollections
from pprint import pprint
auth = Auth().login(strategy="netrc")
# are we authenticated?
if not auth.authenticated:
    # ask for credentials and persist them in a .netrc file
    auth.login(strategy="interactive", persist=True)
# The Store class will let us download data from NASA directly
store = Store(auth)
You're now authenticated with NASA Earthdata Login
Using token with expiration date: 09/26/2022
# We'll get the first 4 collections that match our keyword
collections = DataCollections(auth).keyword("SEA SURFACE HEIGHT").cloud_hosted(True).get(4)
# Let's print the first 2 collections
for collection in collections[0:2]:
    # summary() returns a dict with the concept-id, cloud info, and links to the data
    pprint(collection.summary())
    print(collection.abstract(), "\n")
{'cloud-info': {'Region': 'us-west-2',
                'S3BucketAndObjectPrefixNames': ['podaac-ops-cumulus-protected/ALTIKA_SARAL_L2_OST_XOGDR/',
                                                 'podaac-ops-cumulus-public/ALTIKA_SARAL_L2_OST_XOGDR/'],
                'S3CredentialsAPIDocumentationURL': 'https://archive.podaac.earthdata.nasa.gov/s3credentialsREADME',
                'S3CredentialsAPIEndpoint': 'https://archive.podaac.earthdata.nasa.gov/s3credentials'},
 'concept-id': 'C2251465126-POCLOUD',
 'file-type': "[{'Format': 'netCDF-4', 'FormatType': 'Native'}]",
 'get-data': ['https://search.earthdata.nasa.gov/search/granules?p=C2251465126-POCLOUD',
              'https://cmr.earthdata.nasa.gov/virtual-directory/collections/C2251465126-POCLOUD'],
 'short-name': 'ALTIKA_SARAL_L2_OST_XOGDR',
 'version': 'f'}
These data are near-real-time (NRT) (within 7-9 hours of measurement) sea surface height anomalies (SSHA) from the AltiKa altimeter onboard the Satellite with ARgos and ALtiKa (SARAL). SARAL is a French(CNES)/Indian(SARAL) collaborative mission to measure sea surface height using the Ka-band AltiKa altimeter and was launched February 25, 2013. The major difference between these data and the Operational Geophysical Data Record (OGDR) data produced by the project is that the orbit from SARAL has been adjusted using SSHA differences with those from the OSTM/Jason-2 GPS-OGDR-SSHA product at inter-satellite crossover locations. This produces a more accurate NRT orbit altitude for SARAL with accuracy of 1.5 cm (RMS), taking advantage of the 1 cm (radial RMS) accuracy of the GPS-based orbit used for the OSTM/Jason-2 GPS-OGDR-SSHA product. This dataset also contains all data from the project (reduced) OGDR, and improved altimeter wind speeds and sea state bias correction. More information on the SARAL mission can be found at: http://www.aviso.oceanobs.com/en/missions/current-missions/saral.html

{'cloud-info': {'Region': 'us-west-2',
                'S3BucketAndObjectPrefixNames': ['podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL1812/',
                                                 'podaac-ops-cumulus-public/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL1812/'],
                'S3CredentialsAPIDocumentationURL': 'https://archive.podaac.earthdata.nasa.gov/s3credentialsREADME',
                'S3CredentialsAPIEndpoint': 'https://archive.podaac.earthdata.nasa.gov/s3credentials'},
 'concept-id': 'C2036880672-POCLOUD',
 'file-type': "[{'Format': 'netCDF-4', 'FormatType': 'Native'}]",
 'get-data': ['https://cmr.earthdata.nasa.gov/virtual-directory/collections/C2036880672-POCLOUD',
              'https://search.earthdata.nasa.gov/search/granules?p=C2036880672-POCLOUD'],
 'short-name': 'SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL1812',
 'version': '1812'}
These are gridded Sea Surface Height Anomalies (SSHA) above a mean sea surface, on 1/6th degree grid every 5 days. It contains the fully corrected heights, but delayed 3 months. The gridded data are derived from the SSHA data of TOPEX/Poseidon, Jason-1, Jason-2 and Jason-3 as reference data from the level 2 swath data found at https://podaac.jpl.nasa.gov/dataset/MERGED_TP_J1_OSTM_OST_CYCLES_V42, plus ERS-1, ERS-2, Envisat, SARAL-AltiKa, CRyosat-2, depending on the date, from the RADS database. The gridding is done by the kriging method. The date given in the data is the center of the 5 day window.
Now we can search for granules in the gridded SSH collection (concept-id C2036880672-POCLOUD, the second one printed above) over a one-year period:
granules = DataGranules().concept_id("C2036880672-POCLOUD").temporal("2017-01","2018-01").get()
print(len(granules))
72
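If we wanted to refine the search further, DataGranules accepts additional CMR filters. For example, here is a hypothetical spatial refinement with bounding_box(); the box values are just for illustration, and since this is a global gridded product every granule would still match:
# Hypothetical example: same collection and time range, restricted to a bounding box
# (arguments: lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
granules_pacific = (
    DataGranules()
    .concept_id("C2036880672-POCLOUD")
    .temporal("2017-01", "2018-01")
    .bounding_box(-170, -10, -120, 10)
    .get()
)
print(len(granules_pacific))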
If we choose, we can use earthdata just to grab the files' URLs and then download them with another library (as long as we have a .netrc file configured with NASA's EDL credentials). Getting the links to our data is quite simple with the data_links() method on each of the results:
# "onprem" is a bit misleading here: the collection is cloud hosted, but these HTTPS URLs can be accessed out of region (outside AWS us-west-2)
granules[0].data_links(access="onprem")
['https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL1812/ssh_grids_v1812_2017010412.nc']
granules[0].data_links(access="direct")
['s3://podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL1812/ssh_grids_v1812_2017010412.nc']
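As mentioned above, we could also take these HTTPS URLs and download the files ourselves with a generic HTTP library. Here is a minimal sketch with requests (not part of the original notebook): when no explicit auth is passed, requests picks up the EDL credentials for urs.earthdata.nasa.gov from ~/.netrc and follows the login redirect.
import requests

# Hypothetical helper to download one granule from its HTTPS URL using the
# credentials stored in ~/.netrc
def download_granule(url, out_dir="."):
    local_name = url.split("/")[-1]
    with requests.get(url, stream=True, allow_redirects=True) as r:
        r.raise_for_status()
        with open(f"{out_dir}/{local_name}", "wb") as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    return local_name

# download_granule(granules[0].data_links(access="onprem")[0])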
We can use earthdata to stream files directly into xarray. This works well as long as the data is in a format that xarray's remote-capable back-ends (h5netcdf and scipy) can read; if we deal with a format that isn't recognized by these two back-ends, xarray will raise an exception. Opening a year of SSH data (SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL1812, approximately 1.1 GB) takes about 5 minutes with streaming when we access it out of region (i.e., not from AWS).
%%time
import xarray as xr
granules = []
# we just grab 1 granule from May for each year of the dataset
for year in range(1990, 2019):
    granule = DataGranules().concept_id("C2036880672-POCLOUD").temporal(f"{year}-05", f"{year}-06").get(1)
    if len(granule) > 0:
        granules.append(granule[0])
ds = xr.open_mfdataset(store.open(granules), chunks={})
ds
Opening 26 granules, approx size: 0.4 GB https://archive.podaac.earthdata.nasa.gov/s3credentials https://archive.podaac.earthdata.nasa.gov/s3credentials
SUBMITTING | : 0%| | 0/26 [00:00<?, ?it/s]
PROCESSING | : 0%| | 0/26 [00:00<?, ?it/s]
COLLECTING | : 0%| | 0/26 [00:00<?, ?it/s]
CPU times: user 4.16 s, sys: 1.06 s, total: 5.22 s Wall time: 30.5 s
<xarray.Dataset>
Dimensions:      (Time: 26, Longitude: 2160, nv: 2, Latitude: 960)
Coordinates:
  * Longitude    (Longitude) float32 0.08333 0.25 0.4167 ... 359.6 359.8 359.9
  * Latitude     (Latitude) float32 -79.92 -79.75 -79.58 ... 79.58 79.75 79.92
  * Time         (Time) datetime64[ns] 1993-05-05T12:00:00 ... 2018-05-04T12:...
Dimensions without coordinates: nv
Data variables:
    Lon_bounds   (Time, Longitude, nv) float32 dask.array<chunksize=(1, 2160, 2), meta=np.ndarray>
    Lat_bounds   (Time, Latitude, nv) float32 dask.array<chunksize=(1, 960, 2), meta=np.ndarray>
    Time_bounds  (Time, nv) datetime64[ns] dask.array<chunksize=(1, 2), meta=np.ndarray>
    SLA          (Time, Longitude, Latitude) float32 dask.array<chunksize=(1, 2160, 960), meta=np.ndarray>
    SLA_ERR      (Time, Longitude, Latitude) float32 dask.array<chunksize=(1, 2160, 960), meta=np.ndarray>
Attributes: (12/13)
    Conventions:            CF-1.6
    ncei_template_version:  NCEI_NetCDF_Grid_Template_v2.0
    Institution:            Jet Propulsion Laboratory
    geospatial_lat_min:     -79.916664
    geospatial_lat_max:     79.916664
    geospatial_lon_min:     0.083333336
    ...                     ...
    time_coverage_start:    1993-05-05
    time_coverage_end:      1993-05-05
    date_created:           2019-02-11T20:20:41.809201
    version_number:         1812
    summary:                Sea level anomaly grids from altimeter data using...
    title:                  Sea Level Anormaly Estimate based on Altimeter Data
ds.SLA.where((ds.SLA>=0) & (ds.SLA < 10)).mean('Time').plot(figsize=(14,6), x='Longitude', y='Latitude')
<matplotlib.collections.QuadMesh at 0x7f9c1bda9250>
from pyproj import Geod
import numpy as np
def ssl_area(lats):
    """
    Calculate the area associated with a 1/6 by 1/6 degree box at the latitudes specified in 'lats'.

    Parameters
    ==========
    lats: a list or numpy array of size N with the latitudes of interest.

    Returns
    =======
    out: array of size N with the area values (unit: m^2)
    """
    # Define WGS84 as CRS
    geod = Geod(ellps='WGS84')
    # Half-width of a 1/6 degree cell
    dx = 1 / 12.0
    # For each latitude, build a 1/6 x 1/6 degree quadrilateral centered on it
    # (the longitude is arbitrary since cell area only depends on latitude) and
    # compute its geodesic area on the ellipsoid; polygon_area_perimeter returns
    # (area, perimeter), so we keep only the first element.
    c_area = lambda lat: geod.polygon_area_perimeter(np.r_[-dx, dx, dx, -dx], lat + np.r_[-dx, -dx, dx, dx])[0]
    out = []
    for lat in lats:
        out.append(c_area(lat))
    return np.array(out)
ssh_area = ssl_area(ds.Latitude.data).reshape(1,-1)
print(ssh_area.shape)
(1, 960)
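As a quick sanity check (an addition, not part of the original notebook): each latitude row has 2160 identical 1/6-degree cells, so the total area should be close to Earth's surface area between roughly 80°S and 80°N, i.e. on the order of 5 x 10^14 m^2.
# Total area of the 960 x 2160 grid cells; expect a value around 5e14 m^2
total_area = ssh_area.sum() * ds.Longitude.size
print(f"{total_area:.3e} m^2")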
%%time
import numpy as np
def global_mean(SLA, **kwargs):
    # Area-weighted mean: SLA/SLA is 1 where SLA is valid and NaN over land/ice,
    # so the denominator only sums the area of cells that actually contain data.
    dout = (SLA * ssh_area).sum() / (SLA / SLA * ssh_area).sum()
    return dout
result = ds.SLA.groupby('Time').apply(global_mean)
result.plot(color="red", marker="o",figsize=(8,4))
CPU times: user 1.68 s, sys: 104 ms, total: 1.79 s Wall time: 1.16 s
[<matplotlib.lines.Line2D at 0x7f9c126d0ee0>]
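Note: an equivalent, somewhat more idiomatic way to compute the same area-weighted time series (a sketch, not part of the original notebook) is xarray's built-in weighted() reductions, which handle the NaN masking for us:
# Alternative using xarray's weighted mean; NaN cells in SLA are ignored by default
weights = xr.DataArray(ssl_area(ds.Latitude.data), dims=["Latitude"], coords={"Latitude": ds.Latitude})
weighted_mean = ds.SLA.weighted(weights).mean(dim=["Longitude", "Latitude"])
weighted_mean.plot(color="red", marker="o", figsize=(8, 4))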