#!/usr/bin/env python # coding: utf-8 # # SWOT Hydrology Dataset Exploration on a local machine # # ## Accessing and Visualizing SWOT Datasets # # ### Requirement: # Local compute environment e.g. laptop, server: this tutorial can be run on your local machine. # # ### Learning Objectives: # - Access SWOT HR data products (archived in NASA Earthdata Cloud) by downloading to local machine # - Visualize accessed data for a quick check # # #### SWOT Level 2 KaRIn High Rate Version 2.0 Datasets: # # 1. **River Vector Shapefile** - SWOT_L2_HR_RIVERSP_2.0 # # 2. **Lake Vector Shapefile** - SWOT_L2_HR_LAKESP_2.0 # # 3. **Water Mask Pixel Cloud NetCDF** - SWOT_L2_HR_PIXC_2.0 # # 4. **Water Mask Pixel Cloud Vector Attribute NetCDF** - SWOT_L2_HR_PIXCVec_2.0 # # 5. **Raster NetCDF** - SWOT_L2_HR_Raster_2.0 # # _Notebook Author: Cassie Nickles, NASA PO.DAAC (Feb 2024) || Other Contributors: Zoe Walschots (PO.DAAC Summer Intern 2023), Catalina Taglialatela (NASA PO.DAAC), Luis Lopez (NASA NSIDC DAAC), Brent Williams (NASA JPL)_ # # _Last updated: 9 July 2024_ # # ### Libraries Needed # In[39]: import glob import h5netcdf import xarray as xr import pandas as pd import geopandas as gpd import contextily as cx import numpy as np import matplotlib.pyplot as plt import hvplot.xarray import zipfile import earthaccess # ### Earthdata Login # # An Earthdata Login account is required to access data, as well as discover restricted data, from the NASA Earthdata system. Thus, to access NASA data, you need Earthdata Login. If you don't already have one, please visit https://urs.earthdata.nasa.gov to register and manage your Earthdata Login account. This account is free to create and only takes a moment to set up. We use `earthaccess` to authenticate your login credentials below. # In[40]: auth = earthaccess.login() # ### Single File Access # # #### **1. River Vector Shapefiles** # # The https access link can be found using `earthaccess` data search. Since this collection consists of Reach and Node files, we need to extract only the granule for the Reach file. We do this by filtering for the 'Reach' title in the data link. # # Alternatively, Earthdata Search [(see tutorial)](https://nasa-openscapes.github.io/2021-Cloud-Workshop-AGU/tutorials/01_Earthdata_Search.html) can be used to manually search in a GUI interface. # # For additional tips on spatial searching of SWOT HR L2 data, see also [PO.DAAC Cookbook - SWOT Chapter tips section](https://podaac.github.io/tutorials/quarto_text/SWOT.html#tips-for-swot-hr-spatial-search). # # #### Search for the data of interest # In[41]: #Retrieves granule from the day we want, in this case by passing to `earthaccess.search_data` function the data collection shortname, temporal bounds, and filter by wildcards river_results = earthaccess.search_data(short_name = 'SWOT_L2_HR_RIVERSP_2.0', temporal = ('2024-02-01 00:00:00', '2024-07-15 23:59:59'), # can also specify by time granule_name = '*Reach*_287_NA*') # here we filter by Reach files (not node), pass=287, continent code=NA # #### Dowload, unzip, read the data # # Let's download the first data file! `earthaccess.download` has a list as the input format, so we need to put brackets around the single file we pass. # In[42]: earthaccess.download([river_results[0]], "./data_downloads") # The native format for this data is a .zip file, and we want the .shp file within the .zip file, so we must first extract the data to open it. First, we'll programmatically get the filename we just downloaded, and then extract all data to the `data_downloads` folder. # In[43]: filename = earthaccess.results.DataGranule.data_links(river_results[0], access='external') filename = filename[0].split("/")[-1] filename # In[44]: with zipfile.ZipFile(f'data_downloads/{filename}', 'r') as zip_ref: zip_ref.extractall('data_downloads') # Open the shapefile using `geopandas` # In[45]: filename_shp = filename.replace('.zip','.shp') # In[46]: SWOT_HR_shp1 = gpd.read_file(f'data_downloads/{filename_shp}') #view the attribute table SWOT_HR_shp1 # #### Quickly plot the SWOT river data # In[9]: # Simple plot fig, ax = plt.subplots(figsize=(7,5)) SWOT_HR_shp1.plot(ax=ax, color='black') cx.add_basemap(ax, crs=SWOT_HR_shp1.crs, source=cx.providers.OpenTopoMap) # In[ ]: # Another way to plot geopandas dataframes is with `explore`, which also plots a basemap SWOT_HR_shp1.explore() # #### **2. Lake Vector Shapefiles** # The lake vector shapefiles can be accessed in the same way as the river shapefiles above. # # For additional tips on spatial searching of SWOT HR L2 data, see also [PO.DAAC Cookbook - SWOT Chapter tips section](https://podaac.github.io/tutorials/quarto_text/SWOT.html#tips-for-swot-hr-spatial-search). # #### Search for data of interest # In[48]: lake_results = earthaccess.search_data(short_name = 'SWOT_L2_HR_LAKESP_2.0', temporal = ('2024-02-01 00:00:00', '2024-07-15 23:59:59'), # can also specify by time granule_name = '*Prior*_287_NA*') # here we filter by files with 'Prior' in the name (This collection has three options: Obs, Unassigned, and Prior), pass 287 and continent code=NA # Let's download the first data file! earthaccess.download has a list as the input format, so we need to put brackets around the single file we pass. # In[49]: earthaccess.download([lake_results[0]], "./data_downloads") # The native format for this data is a .zip file, and we want the .shp file within the .zip file, so we must first extract the data to open it. First, we'll programmatically get the filename we just downloaded, and then extract all data to the `SWOT_downloads` folder. # In[50]: filename2 = earthaccess.results.DataGranule.data_links(lake_results[0], access='external') filename2 = filename2[0].split("/")[-1] filename2 # In[51]: with zipfile.ZipFile(f'data_downloads/{filename2}', 'r') as zip_ref: zip_ref.extractall('data_downloads') # Open the shapefile using `geopandas` # In[52]: filename_shp2 = filename2.replace('.zip','.shp') filename_shp2 # In[53]: SWOT_HR_shp2 = gpd.read_file(f'data_downloads/{filename_shp2}') #view the attribute table SWOT_HR_shp2 # #### Quickly plot the SWOT lakes data # In[17]: fig, ax = plt.subplots(figsize=(7,5)) SWOT_HR_shp2.plot(ax=ax, color='black') cx.add_basemap(ax, crs=SWOT_HR_shp2.crs, source=cx.providers.OpenTopoMap) # Accessing the remaining files is different than the shp files above. We do not need to extract the shapefiles from a zip file because the following SWOT HR collections are stored in **netCDF** files in the cloud. For the rest of the products, we will open via `xarray`, not `geopandas`. # #### **3. Water Mask Pixel Cloud NetCDF** # #### Search for data collection and time of interest # In[54]: pixc_results = earthaccess.search_data(short_name = 'SWOT_L2_HR_PIXC_2.0', temporal = ('2024-02-01 00:00:00', '2024-07-15 23:59:59'), # can also specify by time bounding_box = (-106.62, 38.809, -106.54, 38.859)) # Lake Travis near Austin, TX # Let's download one data file! earthaccess.download has a list as the input format, so we need to put brackets around the single file we pass. # In[55]: earthaccess.download([pixc_results[0]], "./data_downloads") # #### Open data using xarray # The pixel cloud netCDF files are formatted with three groups titled, "pixel cloud", "tvp", or "noise" (more detail [here](https://podaac-tools.jpl.nasa.gov/drive/files/misc/web/misc/swot_mission_docs/pdd/D-56411_SWOT_Product_Description_L2_HR_PIXC_20200810.pdf)). In order to access the coordinates and variables within the file, a group must be specified when calling xarray open_dataset. # In[56]: ds_PIXC = xr.open_mfdataset("data_downloads/SWOT_L2_HR_PIXC_*.nc", group = 'pixel_cloud', engine='h5netcdf') ds_PIXC # #### For plotting PIXC using classification and quality flags # In[57]: # mask to get good water pixels mask = np.where(np.logical_and(ds_PIXC.classification > 2, ds_PIXC.geolocation_qual <16384)) plt.scatter(x=ds_PIXC.longitude[mask], y=ds_PIXC.latitude[mask], c=ds_PIXC.height[mask]) plt.colorbar().set_label('Height (m)') # #### **4. Water Mask Pixel Cloud Vector Attribute NetCDF** # #### Search for data of interest # In[58]: #Let's plot the same pass and tile as the above pixcvec_results = earthaccess.search_data(short_name = 'SWOT_L2_HR_PIXCVEC_2.0', granule_name = '*010_412_087L*') #The same cycle, pass, and tile as previously downloaded # Let's download the first data file! earthaccess.download has a list as the input format, so we need to put brackets around the single file we pass. # In[59]: earthaccess.download([pixcvec_results[0]], "./data_downloads") # #### Open data using xarray # First, we'll programmatically get the filename we just downloaded and then view the file via `xarray`. # In[60]: ds_PIXCVEC = xr.open_mfdataset("data_downloads/SWOT_L2_HR_PIXCVec_*.nc", decode_cf=False, engine='h5netcdf') ds_PIXCVEC # #### Simple plot # In[61]: pixcvec_htvals = ds_PIXCVEC.height_vectorproc.compute() pixcvec_latvals = ds_PIXCVEC.latitude_vectorproc.compute() pixcvec_lonvals = ds_PIXCVEC.longitude_vectorproc.compute() #Before plotting, we set all fill values to nan so that the graph shows up better spatially pixcvec_htvals[pixcvec_htvals > 15000] = np.nan pixcvec_latvals[pixcvec_latvals == 0] = np.nan pixcvec_lonvals[pixcvec_lonvals == 0] = np.nan # In[62]: plt.scatter(x=pixcvec_lonvals, y=pixcvec_latvals, c=pixcvec_htvals) plt.colorbar().set_label('Height (m)') # #### **5. Raster NetCDF** # #### Search for data of interest # In[63]: raster_results = earthaccess.search_data(short_name = 'SWOT_L2_HR_Raster_2.0', #temporal = ('2024-02-01 00:00:00', '2024-07-15 23:59:59'), # can also specify by time granule_name = '*100m*010_412_*', #The same cycle and pass as previously downloaded', # here we filter by files with '100m' in the name (This collection has two resolution options: 100m & 250m) bounding_box = (-106.62, 38.809, -106.54, 38.859)) # Lake Travis near Austin, TX # Let's download one data file. # In[64]: earthaccess.download([raster_results[0]], "./data_downloads") # #### Open data with xarray # First, we'll programmatically get the filename we just downloaded and then view the file via `xarray`. # In[65]: ds_raster = xr.open_mfdataset(f'data_downloads/SWOT_L2_HR_Raster*', engine='h5netcdf') ds_raster # #### Quick interactive plot with `hvplot` # # Note: this is not filtered by quality # In[ ]: ds_raster.wse.hvplot.image(y='y', x='x')