#!/usr/bin/env python # coding: utf-8 # ## National Water Model Example # # As described in the [overview](https://planetarycomputer.microsoft.com/dataset/storage/noaa-nwm), National Water Model products are available from Azure Blob Storage. This notebook uses [xarray](https://xarray.pydata.org/) and [adlfs](https://fsspec.github.io/adlfs) to load the NetCDF files and visualize the data. # In[1]: import azure.storage.blob import planetary_computer import adlfs import pandas as pd import geopandas import xarray as xr fs = adlfs.AzureBlobFileSystem( "noaanwm", credential=planetary_computer.sas.get_token("noaanwm", "nwm").token ) # A new set of files is produced daily, using the filepattern `nmw.YYYYMMDD` for year, month, and day. # ## Streamflow data # # This example shows how to load `streamflow` data and visualize it on a map. # In[2]: prefix = "nwm/nwm.20230123" ds = xr.open_dataset( fs.open(f"{prefix}/short_range/nwm.t00z.short_range.channel_rt.f001.conus.nc") ) ds # We can visualize the distribution of the streamflow variable: # In[3]: ds["streamflow"].plot.hist(log=True, bins=50); # The `streamflow` is indexed by a `feature_id` coordinate variable. We can load some information for those coordinates from a geoparquet file in Azure Blob Storage, created from the GeoPackage linked from https://water.noaa.gov/about/nwm. # In[4]: gdf = geopandas.read_parquet( "az://hydrofabric/nwm_reaches_conus.parquet", storage_options={ "account_name": "noaanwm" } ).set_index("ID") # The geospatial data from `gdf` can be joined to the values from `ds.streamflow`: # In[5]: streamflow = ds.streamflow.to_pandas() gdf["streamflow"] = streamflow gdf.head() # Now we can visualize the data. Each record in `gdf` is a streamflow # In[6]: gdf[gdf.streamflow > 0].head(1).explore(tiles="Stamen Terrain") # To make a simple plot for the entire Continental United States, we'll take the centroid of each geometry. # In[7]: import warnings with warnings.catch_warnings(action="ignore"): center = gdf.centroid data = pd.DataFrame({ "x": center.x, "y": center.y, "streamflow": gdf.streamflow }) # To avoid overplotting from the 2,000,000+ points, we'll use [datashader](https://datashader.org/) to aggregate the points before plotting. # In[8]: import datashader, colorcet import matplotlib.pyplot as plt viridis = plt.get_cmap("viridis") cvs = datashader.Canvas(plot_width=1100, plot_height=700) agg = cvs.points(data, "x", "y", agg=datashader.mean("streamflow")) img = datashader.transfer_functions.shade(agg, how="log", cmap=viridis) img # ## Land data # # We'll load a short-range `land` forecast for the Continental United States. # In[9]: prefix = "nwm/nwm.20230123" ds = xr.open_dataset( fs.open(f"{prefix}/short_range/nwm.t00z.short_range.land.f001.conus.nc") ) display(ds) soil_saturation = ds["SOILSAT_TOP"].load() # The `xarray.Dataset` shows the variables that are available. We'll load the `SOILSAT_TOP` variable, which gives the fraction of soil saturation for the top two layers. # In[10]: import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=(16, 10)) soil_saturation.coarsen(x=4, y=4, boundary="trim").mean().plot(ax=ax) ax.set(title="Soil saturation fraction") plt.savefig("noaa-nwm-soil-saturation.png", bbox_inches=0, pad_inches=0) # Similar files are available for different forecast hours (`f001`, `f002`, ..., `f018`) # # These forecasts also include data on channel routing, terrain routing, and reservoir output. The reservoir data can be converted from the NetCDF data model to a tabular data strcture. # In[11]: reservoir = xr.open_dataset( fs.open("nwm/nwm.20230123/short_range/nwm.t00z.short_range.reservoir.f001.conus.nc") ).load() reservoir # In[12]: import geopandas import pyproj crs = pyproj.CRS.from_cf(reservoir.crs.attrs) df = reservoir.drop("crs").to_dataframe() geometry = geopandas.points_from_xy(df.longitude, df.latitude, crs=crs) gdf = geopandas.GeoDataFrame(df, geometry=geometry) gdf.head() # Which can also be visualized. # In[13]: import contextily fig, ax = plt.subplots(figsize=(16, 12)) gdf[["inflow", "geometry"]].plot( column="inflow", scheme="NaturalBreaks", markersize=5, legend=True, ax=ax, cmap="plasma", ) contextily.add_basemap(ax, crs=str(gdf.crs)) ax.set_axis_off() # Other kinds data are available under each date's prefix. Some sub-folders different kinds of data (forcings, long- and medium-range forecasts, etc.) and some cover different regions (Hawaii and Puerto Rico). # In[14]: fs.ls(prefix)