#!/usr/bin/env python
# coding: utf-8

# ## Demo notebook for accessing NSRDB data on Azure
#
# This notebook provides an example of accessing National Solar Radiation Database (NSRDB) data from blob storage on Azure. The data is stored in one HDF5 file per year.
#
# NSRDB data are stored in the East US Azure region, so this notebook will run most efficiently on Azure compute located in East US. We recommend that substantial computation depending on NSRDB data also be situated in East US. You don't want to download hundreds of terabytes to your laptop! If you are using this data for environmental science applications, consider applying for an [AI for Earth grant](http://aka.ms/ai4egrants) to support your compute requirements.
#
# This notebook was adapted from the [NREL NSRDB/HSDS example](https://github.com/NREL/hsds-examples/blob/master/notebooks/03_NSRDB_introduction.ipynb).

# ### Imports and constants

# In[1]:

import xarray as xr
import pandas as pd
import planetary_computer
from adlfs import AzureBlobFileSystem

# Year to investigate and plot
year = 2015

# Storage resources
storage_account_name = 'nrel'
folder = 'nrel-nsrdb/v3'

# ### List the data files
#
# We can use `adlfs` to list the available files (one per year):

# In[2]:

fs = AzureBlobFileSystem(
    account_name=storage_account_name,
    credential=planetary_computer.sas.get_token("nrel", "nrel-nsrdb").token
)

annual_files = fs.glob(folder + '/*.h5')
print(f'Found {len(annual_files)} annual files:')
for path in annual_files[:10]:
    print(path)
print('...')

# ### Open one data file with xarray

# In[3]:

file = fs.open(f"{folder}/nsrdb_{year}.h5")
ds = xr.open_dataset(file, backend_kwargs={"phony_dims": "sort"}, engine="h5netcdf")
ds

# ### Explore metadata

# In[4]:

# Datasets are stored in 2D grids of shape [time x location]
dset = ds['ghi']
dset.shape

# In[5]:

# Extract the datetime index for the datasets; temporal resolution is 30 minutes
time_index = pd.to_datetime(ds['time_index'][...].astype(str))
time_index

# In[6]:

# Location information is stored in either 'meta' or 'coordinates'.
# Each element contains values for, e.g., latitude, longitude, elevation,
# and land cover type.
meta = ds['meta']
print(meta)

# ### Extract a subset of the data

# In[7]:

# Find the position where time_index matches this timestamp
timestep = time_index.get_loc(pd.Timestamp(str(year) + "-12-31 19:00:00"))

# Capture the metadata in a pandas DataFrame
meta = ds["meta"].data
df = pd.DataFrame(meta)

# Find the positions where df['state'] is California (state names are byte strings)
CA = df[df["state"] == b"California"]

# Load GHI at that timestep for the California locations
subset = ds["ghi"][timestep, CA.index].load()

# ### Plot that subset

# In[8]:

p = pd.DataFrame({"ghi": subset, "lat": CA.latitude, "lon": CA.longitude}).plot.scatter(
    x="lon", y="lat", c="ghi", cmap="viridis"
)
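
# ### Extract a time series at one location
#
# The cell above slices a single timestep across many sites; the complementary access pattern is a full-year series at one site. The cell below is a minimal sketch of that pattern, reusing `ds`, `time_index`, and `CA` from the cells above; the choice of the first California site is arbitrary and for illustration only.

# In[9]:

# Pick one site index from the California subset (arbitrary illustrative choice)
site = CA.index[0]

# Load every timestep for that single location (one column of the [time x location] grid)
series = ds["ghi"][:, site].load()

# Wrap in a pandas Series indexed by timestamp and plot one week in July
ghi_series = pd.Series(series.values, index=time_index)
ghi_series.loc[f"{year}-07-01":f"{year}-07-07"].plot()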
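
# ### Resample to daily means
#
# Because `time_index` is a pandas `DatetimeIndex` at 30-minute resolution, standard pandas resampling applies directly. This is a sketch building on the illustrative `ghi_series` from the previous cell.

# In[10]:

# Average the 30-minute GHI values to one value per day
daily_mean = ghi_series.resample("D").mean()
daily_mean.plot()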
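
# ### Clean up
#
# When finished, close the xarray dataset and the underlying blob file handle.

# In[11]:

ds.close()
file.close()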