xarray
¶Xarrays is a new-ish Python library that makes working with n-dimensional data relatively painless.
More info: http://xarray.pydata.org/en/stable/
#Import libraries
import xarray as xr
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
#Load the netCDF dataset into an `xarray` object
fileName = 'macav2livneh_pr_bcc-csm1-1_r1i1p1_historical_1990_2005_CONUS_monthly.nc'
ds = xr.open_dataset(fileName)
#Extract the precipitation data into an xarray dataset object
ds = xr.open_dataset(fileName)
#Save the precipitation values into a single data array
arrPrecip = ds['precipitation']
#Display information about the arrPrecip data array
print(arrPrecip)
#Extract the time dimension from the data array
arrTime = arrPrecip.time
#Display the first 5 records of the arrTime data array
arrTime[:5].data
#Extract the lat and long values
arrLat = arrPrecip.lat
arrLon = arrPrecip.lon
arrLat[30].data,arrLon[30].data
Use the xarray dataset's sel
function to select the datum nearest the specified time and location
theTime = np.datetime64('1990-03-15')
theLat = 36.005
theLon = 360-78.942
#Extract the value corresponding to the point nearest to the specified time & location
theResult = ds.sel(lat=theLat,lon=theLon,time=theTime,method='nearest')
#Show the precipitation at that point
theResult.precipitation.data
Dropping one of the dimensions in the sel
statement retrieves all data in that dimension fitting the criteria specified in the other dimensions. Here, we omit the time constraint.
#Select only on lat and lon and we get all precip data for all times
theTimeSeries = ds.sel(lat=theLat,lon=theLon,method='nearest')
#Extract the precipitation data array from the filtered dataset
ts_Precip = theTimeSeries.precipitation
#Plot the time series
fig = plt.figure(figsize=(15,5))
ts_Precip.plot.line();
#Drop the lat and lon filters to grab data for all locations
theMapResult = ds.sel(time=theTime).precipitation
#Plot the data
fig = plt.figure(figsize=(12,5))
theMapResult.plot(cmap="YlGnBu", #Specifies the colors to use
robust=True); #Drops outliers (<2%,>98%) from plot
We uses "slices" to extract subsets of data. Here we subset the data spatially and compute the mean
#Specify the spatial slices to grab
theLats = slice(25,36.5)
theLons = slice(360-91,360-76)
#Extract the spatial subset into its own data array
theSubset = ds.sel(lon=theLons,lat=theLats).precipitation
#Display the shape of the returned data array
theSubset.shape
#Plot a histogram of the data within the subset
theSubset.plot();
#Compute the mean across the time axis and show a map
theSubsetAvg = theSubset.mean(axis=0)
theSubsetAvg.plot(cmap="YlGnBu");
#Fancier plots
plt.figure(figsize=(15,3))
plt.subplot(1,3,1)
theSubset.min(axis=0).plot(cmap="YlGnBu")
plt.subplot(1,3,2)
theSubset.mean(axis=0).plot(cmap="YlGnBu")
plt.subplot(1,3,3)
theSubset.max(axis=0).plot(cmap="YlGnBu")
plt.show();
The xarray
package supports seasons to make easy seasonal averages.
#Create a new data array by converting the dates in the `time` array to seasons
arrSeason = ds['time'].dt.season
#Replace the time dimension with seasons
ds['time'] = ds['time'].dt.season
#Extract precipitation for just the summer months; we have 48 summers of data
summerPrecip = ds.sel(time='JJA',lat=theLats,lon=theLons).precipitation
summerPrecip.shape
summerPrecip.plot();
#Fancier plots
plt.figure(figsize=(15,5))
plt.subplot(1,2,1)
summerPrecip.mean(axis=0).plot(cmap="YlGnBu")
plt.subplot(1,2,2)
summerPrecip.std(axis=0).plot(cmap="YlGnBu")
#plt.subplot(1,3,3)
#summerPrecip.max(axis=0).plot(cmap="YlGnBu")
plt.show();