In [1]:

%matplotlib inline
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import intake

In [2]:

catalog = intake.open_esm_datastore('https://cmip6-pds.s3.amazonaws.com/pangeo-cmip6.json')

In [26]:

# Global attributes are described in https://docs.google.com/document/d/1h0r8RZr_f3-8egBMMh7aqLwy3snpD6_MrDz1q8n5XUk/edit

# Variables are described in this spreadsheet
# http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreq/tags/latest/dreqPy/docs/CMIP6_MIP_tables.xlsx
variable_id = 'tas' # Surface Air Temperature
table_id = 'Amon' # Monthly data from Atmosphere

grid = 'gn' #

# Records for Institution, experiment, and source_id are stored in https://github.com/WCRP-CMIP/CMIP6_CVs
experiment_ids = ['historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
activity_ids = ['ScenarioMIP', 'CMIP'] # Search Scenarios & Historical data only
institution_id = 'NASA-GISS'
source_id = ['GISS-E2-1-G']

# Member ID is formed from subexperiment id & variant label
# variant label is
# variant_label: a label constructed from 4 indices stored as global attributes:  
# variant_label = r<k>i<l>p<m>f<n>
#       where
# k = realization_index
# l = initialization_index
# m = physics_index
# n = forcing_index
# For a given experiment, the realization_index, initialization_index, physics_index, and forcing_index are used to uniquely identify each simulation of an ensemble of runs contributed by a single model.

member_id = 'r1i1p3f1'

In [27]:

res = catalog.search(activity_id=activity_ids, institution_id=institution_id, experiment_id=experiment_ids, source_id=source_id, table_id=table_id, variable_id=variable_id, grid_label=grid, member_id=member_id)
display(res.df)

/home/ec2-user/anaconda3/envs/daskpy3/lib/python3.7/site-packages/intake_esm/search.py:107: UserWarning: Query returned zero results.
  warn(message)

	activity_id	institution_id	source_id	experiment_id	member_id	table_id	variable_id	grid_label	zstore	dcpp_init_year	version

In [28]:

datasets = res.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times': True})

/home/ec2-user/anaconda3/envs/daskpy3/lib/python3.7/site-packages/intake_esm/core.py:890: UserWarning: There are no datasets to load! Returning an empty dictionary.
  warn('There are no datasets to load! Returning an empty dictionary.')

In [29]:

display(datasets)

{}

In [7]:

# Put the air temperature into units that we understand better

for i in datasets:
    ds = datasets[i]
    if ds.tas.units == 'K':
        ds.tas.values = (ds.tas.values - 273.15) * 9.0 / 5.0 + 32.0
        ds['tas', 'units'] = 'F'

In [8]:

display(datasets['ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp126.Amon.gn'])

<xarray.Dataset>
Dimensions:           (bnds: 2, lat: 90, lon: 144, member_id: 1, time: 1032)
Coordinates:
    height            float64 ...
  * lat               (lat) float64 -89.0 -87.0 -85.0 -83.0 ... 85.0 87.0 89.0
    lat_bnds          (lat, bnds) float64 dask.array<chunksize=(90, 2), meta=np.ndarray>
  * lon               (lon) float64 1.25 3.75 6.25 8.75 ... 353.8 356.2 358.8
    lon_bnds          (lon, bnds) float64 dask.array<chunksize=(144, 2), meta=np.ndarray>
  * time              (time) object 2015-01-16 12:00:00 ... 2100-12-16 12:00:00
    time_bnds         (time, bnds) object dask.array<chunksize=(1032, 2), meta=np.ndarray>
  * member_id         (member_id) <U8 'r1i1p3f1'
Dimensions without coordinates: bnds
Data variables:
    tas               (member_id, time, lat, lon) float32 -18.59 ... -9.801
    ('tas', 'units')  <U1 'F'
Attributes: (12/53)
    Conventions:             CF-1.7 CMIP-6.2
    activity_id:             ScenarioMIP
    branch_method:           standard
    branch_time_in_child:    0.0
    branch_time_in_parent:   0.0
    cmor_version:            3.3.2
    ...                      ...
    variable_id:             tas
    variant_label:           r1i1p3f1
    netcdf_tracking_ids:     hdl:21.14100/72e1e6f8-c5d1-4285-921f-ab81401252d...
    version_id:              v20200115
    intake_esm_varname:      ['tas']
    intake_esm_dataset_key:  ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp126.Amon.gn

xarray.Dataset

Dimensions:
- bnds: 2
- lat: 90
- lon: 144
- member_id: 1
- time: 1032

Coordinates: (8)

height
()
float64
...
axis :
Z
long_name :
height
positive :
up
standard_name :
height
units :
m
```
array(2.)
```

lat

(lat)

float64

-89.0 -87.0 -85.0 ... 87.0 89.0

axis :: Y
bounds :: lat_bnds
long_name :: latitude
standard_name :: latitude
units :: degrees_north

array([-89., -87., -85., -83., -81., -79., -77., -75., -73., -71., -69., -67.,
       -65., -63., -61., -59., -57., -55., -53., -51., -49., -47., -45., -43.,
       -41., -39., -37., -35., -33., -31., -29., -27., -25., -23., -21., -19.,
       -17., -15., -13., -11.,  -9.,  -7.,  -5.,  -3.,  -1.,   1.,   3.,   5.,
         7.,   9.,  11.,  13.,  15.,  17.,  19.,  21.,  23.,  25.,  27.,  29.,
        31.,  33.,  35.,  37.,  39.,  41.,  43.,  45.,  47.,  49.,  51.,  53.,
        55.,  57.,  59.,  61.,  63.,  65.,  67.,  69.,  71.,  73.,  75.,  77.,
        79.,  81.,  83.,  85.,  87.,  89.])

lat_bnds

(lat, bnds)

float64

dask.array<chunksize=(90, 2), meta=np.ndarray>

	Array	Chunk
Bytes	1.44 kB	1.44 kB
Shape	(90, 2)	(90, 2)
Count	2 Tasks	1 Chunks
Type	float64	numpy.ndarray

lon

(lon)

float64

1.25 3.75 6.25 ... 356.2 358.8

axis :: X
bounds :: lon_bnds
long_name :: longitude
standard_name :: longitude
units :: degrees_east

array([  1.25,   3.75,   6.25,   8.75,  11.25,  13.75,  16.25,  18.75,  21.25,
        23.75,  26.25,  28.75,  31.25,  33.75,  36.25,  38.75,  41.25,  43.75,
        46.25,  48.75,  51.25,  53.75,  56.25,  58.75,  61.25,  63.75,  66.25,
        68.75,  71.25,  73.75,  76.25,  78.75,  81.25,  83.75,  86.25,  88.75,
        91.25,  93.75,  96.25,  98.75, 101.25, 103.75, 106.25, 108.75, 111.25,
       113.75, 116.25, 118.75, 121.25, 123.75, 126.25, 128.75, 131.25, 133.75,
       136.25, 138.75, 141.25, 143.75, 146.25, 148.75, 151.25, 153.75, 156.25,
       158.75, 161.25, 163.75, 166.25, 168.75, 171.25, 173.75, 176.25, 178.75,
       181.25, 183.75, 186.25, 188.75, 191.25, 193.75, 196.25, 198.75, 201.25,
       203.75, 206.25, 208.75, 211.25, 213.75, 216.25, 218.75, 221.25, 223.75,
       226.25, 228.75, 231.25, 233.75, 236.25, 238.75, 241.25, 243.75, 246.25,
       248.75, 251.25, 253.75, 256.25, 258.75, 261.25, 263.75, 266.25, 268.75,
       271.25, 273.75, 276.25, 278.75, 281.25, 283.75, 286.25, 288.75, 291.25,
       293.75, 296.25, 298.75, 301.25, 303.75, 306.25, 308.75, 311.25, 313.75,
       316.25, 318.75, 321.25, 323.75, 326.25, 328.75, 331.25, 333.75, 336.25,
       338.75, 341.25, 343.75, 346.25, 348.75, 351.25, 353.75, 356.25, 358.75])

lon_bnds

(lon, bnds)

float64

dask.array<chunksize=(144, 2), meta=np.ndarray>

	Array	Chunk
Bytes	2.30 kB	2.30 kB
Shape	(144, 2)	(144, 2)
Count	2 Tasks	1 Chunks
Type	float64	numpy.ndarray

time

(time)

object

2015-01-16 12:00:00 ... 2100-12-...

axis :: T
bounds :: time_bnds
long_name :: time
standard_name :: time

array([cftime.DatetimeNoLeap(2015, 1, 16, 12, 0, 0, 0),
       cftime.DatetimeNoLeap(2015, 2, 15, 0, 0, 0, 0),
       cftime.DatetimeNoLeap(2015, 3, 16, 12, 0, 0, 0), ...,
       cftime.DatetimeNoLeap(2100, 10, 16, 12, 0, 0, 0),
       cftime.DatetimeNoLeap(2100, 11, 16, 0, 0, 0, 0),
       cftime.DatetimeNoLeap(2100, 12, 16, 12, 0, 0, 0)], dtype=object)

time_bnds

(time, bnds)

object

dask.array<chunksize=(1032, 2), meta=np.ndarray>

	Array	Chunk
Bytes	16.51 kB	16.51 kB
Shape	(1032, 2)	(1032, 2)
Count	2 Tasks	1 Chunks
Type	object	numpy.ndarray

member_id
(member_id)
<U8
'r1i1p3f1'
```
array(['r1i1p3f1'], dtype='<U8')
```

Data variables: (2)

tas

(member_id, time, lat, lon)

float32

-18.59 -18.59 ... -9.801 -9.801

cell_measures :: area: areacella
cell_methods :: area: time: mean
comment :: near-surface (usually, 2 meter) air temperature
history :: 2020-01-30T01:18:14Z altered by CMOR: Treated scalar dimension: 'height'. 2020-01-30T01:18:14Z altered by CMOR: replaced missing value flag (-1e+30) with standard missing value (1e+20).
long_name :: Near-Surface Air Temperature
standard_name :: air_temperature
units :: K

array([[[[-18.593388 , -18.593388 , -18.593388 , ..., -18.593388 ,
          -18.593388 , -18.593388 ],
         [-15.852711 , -16.0765   , -16.645649 , ..., -14.559483 ,
          -15.073917 , -16.03621  ],
         [-17.791164 , -18.283875 , -18.715885 , ..., -14.118355 ,
          -15.499252 , -16.531803 ],
         ...,
         [-23.355408 , -22.740643 , -22.221428 , ..., -25.199604 ,
          -24.953209 , -24.325256 ],
         [-23.514488 , -23.018105 , -22.834084 , ..., -24.49115  ,
          -24.212757 , -23.879677 ],
         [-21.324562 , -21.324562 , -21.324562 , ..., -21.324562 ,
          -21.324562 , -21.324562 ]],

        [[-33.810028 , -33.810028 , -33.810028 , ..., -33.810028 ,
          -33.810028 , -33.810028 ],
         [-32.721504 , -32.59774  , -33.492996 , ..., -31.081795 ,
          -31.74339  , -32.600266 ],
         [-32.670822 , -33.537895 , -34.52733  , ..., -28.448425 ,
          -29.767689 , -31.723286 ],
...
         [ -4.927246 ,  -5.065151 ,  -5.4043007, ...,  -5.081028 ,
           -4.8736343,  -5.050457 ],
         [ -6.1211014,  -6.346981 ,  -6.8316727, ...,  -5.3546143,
           -5.6111717,  -5.8427086],
         [ -7.8525467,  -7.8525467,  -7.8525467, ...,  -7.8525467,
           -7.8525467,  -7.8525467]],

        [[-10.089695 , -10.089695 , -10.089695 , ..., -10.089695 ,
          -10.089695 , -10.089695 ],
         [ -8.558861 ,  -8.325592 ,  -9.282913 , ...,  -7.56715  ,
           -8.2225685,  -8.961647 ],
         [ -8.26635  ,  -8.714012 ,  -8.902184 , ...,  -6.6202126,
           -7.451851 ,  -7.9408493],
         ...,
         [ -4.175205 ,  -3.836441 ,  -3.5659866, ...,  -5.956444 ,
           -4.997505 ,  -4.623474 ],
         [ -7.929344 ,  -7.654438 ,  -7.438446 , ...,  -9.076973 ,
           -8.771309 ,  -8.4853325],
         [ -9.800945 ,  -9.800945 ,  -9.800945 , ...,  -9.800945 ,
           -9.800945 ,  -9.800945 ]]]], dtype=float32)

('tas', 'units')
()
<U1
'F'
```
array('F', dtype='<U1')
```

Attributes: (53)
Conventions :
CF-1.7 CMIP-6.2
activity_id :
ScenarioMIP
branch_method :
standard
branch_time_in_child :
0.0
branch_time_in_parent :
0.0
cmor_version :
3.3.2
contact :
Kenneth Lo (cdkkl@giss.nasa.gov)
creation_date :
2020-01-30T01:18:14Z
data_specs_version :
01.00.23
experiment :
update of RCP2.6 based on SSP1
experiment_id :
ssp126
external_variables :
areacella
forcing_index :
1
frequency :
mon
further_info_url :
https://furtherinfo.es-doc.org/CMIP6.NASA-GISS.GISS-E2-1-G.ssp126.none.r1i1p3f1
grid :
atmospheric grid: 144x90, ocean grid: 288x180
grid_label :
gn
history :
2020-01-30T01:18:14Z ; CMOR rewrote data to be consistent with CMIP6, CF-1.7 CMIP-6.2 and CF standards.
initialization_index :
1
institution :
Goddard Institute for Space Studies, New York, NY 10025, USA
institution_id :
NASA-GISS
license :
CMIP6 model data produced by NASA Goddard Institute for Space Studies is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.
mip_era :
CMIP6
model_id :
E212TomaSSP126aF40oQ40
nominal_resolution :
250 km
parent_activity_id :
CMIP
parent_experiment_id :
historical
parent_experiment_rip :
r1i1p3
parent_mip_era :
CMIP6
parent_source_id :
GISS-E2-1-G
parent_time_units :
days since 2015-1-1
parent_variant_label :
r1i1p3f1
physics_index :
3
product :
model-output
realization_index :
1
realm :
atmos
references :
https://data.giss.nasa.gov/modelE/cmip6
source :
GISS-E2.1G (2019): aerosol: Varies with physics-version (p==1 none, p==3 OMA, p==4 TOMAS, p==5 MATRIX) atmos: GISS-E2.1 (2.5x2 degree; 144 x 90 longitude/latitude; 40 levels; top level 0.1 hPa) atmosChem: Varies with physics-version (p==1 Non-interactive, p>1 GPUCCINI) land: GISS LSM landIce: none ocean: GISS Ocean (GO1, 1 degree; 360 x 180 longitude/latitude; 40 levels; top grid cell 0-10 m) ocnBgchem: none seaIce: GISS SI
source_id :
GISS-E2-1-G
source_type :
AOGCM
status :
2020-03-22;created; by gcs.cmip6.ldeo@gmail.com
sub_experiment :
none
sub_experiment_id :
none
table_id :
Amon
table_info :
Creation Date:(21 March 2018) MD5:c93735846d66458966fc81f390b2d714
title :
GISS-E2-1-G output prepared for CMIP6
tracking_id :
hdl:21.14100/72e1e6f8-c5d1-4285-921f-ab81401252d1 hdl:21.14100/6fc8eae2-b435-4d05-b75a-a9974d2845a4
variable_id :
tas
variant_label :
r1i1p3f1
netcdf_tracking_ids :
hdl:21.14100/72e1e6f8-c5d1-4285-921f-ab81401252d1 hdl:21.14100/6fc8eae2-b435-4d05-b75a-a9974d2845a4
version_id :
v20200115
intake_esm_varname :
['tas']
intake_esm_dataset_key :
ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp126.Amon.gn

In [9]:

# Lets look at how the air temperature changed over the period of this simulation. We will extract the air temperature at the end from the air temperature at the beginning
# We take an average over the last twelve months minus the first twelve months to even out seasonality

# Lets look at SSP585, one of the more extreme emissions scenarios where little is done to curb greenhouse gas emissions

tas = datasets['ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp585.Amon.gn']['tas'].isel(time=np.arange(1032-12,1032)).mean(dim='time') - datasets['ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp585.Amon.gn']['tas'].isel(time=np.arange(0,12)).mean(dim='time')
tas.compute()

In [10]:

tas.plot()

Out[10]:

<matplotlib.collections.QuadMesh at 0x7fbcd08cf690>

In [11]:

# Compute the mean of the differences
mean_difference = tas.mean().compute()

In [12]:

mean_difference

Out[12]:

<xarray.DataArray 'tas' ()>
array(8.126888, dtype=float32)
Coordinates:
    height   float64 2.0

In [13]:

# That shows that on average Earth warms 8 F over the next 100 years in the SSP585 scenario according to the NASA GISS model!

In [22]:

# We can also look at how the temperature evolves over a single location for all of the scenarios too.
# Here we will look at Colorado Springs

location_name = 'Colorado Springs'
location_lon = -104.82
location_lat = 38.83

# convert to 0-360 for longitudes
if location_lon < 0:
     location_lon = 360 + location_lon

In [23]:

ds_scenarios = xr.Dataset()


# interate through the locations and create a dataset
# containing the temperature values for each location
for i in datasets:
    ds = datasets[i]

    ds2 = ds.sel(lon=location_lon, lat=location_lat, method='nearest')
    ds2 = ds2.rename({'tas' : i}).drop(('lat', 'lon', 'height', 'lat_bnds', 'lon_bnds', 'time_bnds', 'member_id'))
    ds_scenarios = xr.merge([ds_scenarios, ds2], compat='override')

ds_scenarios

In [24]:

# Compute the extraction of points and get them into a dataframe
df_f = ds_scenarios.to_dataframe()
df_f.describe()

Out[24]:

	CMIP.NASA-GISS.GISS-E2-1-G.historical.Amon.gn	ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp585.Amon.gn	ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp126.Amon.gn	ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp245.Amon.gn	ScenarioMIP.NASA-GISS.GISS-E2-1-G.ssp370.Amon.gn
count	1980.000000	1032.000000	1032.000000	1032.000000	1032.000000
mean	50.481350	56.429333	54.320847	54.670521	55.567814
std	17.083851	18.733564	17.760897	18.030851	18.147505
min	17.100286	24.288807	23.459339	17.731342	20.571692
25%	34.650837	38.736763	37.438919	37.466412	38.507408
50%	49.329117	55.102242	53.725243	53.735107	55.053162
75%	67.013054	74.381668	71.752480	72.313545	73.257935
max	82.487946	92.229523	87.206024	86.336349	93.018997

In [25]:

# Lets make a time series plot here so we can see the results.

ax = df_f.plot(figsize=(40, 10), title="NASA GISS", grid=1)
ax.set(xlabel='Date', ylabel='2-m Air Temperature (deg F)')
plt.show()

In [ ]: