#!/usr/bin/env python # coding: utf-8 # # Calculate monthly mean data from daily inputs - differences between CDAT vs xCDAT/xarray # ## notes MW5-1 19980101 to 20230603 MW-IR5-1 20020601 to 20230603 xr.ds.to-netcdf ds.var.attr .compute() or .load() https://github.com/PCMDI/input4MIPs_CVs/issues/5 240111: updated from xcd060 to xcd061cdmcdu env (xcd061cdmcdu) bash-4.2$ cdscan -x 199801.xml v20230605/199801*.nc 240112: xCDAT temporal_average issue https://github.com/xCDAT/xcdat/issues/586 240118: renamed and cleaned up as standalone cdat vs xcdat/xarray daily -> monthly time averaging example To-do # ## imports # In[4]: get_ipython().run_cell_magic('time', '', '# using xcd061cdmcdu env\nimport cartopy.crs as ccrs\nimport datetime as dt\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport os\nimport xarray as xr\nimport xcdat as xc\nimport cdms2 as cdm\nimport cdutil as cdu\nimport cdtime as cdt\nimport cftime as cft\n') # ## set data paths # In[5]: obsPath = "/p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/" # # cdat reads # In[6]: print(obsPath.split("/")[1:]) obsPathXml = os.path.join("/", *obsPath.split("/")[1:7], "199801.xml") # open file handle fH = cdm.open(obsPathXml) # read sst variable sst199801 = fH("analysed_sst") print("sst199801.getTime():", sst199801.getTime()) time199801 = sst199801.getTime().asComponentTime() #print("time199801:", time199801) # assign correct bounds for daily data cdu.setTimeBoundsDaily(sst199801) time199801d = sst199801.getTime().asComponentTime() #print("time199801d:", time199801d) # identical to time199801 # calculate monthly mean sst199801mean = cdu.JAN(sst199801) # query array shapes print("sst199801.shape:", sst199801.shape) print("sst199801mean.shape:", sst199801mean.shape) # query cdat-generated time values time199801mean = sst199801mean.getTime().asComponentTime() print() print("time199801mean:\n", time199801mean[0]) sst1998meanTimeBounds = sst199801mean.getTime().getBounds()[0] # map back to relative origin = dt.datetime(1981, 1, 1, 0, 0, 0) startBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[0]) endBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[1]) print("time199801mean bounds:\n", startBounds, "\n", endBounds) fH.close() # # xcdat reads # In[7]: def setCalendar(ds): # https://github.com/pydata/xarray/issues/6259 ds.time.attrs["calendar"] = "standard" ds.time.attrs["units"] = "seconds since 1981-01-01 00:00:00" return ds #return xr.decode_cf(ds) dataPath = os.path.join(obsPath, "199801*.nc") #dataPath = os.path.join(obsPath, "1998*.nc") print("dataPath:", dataPath) ds = xc.open_mfdataset(dataPath, preprocess=setCalendar) print("done!") # ## view dataset # In[8]: ds # ## add calendar attribute to time axis # In[9]: ds.time.attrs['calendar'] = 'standard' # In[10]: ds # ## add missing time_bnds # In[11]: ds = ds.bounds.add_missing_bounds(axes="T") #ds.time.attrs["bounds"] = "time_bnds" # In[12]: ds # ## try xCDAT temporal.group_average # In[13]: mean_monthXc = ds.temporal.group_average("analysed_sst", freq="month", weighted=True) # In[14]: mean_monthXc # loses bounds # time should be 1998-1-16 12:00, but is rather 1998-1-1 00:00 # In[15]: #mean_monthXc2 = mean_monthXc.bounds.add_time_bounds(method="freq", freq="month") # reassigning bounds fails due to # ValueError: Cannot generate bounds for coordinate variable 'time' which has a length <= 1 (singleton). # ## correct xCDAT temporal.group_average # In[16]: #mean_monthXc.time # update new time value mean_monthXc.variables["time"] newTime = [cft.DatetimeGregorian(1998, 1, 16, 12, 0, 0, 0, has_year_zero=False)] mean_monthXc.update({"time": ("time", newTime)}) # assign new time_bnds variable newTimeBoundVals = np.array([[cft.DatetimeGregorian(1998, 1, 1, 0, 0, 0, 0, has_year_zero=False), cft.DatetimeGregorian(1998, 2, 1, 0, 0, 0, 0, has_year_zero=False)]]) print(type(newTimeBoundVals)) print(newTimeBoundVals.shape) newTimeBounds = xr.Dataset({"time_bnds": (("time", "bnds"), newTimeBoundVals)}) #newTimeBounds mean_monthXc.assign(newTimeBounds) # ## try xCDAT temporal.average # In[17]: mean_monthXc2 = ds.temporal.average("analysed_sst", weighted=True) # In[18]: mean_monthXc2 # loses time dimension altogether # # xarray reads # In[19]: mean_monthXr = ds.analysed_sst.resample(time='ME').mean() # https://stackoverflow.com/questions/50564459/using-xarray-to-make-monthly-average # In[20]: mean_monthXr # time should be 1998-1-16 12:00, but is rather 1998-1-31 00:00 # In[21]: ds.analysed_sst # In[22]: xr.show_versions() # In[ ]: