#!/usr/bin/env python # coding: utf-8 # # Calculate monthly mean data from daily inputs # ## notes MW5-1 19980101 to 20230603 MW-IR5-1 20020601 to 20230603 xr.ds.to-netcdf ds.var.attr .compute() or .load() https://github.com/PCMDI/input4MIPs_CVs/issues/5 Salinity OISSS/UHawaii https://search.earthdata.nasa.gov/search/granules?p=C2589160971-POCLOUD&pg[0][v]=f&pg[0][gsk]=-start_date&tl=1700072617!3!! https://podaac.jpl.nasa.gov/dataset/OISSS_L4_multimission_7day_v2 https://github.com/podaac/data-subscriber/tree/main - https://podaac.jpl.nasa.gov/legacy_retirement.html 240111: updated from xcd060 to xcd061cdmcdu env (xcd061cdmcdu) bash-4.2$ cdscan -x 199801.xml v20230605/199801*.nc 240112: xCDAT temporal_average issue https://github.com/xCDAT/xcdat/issues/586 To-do # ## imports # In[1]: get_ipython().run_cell_magic('time', '', 'import cartopy.crs as ccrs\nimport datetime as dt\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport os\nimport xarray as xr\nimport xcdat as xc\nimport cdms2 as cdm\nimport cdutil as cdu\nimport cdtime as cdt\nimport cftime as cft\n') # ## functions # In[2]: def nearestNeighbourFill(data, missingValue=0): """ Documentation for nearestNeighbourFill(): ------- The nearestNeighbourFill() function iteratively infills a 2D matrix with values from immediately neighbouring cells Author: Paul J. Durack : pauldurack@llnl.gov Inputs: ----- | **data** - a numpy 2D array | **missingValue** - missing value of data matrix Returns: ------- | **filledData** - a numpy array with no missingValues Usage: ------ data = np.array([[1, 2, 3, 4], [5, 0, 7, 8], [9, 10, 11, 12]]) filledData = nearestNeighborFill(data, missingValue=0) print(filledData) Notes: ----- * PJD 28 Nov 2023 - Started """ # Make copy of input matrix filledData = data.copy() # Find indices of missing values missingIndices = np.argwhere(data == missingValue) for idx in missingIndices: row, col = idx neighbors = [] # Iterate over neighbouring cells for i in range(max(0, row - 1), min(data.shape[0], row + 2)): for j in range(max(0, col - 1), min(data.shape[1], col + 2)): if (i, j) != (row, col) and data[i, j] != missingValue: neighbours.append(data[i, j]) # Fill missing value with the mean of neighbours if neighbours: filledData[row, col] = np.mean(neighbours) return filledData def iterativeZonalFill(data, missingValue=0): """ Documentation for iterativeZonalFill(): ------- The iterativeZonalFill() function iteratively infills a 2D matrix with values zonal neighbouring cells Author: Paul J. Durack : pauldurack@llnl.gov Inputs: ----- | **data** - a numpy 2D array | **missingValue** - missing value of data matrix Returns: ------- | **filledData** - a numpy array with no missingValues Usage: ------ data = np.array([[1, 2, 3, 4], [5, 0, 7, 8], [9, 10, 11, 12]]) filledData = iterativeZonalFill(data, missingValue=0) print(filledData) Notes: ----- * PJD 28 Nov 2023 - Started """ # Make copy of input matrix filledData = data.copy() # Find indices of missing values missingIndices = np.argwhere(data == missingValue) # Define directions for iteration (right, down, left, up) directions = [(0, 1), (1, 0), (0, -1), (-1, 0)] for direction in directions: dx, dy = direction # Iterate over the data in the specified direction for i in range(1, max(data.shape) + 1): for idx in missingIndices: row, col = idx new_row, new_col = row + i * dx, col + i * dy # Check if the new indices are within the data boundaries if 0 <= new_row < data.shape[0] and 0 <= new_col < data.shape[1]: if data[new_row, new_col] != missingValue: filledData[row, col] = data[new_row, new_col] return filledData # ## set data paths # In[3]: obsPath = "/p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/" # # cdat reads # In[4]: print(obsPath.split("/")[1:]) obsPathXml = os.path.join("/", *obsPath.split("/")[1:7], "199801.xml") # open file handle fH = cdm.open(obsPathXml) # read sst variable sst199801 = fH("analysed_sst") print("sst199801.getTime():", sst199801.getTime()) time199801 = sst199801.getTime().asComponentTime() #print("time199801:", time199801) # assign correct bounds for daily data cdu.setTimeBoundsDaily(sst199801) time199801d = sst199801.getTime().asComponentTime() #print("time199801d:", time199801d) # identical to time199801 # calculate monthly mean sst199801mean = cdu.JAN(sst199801) # query array shapes print("sst199801.shape:", sst199801.shape) print("sst199801mean.shape:", sst199801mean.shape) # query cdat-generated time values time199801mean = sst199801mean.getTime().asComponentTime() print() print("time199801mean:\n", time199801mean[0]) sst1998meanTimeBounds = sst199801mean.getTime().getBounds()[0] # map back to relative origin = dt.datetime(1981, 1, 1, 0, 0, 0) startBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[0]) endBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[1]) print("time199801mean bounds:\n", startBounds, "\n", endBounds) fH.close() # # xcdat reads # In[4]: def setCalendar(ds): # https://github.com/pydata/xarray/issues/6259 ds.time.attrs["calendar"] = "standard" ds.time.attrs["units"] = "seconds since 1981-01-01 00:00:00" return ds #return xr.decode_cf(ds) dataPath = os.path.join(obsPath, "199801*.nc") #dataPath = os.path.join(obsPath, "1998*.nc") print("dataPath:", dataPath) ds = xc.open_mfdataset(dataPath, preprocess=setCalendar) print("done!") # ## view dataset # In[5]: ds # ## add calendar attribute to time axis # In[6]: ds.time.attrs['calendar'] = 'standard' # In[7]: ds # ## add missing time_bnds # In[8]: ds = ds.bounds.add_missing_bounds(axes="T") #ds.time.attrs["bounds"] = "time_bnds" # In[9]: ds # ## try xCDAT temporal.group_average # In[39]: mean_monthXc = ds.temporal.group_average("analysed_sst", freq="month", weighted=True) # In[40]: mean_monthXc # loses bounds # time should be 1998-1-16 12:00, but is rather 1998-1-1 00:00 # In[41]: #mean_monthXc2 = mean_monthXc.bounds.add_time_bounds(method="freq", freq="month") # reassigning bounds fails due to # ValueError: Cannot generate bounds for coordinate variable 'time' which has a length <= 1 (singleton). # ## correct xCDAT temporal.group_average # In[69]: #mean_monthXc.time # update new time value mean_monthXc.variables["time"] newTime = [cft.DatetimeGregorian(1998, 1, 16, 12, 0, 0, 0, has_year_zero=False)] mean_monthXc.update({"time": ("time", newTime)}) # assign new time_bnds variable newTimeBoundVals = np.array([[cft.DatetimeGregorian(1998, 1, 1, 0, 0, 0, 0, has_year_zero=False), cft.DatetimeGregorian(1998, 2, 1, 0, 0, 0, 0, has_year_zero=False)]]) print(type(newTimeBoundVals)) print(newTimeBoundVals.shape) newTimeBounds = xr.Dataset({"time_bnds": (("time", "bnds"), newTimeBoundVals)}) #newTimeBounds mean_monthXc.assign(newTimeBounds) # ## try xCDAT temporal.average # In[18]: mean_monthXc2 = ds.temporal.average("analysed_sst", weighted=True) # In[21]: mean_monthXc2 # loses time dimension altogether # # xarray reads # In[24]: mean_monthXr = ds.analysed_sst.resample(time='ME').mean() # https://stackoverflow.com/questions/50564459/using-xarray-to-make-monthly-average # In[25]: mean_monthXr # time should be 1998-1-16 12:00, but is rather 1998-1-31 00:00 # In[12]: ds.analysed_sst # In[27]: xr.show_versions() # In[ ]: