#!/usr/bin/env python
# coding: utf-8

# # Calculate monthly mean data from daily inputs

# ## notes
MW5-1 19980101 to 20230603
MW-IR5-1 20020601 to 20230603
xr.ds.to-netcdf
ds.var.attr
.compute() or .load()

https://github.com/PCMDI/input4MIPs_CVs/issues/5

Salinity
OISSS/UHawaii
https://search.earthdata.nasa.gov/search/granules?p=C2589160971-POCLOUD&pg[0][v]=f&pg[0][gsk]=-start_date&tl=1700072617!3!!
https://podaac.jpl.nasa.gov/dataset/OISSS_L4_multimission_7day_v2
https://github.com/podaac/data-subscriber/tree/main - https://podaac.jpl.nasa.gov/legacy_retirement.html

240111: updated from xcd060 to xcd061cdmcdu env
(xcd061cdmcdu) bash-4.2$ cdscan -x 199801.xml v20230605/199801*.nc
240112: xCDAT temporal_average issue https://github.com/xCDAT/xcdat/issues/586

To-do
# ## imports

# In[1]:


get_ipython().run_cell_magic('time', '', 'import cartopy.crs as ccrs\nimport datetime as dt\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport os\nimport xarray as xr\nimport xcdat as xc\nimport cdms2 as cdm\nimport cdutil as cdu\nimport cdtime as cdt\nimport cftime as cft\n')


# ## functions

# In[2]:


def nearestNeighbourFill(data, missingValue=0):
    """
    Documentation for nearestNeighbourFill():
    -------
    The nearestNeighbourFill() function iteratively infills a 2D matrix
    with values from immediately neighbouring cells

    Author: Paul J. Durack : pauldurack@llnl.gov

    Inputs:
    -----

    |  **data** - a numpy 2D array
    |  **missingValue** - missing value of data matrix

    Returns:
    -------

    |  **filledData** - a numpy array with no missingValues

    Usage:
    ------
        data = np.array([[1, 2, 3, 4],
                         [5, 0, 7, 8],
                         [9, 10, 11, 12]])

        filledData = nearestNeighborFill(data, missingValue=0)
        print(filledData)
    
    Notes:
    -----
    * PJD 28 Nov 2023 - Started
    """

    # Make copy of input matrix
    filledData = data.copy()

    # Find indices of missing values
    missingIndices = np.argwhere(data == missingValue)

    for idx in missingIndices:
        row, col = idx
        neighbors = []

        # Iterate over neighbouring cells
        for i in range(max(0, row - 1), min(data.shape[0], row + 2)):
            for j in range(max(0, col - 1), min(data.shape[1], col + 2)):
                if (i, j) != (row, col) and data[i, j] != missingValue:
                    neighbours.append(data[i, j])

        # Fill missing value with the mean of neighbours
        if neighbours:
            filledData[row, col] = np.mean(neighbours)

    return filledData


def iterativeZonalFill(data, missingValue=0):
    """
    Documentation for iterativeZonalFill():
    -------
    The iterativeZonalFill() function iteratively infills a 2D matrix
    with values zonal neighbouring cells

    Author: Paul J. Durack : pauldurack@llnl.gov

    Inputs:
    -----

    |  **data** - a numpy 2D array
    |  **missingValue** - missing value of data matrix

    Returns:
    -------

    |  **filledData** - a numpy array with no missingValues

    Usage:
    ------
        data = np.array([[1, 2, 3, 4],
                         [5, 0, 7, 8],
                         [9, 10, 11, 12]])

        filledData = iterativeZonalFill(data, missingValue=0)
        print(filledData)
    
    Notes:
    -----
    * PJD 28 Nov 2023 - Started
    """
   
    # Make copy of input matrix
    filledData = data.copy()

    # Find indices of missing values
    missingIndices = np.argwhere(data == missingValue)

    # Define directions for iteration (right, down, left, up)
    directions = [(0, 1), (1, 0), (0, -1), (-1, 0)]

    for direction in directions:
        dx, dy = direction

        # Iterate over the data in the specified direction
        for i in range(1, max(data.shape) + 1):
            for idx in missingIndices:
                row, col = idx
                new_row, new_col = row + i * dx, col + i * dy

                # Check if the new indices are within the data boundaries
                if 0 <= new_row < data.shape[0] and 0 <= new_col < data.shape[1]:
                    if data[new_row, new_col] != missingValue:
                        filledData[row, col] = data[new_row, new_col]

    return filledData


# ## set data paths

# In[3]:


obsPath = "/p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/"


# # cdat reads

# In[4]:


print(obsPath.split("/")[1:])
obsPathXml = os.path.join("/", *obsPath.split("/")[1:7], "199801.xml")
# open file handle
fH = cdm.open(obsPathXml)
# read sst variable
sst199801 = fH("analysed_sst")
print("sst199801.getTime():", sst199801.getTime())
time199801 = sst199801.getTime().asComponentTime()
#print("time199801:", time199801)
# assign correct bounds for daily data
cdu.setTimeBoundsDaily(sst199801)
time199801d = sst199801.getTime().asComponentTime()
#print("time199801d:", time199801d)  # identical to time199801
# calculate monthly mean
sst199801mean = cdu.JAN(sst199801)
# query array shapes
print("sst199801.shape:", sst199801.shape)
print("sst199801mean.shape:", sst199801mean.shape)
# query cdat-generated time values
time199801mean = sst199801mean.getTime().asComponentTime()
print()
print("time199801mean:\n", time199801mean[0])
sst1998meanTimeBounds = sst199801mean.getTime().getBounds()[0]
# map back to relative
origin = dt.datetime(1981, 1, 1, 0, 0, 0)
startBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[0])
endBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[1])
print("time199801mean bounds:\n", startBounds, "\n", endBounds)
fH.close()


# # xcdat reads

# In[4]:


def setCalendar(ds):
    # https://github.com/pydata/xarray/issues/6259
    ds.time.attrs["calendar"] = "standard"
    ds.time.attrs["units"] = "seconds since 1981-01-01 00:00:00"
    return ds
    #return xr.decode_cf(ds)

dataPath = os.path.join(obsPath, "199801*.nc")
#dataPath = os.path.join(obsPath, "1998*.nc")
print("dataPath:", dataPath)
ds = xc.open_mfdataset(dataPath, preprocess=setCalendar)
print("done!")


# ## view dataset

# In[5]:


ds


# ## add calendar attribute to time axis

# In[6]:


ds.time.attrs['calendar'] = 'standard'


# In[7]:


ds


# ## add missing time_bnds

# In[8]:


ds = ds.bounds.add_missing_bounds(axes="T")
#ds.time.attrs["bounds"] = "time_bnds"


# In[9]:


ds


# ## try xCDAT temporal.group_average

# In[39]:


mean_monthXc = ds.temporal.group_average("analysed_sst", freq="month", weighted=True)


# In[40]:


mean_monthXc
# loses bounds
# time should be 1998-1-16 12:00, but is rather 1998-1-1 00:00


# In[41]:


#mean_monthXc2 = mean_monthXc.bounds.add_time_bounds(method="freq", freq="month")
# reassigning bounds fails due to 
# ValueError: Cannot generate bounds for coordinate variable 'time' which has a length <= 1 (singleton).


# ## correct xCDAT temporal.group_average

# In[69]:


#mean_monthXc.time
# update new time value
mean_monthXc.variables["time"]
newTime = [cft.DatetimeGregorian(1998, 1, 16, 12, 0, 0, 0, has_year_zero=False)]
mean_monthXc.update({"time": ("time", newTime)})
# assign new time_bnds variable
newTimeBoundVals = np.array([[cft.DatetimeGregorian(1998, 1, 1, 0, 0, 0, 0, has_year_zero=False),
                 cft.DatetimeGregorian(1998, 2, 1, 0, 0, 0, 0, has_year_zero=False)]])
print(type(newTimeBoundVals))
print(newTimeBoundVals.shape)
newTimeBounds = xr.Dataset({"time_bnds": (("time", "bnds"), newTimeBoundVals)})
#newTimeBounds
mean_monthXc.assign(newTimeBounds)


# ## try xCDAT temporal.average

# In[18]:


mean_monthXc2 = ds.temporal.average("analysed_sst", weighted=True)


# In[21]:


mean_monthXc2
# loses time dimension altogether


# # xarray reads

# In[24]:


mean_monthXr = ds.analysed_sst.resample(time='ME').mean()
# https://stackoverflow.com/questions/50564459/using-xarray-to-make-monthly-average


# In[25]:


mean_monthXr
# time should be 1998-1-16 12:00, but is rather 1998-1-31 00:00


# In[12]:


ds.analysed_sst


# In[27]:


xr.show_versions()


# In[ ]: