%%time
import cartopy.crs as ccrs
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
import os
import xarray as xr
import xcdat as xc
import cdms2 as cdm
import cdutil as cdu
import cdtime as cdt
import cftime as cft
CPU times: user 7.03 s, sys: 1.56 s, total: 8.59 s Wall time: 7.5 s
def nearestNeighbourFill(data, missingValue=0):
"""
Documentation for nearestNeighbourFill():
-------
The nearestNeighbourFill() function iteratively infills a 2D matrix
with values from immediately neighbouring cells
Author: Paul J. Durack : pauldurack@llnl.gov
Inputs:
-----
| **data** - a numpy 2D array
| **missingValue** - missing value of data matrix
Returns:
-------
| **filledData** - a numpy array with no missingValues
Usage:
------
data = np.array([[1, 2, 3, 4],
[5, 0, 7, 8],
[9, 10, 11, 12]])
filledData = nearestNeighborFill(data, missingValue=0)
print(filledData)
Notes:
-----
* PJD 28 Nov 2023 - Started
"""
# Make copy of input matrix
filledData = data.copy()
# Find indices of missing values
missingIndices = np.argwhere(data == missingValue)
for idx in missingIndices:
row, col = idx
neighbors = []
# Iterate over neighbouring cells
for i in range(max(0, row - 1), min(data.shape[0], row + 2)):
for j in range(max(0, col - 1), min(data.shape[1], col + 2)):
if (i, j) != (row, col) and data[i, j] != missingValue:
neighbours.append(data[i, j])
# Fill missing value with the mean of neighbours
if neighbours:
filledData[row, col] = np.mean(neighbours)
return filledData
def iterativeZonalFill(data, missingValue=0):
"""
Documentation for iterativeZonalFill():
-------
The iterativeZonalFill() function iteratively infills a 2D matrix
with values zonal neighbouring cells
Author: Paul J. Durack : pauldurack@llnl.gov
Inputs:
-----
| **data** - a numpy 2D array
| **missingValue** - missing value of data matrix
Returns:
-------
| **filledData** - a numpy array with no missingValues
Usage:
------
data = np.array([[1, 2, 3, 4],
[5, 0, 7, 8],
[9, 10, 11, 12]])
filledData = iterativeZonalFill(data, missingValue=0)
print(filledData)
Notes:
-----
* PJD 28 Nov 2023 - Started
"""
# Make copy of input matrix
filledData = data.copy()
# Find indices of missing values
missingIndices = np.argwhere(data == missingValue)
# Define directions for iteration (right, down, left, up)
directions = [(0, 1), (1, 0), (0, -1), (-1, 0)]
for direction in directions:
dx, dy = direction
# Iterate over the data in the specified direction
for i in range(1, max(data.shape) + 1):
for idx in missingIndices:
row, col = idx
new_row, new_col = row + i * dx, col + i * dy
# Check if the new indices are within the data boundaries
if 0 <= new_row < data.shape[0] and 0 <= new_col < data.shape[1]:
if data[new_row, new_col] != missingValue:
filledData[row, col] = data[new_row, new_col]
return filledData
obsPath = "/p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/"
print(obsPath.split("/")[1:])
obsPathXml = os.path.join("/", *obsPath.split("/")[1:7], "199801.xml")
# open file handle
fH = cdm.open(obsPathXml)
# read sst variable
sst199801 = fH("analysed_sst")
print("sst199801.getTime():", sst199801.getTime())
time199801 = sst199801.getTime().asComponentTime()
#print("time199801:", time199801)
# assign correct bounds for daily data
cdu.setTimeBoundsDaily(sst199801)
time199801d = sst199801.getTime().asComponentTime()
#print("time199801d:", time199801d) # identical to time199801
# calculate monthly mean
sst199801mean = cdu.JAN(sst199801)
# query array shapes
print("sst199801.shape:", sst199801.shape)
print("sst199801mean.shape:", sst199801mean.shape)
# query cdat-generated time values
time199801mean = sst199801mean.getTime().asComponentTime()
print()
print("time199801mean:\n", time199801mean[0])
sst1998meanTimeBounds = sst199801mean.getTime().getBounds()[0]
# map back to relative
origin = dt.datetime(1981, 1, 1, 0, 0, 0)
startBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[0])
endBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[1])
print("time199801mean bounds:\n", startBounds, "\n", endBounds)
fH.close()
['p', 'user_pub', 'PCMDIobs', 'obs4MIPs_input', 'RSS', 'RSS-MW5-1', 'v20230605', ''] sst199801.getTime(): id: time Designated a time axis. units: seconds since 1981-01-01 00:00:00 Length: 31 First: 536500800.0 Last: 539092800.0 Other axis attributes: axis: T calendar: gregorian realtopology: linear Python id: 0x7f2aeb89cee0 sst199801.shape: (31, 720, 1440) sst199801mean.shape: (1, 720, 1440) time199801mean: 1998-1-16 12:0:0.0 time199801mean bounds: 1998-01-01 00:00:00 1998-02-01 00:00:00
def setCalendar(ds):
# https://github.com/pydata/xarray/issues/6259
ds.time.attrs["calendar"] = "standard"
ds.time.attrs["units"] = "seconds since 1981-01-01 00:00:00"
return ds
#return xr.decode_cf(ds)
dataPath = os.path.join(obsPath, "199801*.nc")
#dataPath = os.path.join(obsPath, "1998*.nc")
print("dataPath:", dataPath)
ds = xc.open_mfdataset(dataPath, preprocess=setCalendar)
print("done!")
dataPath: /p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/199801*.nc done!
ds
<xarray.Dataset> Dimensions: (lat: 720, lon: 1440, time: 31, bnds: 2) Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9 * time (time) object 1998-01-01 12:00:00 ... 1998-01-31 12:00:00 Dimensions without coordinates: bnds Data variables: analysed_sst (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> analysis_error (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> sea_ice_fraction (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> mask (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 180.0 lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 90.0 Attributes: (12/46) Conventions: CF-1.8,ACDD-1.3 title: Analysed foundation sea surface temperature o... summary: A merged, multi-sensor L4 foundation SST prod... references: http://www.remss.com/measurements/sea-surface... institution: REMSS history: 2021-11-11 18:18:15+0000 created by sst_fusio... ... ... project: Group for High Resolution Sea Surface Tempera... publisher_name: The GHRSST Project Office publisher_email: ghrsst-po@nceo.ac.uk publisher_url: http://www.ghrsst.org processing_level: L4 cdm_data_type: grid
ds.time.attrs['calendar'] = 'standard'
ds
<xarray.Dataset> Dimensions: (lat: 720, lon: 1440, time: 31, bnds: 2) Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9 * time (time) object 1998-01-01 12:00:00 ... 1998-01-31 12:00:00 Dimensions without coordinates: bnds Data variables: analysed_sst (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> analysis_error (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> sea_ice_fraction (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> mask (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 180.0 lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 90.0 Attributes: (12/46) Conventions: CF-1.8,ACDD-1.3 title: Analysed foundation sea surface temperature o... summary: A merged, multi-sensor L4 foundation SST prod... references: http://www.remss.com/measurements/sea-surface... institution: REMSS history: 2021-11-11 18:18:15+0000 created by sst_fusio... ... ... project: Group for High Resolution Sea Surface Tempera... publisher_name: The GHRSST Project Office publisher_email: ghrsst-po@nceo.ac.uk publisher_url: http://www.ghrsst.org processing_level: L4 cdm_data_type: grid
ds = ds.bounds.add_missing_bounds(axes="T")
#ds.time.attrs["bounds"] = "time_bnds"
ds
<xarray.Dataset> Dimensions: (lat: 720, lon: 1440, time: 31, bnds: 2) Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9 * time (time) object 1998-01-01 12:00:00 ... 1998-01-31 12:00:00 Dimensions without coordinates: bnds Data variables: analysed_sst (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> analysis_error (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> sea_ice_fraction (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> mask (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 180.0 lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 90.0 time_bnds (time, bnds) object 1998-01-01 00:00:00 ... 1998-02-01 ... Attributes: (12/46) Conventions: CF-1.8,ACDD-1.3 title: Analysed foundation sea surface temperature o... summary: A merged, multi-sensor L4 foundation SST prod... references: http://www.remss.com/measurements/sea-surface... institution: REMSS history: 2021-11-11 18:18:15+0000 created by sst_fusio... ... ... project: Group for High Resolution Sea Surface Tempera... publisher_name: The GHRSST Project Office publisher_email: ghrsst-po@nceo.ac.uk publisher_url: http://www.ghrsst.org processing_level: L4 cdm_data_type: grid
mean_monthXc = ds.temporal.group_average("analysed_sst", freq="month", weighted=True)
mean_monthXc
# loses bounds
# time should be 1998-1-16 12:00, but is rather 1998-1-1 00:00
<xarray.Dataset> Dimensions: (lat: 720, lon: 1440, bnds: 2, time: 1) Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9 * time (time) object 1998-01-01 00:00:00 Dimensions without coordinates: bnds Data variables: lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 179.8 180.0 lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 89.75 90.0 analysed_sst (time, lat, lon) float64 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> Attributes: (12/46) Conventions: CF-1.8,ACDD-1.3 title: Analysed foundation sea surface temperature o... summary: A merged, multi-sensor L4 foundation SST prod... references: http://www.remss.com/measurements/sea-surface... institution: REMSS history: 2021-11-11 18:18:15+0000 created by sst_fusio... ... ... project: Group for High Resolution Sea Surface Tempera... publisher_name: The GHRSST Project Office publisher_email: ghrsst-po@nceo.ac.uk publisher_url: http://www.ghrsst.org processing_level: L4 cdm_data_type: grid
#mean_monthXc2 = mean_monthXc.bounds.add_time_bounds(method="freq", freq="month")
# reassigning bounds fails due to
# ValueError: Cannot generate bounds for coordinate variable 'time' which has a length <= 1 (singleton).
#mean_monthXc.time
# update new time value
mean_monthXc.variables["time"]
newTime = [cft.DatetimeGregorian(1998, 1, 16, 12, 0, 0, 0, has_year_zero=False)]
mean_monthXc.update({"time": ("time", newTime)})
# assign new time_bnds variable
newTimeBoundVals = np.array([[cft.DatetimeGregorian(1998, 1, 1, 0, 0, 0, 0, has_year_zero=False),
cft.DatetimeGregorian(1998, 2, 1, 0, 0, 0, 0, has_year_zero=False)]])
print(type(newTimeBoundVals))
print(newTimeBoundVals.shape)
newTimeBounds = xr.Dataset({"time_bnds": (("time", "bnds"), newTimeBoundVals)})
#newTimeBounds
mean_monthXc.assign(newTimeBounds)
<class 'numpy.ndarray'> (1, 2)
<xarray.Dataset> Dimensions: (lat: 720, lon: 1440, bnds: 2, time: 1) Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9 * time (time) object 1998-01-16 12:00:00 Dimensions without coordinates: bnds Data variables: lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 179.8 180.0 lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 89.75 90.0 analysed_sst (time, lat, lon) float64 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray> time_bnds (time, bnds) object 1998-01-01 00:00:00 1998-02-01 00:00:00 Attributes: (12/46) Conventions: CF-1.8,ACDD-1.3 title: Analysed foundation sea surface temperature o... summary: A merged, multi-sensor L4 foundation SST prod... references: http://www.remss.com/measurements/sea-surface... institution: REMSS history: 2021-11-11 18:18:15+0000 created by sst_fusio... ... ... project: Group for High Resolution Sea Surface Tempera... publisher_name: The GHRSST Project Office publisher_email: ghrsst-po@nceo.ac.uk publisher_url: http://www.ghrsst.org processing_level: L4 cdm_data_type: grid
mean_monthXc2 = ds.temporal.average("analysed_sst", weighted=True)
mean_monthXc2
# loses time dimension altogether
<xarray.Dataset> Dimensions: (lat: 720, lon: 1440, bnds: 2) Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9 Dimensions without coordinates: bnds Data variables: lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 179.8 180.0 lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 89.75 90.0 analysed_sst (lat, lon) float64 dask.array<chunksize=(720, 1440), meta=np.ndarray> Attributes: (12/46) Conventions: CF-1.8,ACDD-1.3 title: Analysed foundation sea surface temperature o... summary: A merged, multi-sensor L4 foundation SST prod... references: http://www.remss.com/measurements/sea-surface... institution: REMSS history: 2021-11-11 18:18:15+0000 created by sst_fusio... ... ... project: Group for High Resolution Sea Surface Tempera... publisher_name: The GHRSST Project Office publisher_email: ghrsst-po@nceo.ac.uk publisher_url: http://www.ghrsst.org processing_level: L4 cdm_data_type: grid
mean_monthXr = ds.analysed_sst.resample(time='ME').mean()
# https://stackoverflow.com/questions/50564459/using-xarray-to-make-monthly-average
mean_monthXr
# time should be 1998-1-16 12:00, but is rather 1998-1-31 00:00
<xarray.DataArray 'analysed_sst' (time: 1, lat: 720, lon: 1440)> dask.array<stack, shape=(1, 720, 1440), dtype=float32, chunksize=(1, 720, 1440), chunktype=numpy.ndarray> Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 -179.1 ... 179.4 179.6 179.9 * time (time) object 1998-01-31 00:00:00 Attributes: units: K long_name: analysed sea surface temperature standard_name: sea_surface_foundation_temperature valid_min: -32767 valid_max: 32767 source: REMSS-L3C-TMI coverage_content_type: physicalMeasurement
ds.analysed_sst
<xarray.DataArray 'analysed_sst' (time: 31, lat: 720, lon: 1440)> dask.array<concatenate, shape=(31, 720, 1440), dtype=float32, chunksize=(1, 720, 1440), chunktype=numpy.ndarray> Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 -179.1 ... 179.4 179.6 179.9 * time (time) object 1998-01-01 12:00:00 ... 1998-01-31 12:00:00 Attributes: units: K long_name: analysed sea surface temperature standard_name: sea_surface_foundation_temperature valid_min: -32767 valid_max: 32767 source: REMSS-L3C-TMI coverage_content_type: physicalMeasurement
xr.show_versions()
INSTALLED VERSIONS ------------------ commit: None python: 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0] python-bits: 64 OS: Linux OS-release: 3.10.0-1160.90.1.el7.x86_64 machine: x86_64 processor: x86_64 byteorder: little LC_ALL: en_US.UTF-8 LANG: en_US.UTF-8 LOCALE: ('en_US', 'UTF-8') libhdf5: 1.14.3 libnetcdf: 4.9.2 xarray: 2023.12.0 pandas: 2.1.4 numpy: 1.26.3 scipy: 1.11.4 netCDF4: 1.6.5 pydap: None h5netcdf: None h5py: None Nio: None zarr: None cftime: 1.6.3 nc_time_axis: None iris: None bottleneck: 1.3.7 dask: 2023.12.1 distributed: 2023.12.1 matplotlib: 3.8.2 cartopy: 0.22.0 seaborn: None numbagg: None fsspec: 2023.12.2 cupy: None pint: None sparse: 0.15.1 flox: None numpy_groupies: None setuptools: 69.0.3 pip: 23.3.2 conda: None pytest: None mypy: None IPython: 8.20.0 sphinx: None