# Name of the cloud-optimised dataset; it is interpolated into the S3 URL of
# the Zarr store when the dataset is opened further down.
dataset_name = "satellite_chlorophylla_oc3_1day_aqua"
import xarray as xr
import fsspec
# Install the `uv` installer, which the dependency-install step below invokes.
# Only run once, then restart the session if needed.
!pip install uv
import os
import sys
def is_colab():
    """Report whether the notebook is running on Google Colab.

    Returns:
        bool: True when ``google.colab`` can be imported, False when the
        import raises ImportError.
    """
    try:
        # Imported purely as a presence probe; the module itself is unused.
        import google.colab  # noqa: F401
    except ImportError:
        return False
    else:
        return True
# Install the notebook's dependencies with uv, from a local requirements.txt
# when one exists, otherwise from the copy published on GitHub.
import subprocess


def _run_command(args):
    """Run ``args`` without a shell and return its exit code.

    Passing an argument list (instead of an interpolated shell string) keeps
    paths containing spaces or shell metacharacters intact. Failures are
    reported with a printed message rather than raised, so the cell remains
    best-effort as before, but is no longer silent.

    Args:
        args: The command and its arguments, as a list of strings.

    Returns:
        int: The command's exit code, or 127 when it could not be started.
    """
    try:
        exit_code = subprocess.run(args, check=False).returncode
    except OSError as err:
        print(f"Could not run {args[0]!r}: {err}")
        return 127
    if exit_code != 0:
        print(f"Command {' '.join(args)!r} failed with exit code {exit_code}")
    return exit_code


# Get the current directory of the notebook
current_dir = os.getcwd()

# Check if requirements.txt exists in the current directory
local_requirements = os.path.join(current_dir, 'requirements.txt')
if os.path.exists(local_requirements):
    requirements_path = local_requirements
else:
    # Fall back to the online requirements.txt file
    requirements_path = 'https://raw.githubusercontent.com/aodn/aodn_cloud_optimised/main/notebooks/requirements.txt'

# Install packages using uv and the determined requirements file
if is_colab():
    xr.set_options(display_style='text')
    # On Colab, install into the system interpreter (`--system`).
    _run_command(['uv', 'pip', 'install', '--system', '-r', requirements_path])
else:
    # Elsewhere, create a virtual environment first, then install.
    _run_command(['uv', 'venv'])
    _run_command(['uv', 'pip', 'install', '-r', requirements_path])
Requirement already satisfied: uv in /home/lbesnard/miniforge3/envs/AodnCloudOptimised/lib/python3.12/site-packages (0.4.18)
Using CPython 3.12.6 interpreter at: /home/lbesnard/miniforge3/envs/AodnCloudOptimised/bin/python Creating virtual environment at: .venv Activate with: source .venv/bin/activate Audited 232 packages in 18ms
import requests
import os
# Fetch the DataQuery helper module once; later runs reuse the local copy.
if not os.path.exists('DataQuery.py'):
    print('Downloading DataQuery.py')
    url = 'https://raw.githubusercontent.com/aodn/aodn_cloud_optimised/main/aodn_cloud_optimised/lib/DataQuery.py'
    # A timeout stops the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Abort on an HTTP error *before* writing anything: otherwise an error
    # page would be saved as DataQuery.py and, because the file would then
    # exist, never be replaced by a later run.
    response.raise_for_status()
    # Explicit encoding so the result does not depend on the platform locale.
    with open('DataQuery.py', 'w', encoding='utf-8') as f:
        f.write(response.text)
from DataQuery import plot_gridded_variable, create_timeseries, plot_time_coverage
# Open the remote Zarr store on S3 with anonymous access (anon=True),
# reading its consolidated metadata.
url = f's3://aodn-cloud-optimised/{dataset_name}.zarr/'
store = fsspec.get_mapper(url, anon=True)
ds = xr.open_zarr(store, consolidated=True)
# Bare expression so the notebook renders the dataset summary.
ds
<xarray.Dataset> Size: 4TB Dimensions: (time: 8020, latitude: 7001, longitude: 10001) Coordinates: * latitude (latitude) float64 56kB 10.0 9.99 9.98 ... -59.98 -59.99 -60.0 * longitude (longitude) float64 80kB 80.0 80.01 80.02 ... 180.0 180.0 180.0 * time (time) datetime64[ns] 64kB 2002-07-04T05:30:00 ... 2024-11-15T... Data variables: chl_oc3 (time, latitude, longitude) float64 4TB dask.array<chunksize=(5, 500, 500), meta=np.ndarray> Attributes: Conventions: CF-1.6 history: File initialised at 2024-09-17T06:21:12.126924\nInitialised... source_path: imos-srs/archive/oc/aqua/v202302/1d/2024/09/A20240915.L2OC_...
# Call the `plot_time_coverage` helper imported from DataQuery.py on the
# opened dataset (presumably charts which time steps hold data; confirm in
# DataQuery.py).
plot_time_coverage(ds)
%%time
time_series_df = create_timeseries(ds, 'chl_oc3', lat=-40, lon=130, start_time='2002-08-01', end_time='2011-12-31', lon_name="longitude", lat_name="latitude")
CPU times: user 26.5 s, sys: 3.32 s, total: 29.9 s Wall time: 1min 32s
# Plot 'chl_oc3' over lon 130..150, lat -50..-30 starting from 2024-11-01
# (n_days=3) with a log colour scale, using the helper from DataQuery.py.
plot_gridded_variable(
    ds,
    start_date='2024-11-01',
    lon_slice=(130, 150),
    lat_slice=(-50, -30),
    var_name='chl_oc3',
    n_days=3,
    coastline_resolution="50m",
    lon_name="longitude",
    lat_name="latitude",
    log_scale=True,
)
Nearest date in dataset: <xarray.DataArray 'time' ()> Size: 8B array('2024-11-01T05:30:00.000000000', dtype='datetime64[ns]') Coordinates: time datetime64[ns] 8B 2024-11-01T05:30:00 Attributes: axis: T long_name: time standard_name: time Variable Long Name: Chlorophyll Concentration, OC3 Algorithm
/home/lbesnard/github_repo/aodn_cloud_optimised/notebooks/DataQuery.py:836: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect. plt.tight_layout()