# Name of the dataset to explore; it is interpolated into the S3 URI
# ("<bucket>/<dataset_name>.parquet/") when the dataset is opened further down.
dataset_name = "slocum_glider_delayed_qc"
# only run once, then restart session if needed
!pip install uv
import os
import sys
def is_colab() -> bool:
    """Report whether this notebook is running inside Google Colab.

    Detection is done by attempting to import ``google.colab``.

    Returns:
        True if the import succeeds, False if it raises ImportError.
    """
    try:
        # Imported only to probe for availability; the module itself is unused.
        import google.colab  # noqa: F401
    except ImportError:
        return False
    return True
import subprocess


def _run_uv(*args):
    """Run ``uv`` with the given arguments and return its exit code.

    Arguments are passed as a list (no shell), so a requirements path that
    contains spaces or shell metacharacters is handled correctly. Failures
    are reported with a printed message instead of an exception, keeping the
    install step best-effort.
    """
    command = ["uv", *args]
    try:
        return_code = subprocess.run(command, check=False).returncode
    except FileNotFoundError:
        print("uv executable not found on PATH; skipped: " + " ".join(command))
        return 127
    if return_code != 0:
        print(f"Command failed with exit code {return_code}: {' '.join(command)}")
    return return_code


# Get the current directory of the notebook
current_dir = os.getcwd()

# Prefer a requirements.txt sitting in the current directory ...
local_requirements = os.path.join(current_dir, 'requirements.txt')
if os.path.exists(local_requirements):
    requirements_path = local_requirements
else:
    # ... and fall back to the online requirements.txt file otherwise
    requirements_path = 'https://raw.githubusercontent.com/aodn/aodn_cloud_optimised/main/notebooks/requirements.txt'

# Install packages using uv and the determined requirements file
if is_colab():
    # On Colab, install straight into the system interpreter
    _run_uv('pip', 'install', '--system', '-r', requirements_path)
else:
    # Elsewhere, create a virtual environment first, then install
    _run_uv('venv')
    _run_uv('pip', 'install', '-r', requirements_path)
Requirement already satisfied: uv in /home/lbesnard/miniforge3/envs/AodnCloudOptimised/lib/python3.12/site-packages (0.4.18)
Using CPython 3.12.6 interpreter at: /home/lbesnard/miniforge3/envs/AodnCloudOptimised/bin/python Creating virtual environment at: .venv Activate with: source .venv/bin/activate Audited 230 packages in 26ms
import requests
import os
# Fetch the query helper module once; later runs reuse the local copy.
if not os.path.exists('parquet_queries.py'):
    print('Downloading parquet_queries.py')
    url = 'https://raw.githubusercontent.com/aodn/aodn_cloud_optimised/main/aodn_cloud_optimised/lib/ParquetDataQuery.py'
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of saving the error page as a
    # Python module: that file would break the import that follows and,
    # because it then exists on disk, would never be downloaded again.
    response.raise_for_status()
    # Explicit encoding so the write does not depend on the platform locale.
    with open('parquet_queries.py', 'w', encoding='utf-8') as f:
        f.write(response.text)
from parquet_queries import create_time_filter, create_bbox_filter, query_unique_value, plot_spatial_extent, \
get_temporal_extent, get_schema_metadata, plot_ts_diagram
import pyarrow.parquet as pq
import pyarrow.dataset as pds
import pyarrow as pa
import pandas as pd
import pyarrow.compute as pc
# Bucket name used to build the S3 URI below.
BUCKET_OPTIMISED_DEFAULT="aodn-cloud-optimised"
# Root URI of the dataset, built from the bucket and dataset name.
# NOTE(review): the "anonymous@" prefix presumably requests unauthenticated access — confirm.
dname = f"s3://anonymous@{BUCKET_OPTIMISED_DEFAULT}/{dataset_name}.parquet/"
# Open the dataset with hive partitioning; this handle is what gets passed to
# the parquet_queries helper functions in the cells that follow.
parquet_ds = pq.ParquetDataset(dname,partitioning='hive')
Partitioning in Parquet involves organising data files based on the values of one or more columns, known as partition keys. When data is written to Parquet files with partitioning enabled, the files are physically stored in a directory structure that reflects the partition keys. This directory structure makes it easier to retrieve and process specific subsets of data based on the partition keys.
# Open the same location through pyarrow.dataset to read the schema of the
# partitioning (the partition key names and their types), then print it.
dataset = pds.dataset(dname, format="parquet", partitioning="hive")
partition_keys = dataset.partitioning.schema
print(partition_keys)
deployment_code: string timestamp: int32 polygon: string
%%time
# Ask the parquet_queries helper for the values of the 'deployment_code' partition key.
unique_partition_value = query_unique_value(parquet_ds, 'deployment_code')
# Convert to a list so it can be sliced; only the first two values are printed.
print(list(unique_partition_value)[0:2]) # showing a subset only
['Portland20170208', 'Cooktown20160503'] CPU times: user 4.52 ms, sys: 796 µs, total: 5.32 ms Wall time: 4.71 ms
In this section, we plot the polygons where data exists. This helps when creating a bounding box that actually contains data.
# Helper imported from parquet_queries; called on the ParquetDataset handle opened above.
plot_spatial_extent(parquet_ds)
/home/lbesnard/miniforge3/envs/AodnCloudOptimised/lib/python3.12/site-packages/cartopy/mpl/feature_artist.py:144: UserWarning: facecolor will have no effect as it has been defined as "never". warnings.warn('facecolor will have no effect as it has been ' /home/lbesnard/github_repo/aodn_cloud_optimised/notebooks/parquet_queries.py:449: UserWarning: Legend does not support handles for PatchCollection instances. See: https://matplotlib.org/stable/tutorials/intermediate/legend_guide.html#implementing-a-custom-legend-handler ax.legend() /home/lbesnard/github_repo/aodn_cloud_optimised/notebooks/parquet_queries.py:449: UserWarning: No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. ax.legend()
Similarly to the spatial extent, we retrieve the minimum and maximum timestamp partition values of the dataset. This is not necessarily an accurate representation of the TIME values, as the timestamp partition can be yearly, monthly, etc., but it gives an idea of the temporal coverage.
# Helper imported from parquet_queries; as the bare last expression of the
# cell, its return value is displayed by the notebook.
get_temporal_extent(parquet_ds)
(datetime.datetime(2008, 4, 1, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 4, 1, 0, 0, tzinfo=datetime.timezone.utc))
For every Parquet dataset, we create a sidecar file named _common_metadata in the root of the dataset. It contains the variable attributes.
# Alternative kept for reference: read the sidecar schema directly with pyarrow.
# parquet_meta = pa.parquet.read_schema(os.path.join(dname + '_common_metadata')) # parquet metadata
# Load the schema metadata through the parquet_queries helper, given the dataset URI.
metadata = get_schema_metadata(dname) # schema metadata
# Bare expression so the notebook displays the value.
metadata
{'PLATFORM': {'type': 'string', 'trans_system_id': 'Irridium', 'positioning_system': 'GPS', 'platform_type': 'Slocum G2', 'platform_maker': 'Teledyne Webb Research', 'firmware_version_navigation': 7.1, 'firmware_version_science': 7.1, 'glider_serial_no': '416', 'battery_type': 'Alkaline', 'glider_owner': 'CSIRO', 'operating_institution': 'ANFOG', 'long_name': 'platform informations'}, 'DEPLOYMENT': {'type': 'string', 'deployment_start_date': '2015-10-21-T05:00:02Z', 'deployment_start_latitude': -18.9373, 'deployment_start_longitude': 146.881, 'deployment_start_technician': 'Gregor, Rob', 'deployment_end_date': '2015-10-27-T01:56:23Z', 'deployment_end_latitude': -19.2358, 'deployment_end_longitude': 147.5188, 'deployment_end_status': 'recovered', 'deployment_pilot': 'pilot, CSIRO', 'long_name': 'deployment informations'}, 'SENSOR1': {'type': 'string', 'sensor_type': 'CTD', 'sensor_maker': 'Seabird', 'sensor_model': 'GPCTD', 'sensor_serial_no': '9117', 'sensor_calibration_date': '2013-09-17', 'sensor_parameters': 'TEMP, CNDC, PRES, PSAL', 'long_name': 'sensor1 informations'}, 'SENSOR2': {'type': 'string', 'sensor_type': 'ECO Puck', 'sensor_maker': 'Wetlabs', 'sensor_model': 'FLBBCDSLC', 'sensor_serial_no': '3345', 'sensor_calibration_date': '2013-10-07', 'sensor_parameters': 'CPHL, CDOM, VBSC', 'long_name': 'sensor2 informations'}, 'SENSOR3': {'type': 'string', 'sensor_type': 'oxygen sensor', 'sensor_maker': 'Aanderaa', 'sensor_model': 'OXY4831_WPHASE', 'sensor_serial_no': '249', 'sensor_calibration_date': '2013-09-20', 'sensor_parameters': 'DOX1, DOX2', 'long_name': 'sensor3 informations'}, 'SENSOR4': {'type': 'string', 'sensor_type': 'SUNAV2', 'sensor_maker': 'SAtlantic', 'sensor_model': 'SUNAV2', 'sensor_serial_no': '349', 'sensor_calibration_date': '2014-05-05', 'sensor_parameters': 'NITRATE', 'long_name': 'sensor4 informations'}, 'LATITUDE': {'type': 'double', 'standard_name': 'latitude', 'long_name': 'latitude', 'units': 'degrees_north', 'axis': 'Y', 
'valid_min': -90.0, 'valid_max': 90.0, 'comment': 'obtained from GPS fixes', 'reference_datum': 'geographical coordinates, WGS84 projection', 'ancillary_variables': 'LATITUDE_quality_control', 'observation_type': 'measured', 'quality_control_set': 1}, 'LATITUDE_quality_control': {'type': 'float', 'standard_name': 'latitude status_flag', 'long_name': 'quality control flag for latitude', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'LONGITUDE': {'type': 'double', 'standard_name': 'longitude', 'long_name': 'longitude', 'units': 'degrees_east', 'valid_min': -180.0, 'valid_max': 180.0, 'comment': 'obtained from GPS fixes', 'reference_datum': 'geographical coordinates, WGS84 projection', 'ancillary_variables': 'LONGITUDE_quality_control', 'observation_type': 'measured', 'quality_control_set': 1, 'axis': 'X'}, 'LONGITUDE_quality_control': {'type': 'float', 'standard_name': 'longitude status_flag', 'long_name': 'quality control flag for longitude', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'TIME': {'type': 'timestamp[ns]', 'standard_name': 'time', 'long_name': 'time from the CTD', 'axis': 'T', 'valid_min': 0.0, 'valid_max': 90000.0, 'ancillary_variables': 'TIME_quality_control', 'observation_type': 'measured', 'quality_control_set': 1}, 'TIME_quality_control': {'type': 'float', 'standard_name': 'time status_flag', 'long_name': 'quality control flag 
for time from the CTD', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'HEAD': {'type': 'double', 'long_name': 'vehicle_heading', 'units': 'Degrees', 'valid_min': 0.0, 'valid_max': 360.0, 'comment': 'Vehicle heading in degrees, clockwise from magnetic north.', 'ancillary_variables': 'HEAD_quality_control', 'observation_type': 'measured', 'quality_control_set': 1}, 'HEAD_quality_control': {'type': 'float', 'long_name': 'quality control flag for vehicle_heading', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'UCUR': {'type': 'double', 'standard_name': 'eastward_sea_water_velocity', 'long_name': 'eastward_sea_water_velocity', 'units': 'm s-1', 'valid_min': -10.0, 'valid_max': 10.0, 'comment': 'Average eastward velocity of the seawater over all the water that the glider travels through between surfacing. 
The values are rough estimates derived from engineering parameters.', 'ancillary_variables': 'UCUR_quality_control', 'observation_type': 'computed', 'quality_control_set': 1}, 'UCUR_quality_control': {'type': 'float', 'standard_name': 'eastward_sea_water_velocity status_flag', 'long_name': 'quality control flag for eastward_sea_water_velocity', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'VCUR': {'type': 'double', 'standard_name': 'northward_sea_water_velocity', 'long_name': 'northward_sea_water_velocity', 'units': 'm s-1', 'valid_min': -10.0, 'valid_max': 10.0, 'comment': 'Average northward velocity of the seawater over all the water that the glider travels through between surfacing. 
The values are rough estimates derived from engineering parameters.', 'ancillary_variables': 'VCUR_quality_control', 'observation_type': 'computed', 'quality_control_set': 1}, 'VCUR_quality_control': {'type': 'float', 'standard_name': 'northward_sea_water_velocity status_flag', 'long_name': 'quality control flag for northward_sea_water_velocity', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'UCUR_GPS': {'type': 'double', 'long_name': 'eastward_surface_sea_water_velocity', 'units': 'm s-1', 'valid_min': -10.0, 'valid_max': 10.0, 'comment': 'Eastward surface sea-water velocity, calculated from parked glider drift between GPS fixes.', 'observation_type': 'computed', 'quality_control_set': 1, 'ancillary_variables': 'UCUR_GPS_quality_control'}, 'UCUR_GPS_quality_control': {'type': 'float', 'long_name': 'quality control flag for eastward_surface_sea_water_velocity', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'VCUR_GPS': {'type': 'double', 'long_name': 'northward_surface_sea_water_velocity', 'units': 'm s-1', 'valid_min': -10.0, 'valid_max': 10.0, 'comment': 'Northward surface sea-water velocity, calculated from parked glider drift between GPS fixes.', 'ancillary_variables': 'VCUR_GPS_quality_control', 'observation_type': 'computed', 'quality_control_set': 1}, 'VCUR_GPS_quality_control': {'type': 'float', 'long_name': 'quality control flag for 
northward_surface_sea_water_velocity', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'PHASE': {'type': 'float', 'long_name': 'glider_trajectory_phase_code', 'valid_min': 0, 'valid_max': 6, 'comment': 'Phase of the trajectory at that time, defined following EGO (see table 9 in the EGO user manual v1.1). Values used: 0 for surface drifting, 1 for descending profile, 4 for ascending profile, 3 for inflexion. Computed using a script based on depth rate', 'observation_type': 'computed', 'quality_control_set': 1, 'units': '1', 'ancillary_variables': 'PHASE_quality_control PROFILE'}, 'PHASE_quality_control': {'type': 'float', 'long_name': 'quality control flag for glider_trajectory_phase_code', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'PROFILE': {'type': 'double', 'long_name': 'glider_trajectory_profile_number', 'valid_min': 0, 'valid_max': 900000, 'comment': 'The profile number is 1, and is increased at each phase change. It is set to 0 when the glider is at surface (PHASE=0) or during inflexion (PHASE=3) or if the profile is too short (less than 10 pressure measurements). 
Computed using a script based on depth rate', 'observation_type': 'computed', 'quality_control_set': 1, 'units': '1', 'ancillary_variables': 'PROFILE_quality_control PHASE'}, 'PROFILE_quality_control': {'type': 'float', 'long_name': 'quality control flag for glider_trajectory_profile_number', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'PRES': {'type': 'double', 'standard_name': 'sea_water_pressure', 'long_name': 'sea_water_pressure', 'units': 'dbar', 'valid_min': -5.0, 'valid_max': 1100.0, 'comment': 'pressure measured by the CTD', 'ancillary_variables': 'PRES_quality_control', 'observation_type': 'measured', 'quality_control_set': 1, 'quality_control_indicator': 1.0}, 'PRES_quality_control': {'type': 'float', 'standard_name': 'sea_water_pressure status_flag', 'long_name': 'quality control flag for sea_water_pressure', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'DEPTH': {'type': 'double', 'standard_name': 'depth', 'long_name': 'depth', 'units': 'm', 'valid_min': -5.0, 'valid_max': 1000.0, 'positive': 'down', 'axis': 'Z', 'reference_datum': 'sea surface', 'ancillary_variables': 'DEPTH_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'Depth computed using the Gibbs-SeaWater toolbox (TEOS-10) v3.02, from latitude and relative pressure measurements', 'quality_control_indicator': 1.0}, 
'DEPTH_quality_control': {'type': 'float', 'standard_name': 'depth status_flag', 'long_name': 'quality control flag for depth', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'TEMP': {'type': 'double', 'standard_name': 'sea_water_temperature', 'long_name': 'sea_water_temperature', 'units': 'Celsius', 'valid_min': -2.5, 'valid_max': 40.0, 'ancillary_variables': 'TEMP_quality_control', 'observation_type': 'measured', 'quality_control_set': 1, 'comment': 'Data have been corrected for sensor time response. Uncorrected data are available in the FV00 file associated with this mission. '}, 'TEMP_quality_control': {'type': 'float', 'standard_name': 'sea_water_temperature status_flag', 'long_name': 'quality control flag for sea_water_temperature', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'CNDC': {'type': 'double', 'standard_name': 'sea_water_electrical_conductivity', 'long_name': 'sea_water_electrical_conductivity', 'units': 'S m-1', 'valid_min': 0.0, 'valid_max': 60.0, 'observation_type': 'measured', 'ancillary_variables': 'CNDC_quality_control', 'quality_control_set': 1, 'comment': 'Data have been corrected for sensor time response. Uncorrected data are available in the FV00 file associated with this mission. 
'}, 'CNDC_quality_control': {'type': 'float', 'standard_name': 'sea_water_electrical_conductivity status_flag', 'long_name': 'quality control flag for sea_water_electrical_conductivity', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'PSAL': {'type': 'double', 'standard_name': 'sea_water_salinity', 'long_name': 'sea_water_salinity', 'valid_min': 2.0, 'valid_max': 41.0, 'ancillary_variables': 'PSAL_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'practical salinity computed using the Gibbs-SeaWater toolbox (TEOS-10) v3.02 from temperature, conductivity and relative pressure measurements. Users are cautioned that spikes predominantly occuring at the base of the mixed layer result from thermal inertia issues and may remain although correction have been applied.Temperature data have been corrected for sensor time response. A thermal lag correction has been applied. Conductivity data have been corrected for sensor time response. 
', 'units': '1e-3'}, 'PSAL_quality_control': {'type': 'float', 'standard_name': 'sea_water_salinity status_flag', 'long_name': 'quality control flag for sea_water_salinity', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'DOX2': {'type': 'double', 'standard_name': 'moles_of_oxygen_per_unit_mass_in_sea_water', 'long_name': 'moles_of_oxygen_per_unit_mass_in_sea_water', 'units': 'umol kg-1', 'valid_min': 0.0, 'valid_max': 650.0, 'ancillary_variables': 'DOX2_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'mole concentration per unit mass computed as the mole concentration per litre divided by the potential density. Glider output. Data computed internally from phase measurements using the Optode temperature and calibration coefficients. 
Data should be used with caution.'}, 'DOX2_quality_control': {'type': 'float', 'standard_name': 'moles_of_oxygen_per_unit_mass_in_sea_water status_flag', 'long_name': 'quality control flag for moles_of_oxygen_per_unit_mass_in_sea_water', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'DOX1': {'type': 'double', 'standard_name': 'mole_concentration_of_dissolved_molecular_oxygen_in_sea_water', 'long_name': 'mole_concentration_of_dissolved_molecular_oxygen_in_sea_water', 'units': 'umol l-1', 'valid_min': 0.0, 'valid_max': 650.0, 'ancillary_variables': 'DOX1_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'Glider output. Data computed internally from phase measurements using the Optode temperature and calibration coefficients. Data should be used with caution. (Need to multiply by 0.0223916 to get values in ml/L). 
'}, 'DOX1_quality_control': {'type': 'float', 'standard_name': 'mole_concentration_of_dissolved_molecular_oxygen_in_sea_water status_flag', 'long_name': 'quality control flag for mole_concentration_of_dissolved_molecular_oxygen_in_sea_water', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'CPHL': {'type': 'double', 'standard_name': 'mass_concentration_of_chlorophyll_in_sea_water', 'long_name': 'mass_concentration_of_chlorophyll_in_sea_water', 'units': 'mg m-3', 'valid_min': 0.0, 'valid_max': 100.0, 'ancillary_variables': 'CPHL_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'Data have been corrected to account for a suspected shift in Dark Count value since the calibration that lead to many negative values. Uncorrected data are available in the FV00 file associated with this mission. 
'}, 'CPHL_quality_control': {'type': 'float', 'standard_name': 'mass_concentration_of_chlorophyll_in_sea_water status_flag', 'long_name': 'quality control flag for mass_concentration_of_chlorophyll_in_sea_water', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'CDOM': {'type': 'double', 'long_name': 'concentration_of_coloured_dissolved_organic_matter', 'valid_min': 0.0, 'valid_max': 400.0, 'ancillary_variables': 'CDOM_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'Data have been corrected to account for a suspected shift in Dark Count value since the calibration that lead to many negative values. Uncorrected data are available in the FV00 file associated with this mission. 
', 'units': '1e-9'}, 'CDOM_quality_control': {'type': 'float', 'long_name': 'quality control flag for concentration_of_coloured_dissolved_organic_matter', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'VBSC': {'type': 'double', 'long_name': 'volumetric_backscatter_coefficient', 'units': 'm-1 sr-1', 'valid_min': 0.0, 'valid_max': 0.1, 'ancillary_variables': 'VBSC_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'concentration computed from the measured raw values in counts and the calibration coefficients stored in the associate FV00 NetCDF file'}, 'VBSC_quality_control': {'type': 'float', 'long_name': 'quality control flag for volumetric_backscatter_coefficient', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'NTRA': {'type': 'double', 'long_name': 'concentration_of_nitrate_in_seawater', 'units': 'umol l-1', 'valid_min': 0.0, 'valid_max': 25.0, 'comment': 'uncorrected raw optical nitrate in micromolar', 'observation_type': 'measured', 'ancillary_variables': 'NTRA_quality_control', 'quality_control_set': 1, 'standard_name': 'mole_concentration_of_nitrate_in_sea_water'}, 'NTRA_quality_control': {'type': 'float', 'long_name': 'quality control flag for concentration_of_nitrate_in_seawater', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 
'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'standard_name': 'mole_concentration_of_nitrate_in_sea_water status_flag', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'IRRAD443': {'type': 'double', 'long_name': 'downwelling_spectral_irradiance_in_sea_water_beam1', 'units': 'uW cm-2 nm-1', 'valid_min': 0.0, 'valid_max': 1000.0, 'ancillary_variables': 'IRRAD443_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'The real wavelength for this beam is 443.52 nm. Irradiance data have been corrected to account for suspected shift in Dark Count values since the calibration, that lead to many negative values. Uncorrected data are available in the FV00 file associated with this mission. '}, 'IRRAD443_quality_control': {'type': 'float', 'long_name': 'quality control flag for downwelling_spectral_irradiance_in_sea_water_beam1', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'IRRAD490': {'type': 'double', 'long_name': 'downwelling_spectral_irradiance_in_sea_water_beam2', 'units': 'uW cm-2 nm-1', 'valid_min': 0.0, 'valid_max': 1000.0, 'ancillary_variables': 'IRRAD490_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'The real wavelength for this beam is 489.59 nm. Irradiance data have been corrected to account for suspected shift in Dark Count values since the calibration, that lead to many negative values. Uncorrected data are available in the FV00 file associated with this mission. 
'}, 'IRRAD490_quality_control': {'type': 'float', 'long_name': 'quality control flag for downwelling_spectral_irradiance_in_sea_water_beam2', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'IRRAD555': {'type': 'double', 'long_name': 'downwelling_spectral_irradiance_in_sea_water_beam3', 'units': 'uW cm-2 nm-1', 'valid_min': 0.0, 'valid_max': 1000.0, 'ancillary_variables': 'IRRAD555_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'The real wavelength for this beam is 554.77 nm. Irradiance data have been corrected to account for suspected shift in Dark Count values since the calibration, that lead to many negative values. Uncorrected data are available in the FV00 file associated with this mission. '}, 'IRRAD555_quality_control': {'type': 'float', 'long_name': 'quality control flag for downwelling_spectral_irradiance_in_sea_water_beam3', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'IRRAD670': {'type': 'double', 'long_name': 'downwelling_spectral_irradiance_in_sea_water_beam4', 'units': 'uW cm-2 nm-1', 'valid_min': 0.0, 'valid_max': 1000.0, 'ancillary_variables': 'IRRAD670_quality_control', 'observation_type': 'computed', 'quality_control_set': 1, 'comment': 'The real wavelength for this beam is 664.89 nm. 
Irradiance data have been corrected to account for suspected shift in Dark Count values since the calibration, that lead to many negative values. Uncorrected data are available in the FV00 file associated with this mission. '}, 'IRRAD670_quality_control': {'type': 'float', 'long_name': 'quality control flag for downwelling_spectral_irradiance_in_sea_water_beam4', 'quality_control_set': 1, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'BBP': {'type': 'double', 'long_name': 'particle_backscattering_coefficient', 'units': 'm-1', 'valid_min': 0.0, 'valid_max': 1.0, 'ancillary_variables': 'BBP_quality_control', 'observation_type': 'computed', 'quality_control_set': 1.0, 'comment': 'Computed using a Xp factor = 1.1 and backscatter from seawater from Zhang (2009) code'}, 'BBP_quality_control': {'type': 'int32', 'long_name': 'quality control flag for particle_backscattering_coefficient', 'FillValue': 99, 'valid_min': 0, 'valid_max': 9, 'flag_values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'observation_type': 'computed', 'quality_control_set': 1, 'flag_meanings': 'no_qc_performed good_data probably_good_data bad_data_that_are_potentially_correctable bad_data value_changed not_used not_used interpolated_values missing_values', 'quality_control_conventions': 'IMOS standard set using the IODE flags'}, 'filename': {'type': 'string'}, 'timestamp': {'type': 'int64'}, 'polygon': {'type': 'string'}, 'deployment_code': {'type': 'string'}, 'dataset_metadata': {'metadata_uuid': 'c317b0fe-02e8-4ff9-96c9-563fd58e82ac', 'title': 'ANFOG glider'}}
# Build a time-window filter and a bounding-box filter with the parquet_queries helpers.
filter_time = create_time_filter(parquet_ds, date_start='2020-12-01', date_end='2023-01-01')
filter_geo = create_bbox_filter(parquet_ds, lat_min=-34, lat_max=-28, lon_min=140, lon_max=160)
# Combine both filters with `&` so a row has to satisfy the two of them.
# NOTE(review): `filter` shadows the Python builtin of the same name; the name is
# kept because the read_parquet call that follows refers to it.
filter = filter_geo & filter_time
%%time
# using pandas instead of pyarrow so that filters can directly be applied to the data, and not just the partition
df = pd.read_parquet(dname, engine='pyarrow',filters=filter)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 6898759 entries, 0 to 6898758 Data columns (total 62 columns): # Column Dtype --- ------ ----- 0 PLATFORM object 1 DEPLOYMENT object 2 SENSOR1 object 3 SENSOR2 object 4 SENSOR3 object 5 SENSOR4 object 6 LATITUDE float64 7 LATITUDE_quality_control float32 8 LONGITUDE float64 9 LONGITUDE_quality_control float32 10 TIME datetime64[ns] 11 TIME_quality_control float32 12 HEAD float64 13 HEAD_quality_control float32 14 UCUR float64 15 UCUR_quality_control float32 16 VCUR float64 17 VCUR_quality_control float32 18 UCUR_GPS float64 19 UCUR_GPS_quality_control float32 20 VCUR_GPS float64 21 VCUR_GPS_quality_control float32 22 PHASE float32 23 PHASE_quality_control float32 24 PROFILE float64 25 PROFILE_quality_control float32 26 PRES float64 27 PRES_quality_control float32 28 DEPTH float64 29 DEPTH_quality_control float32 30 TEMP float64 31 TEMP_quality_control float32 32 CNDC float64 33 CNDC_quality_control float32 34 PSAL float64 35 PSAL_quality_control float32 36 DOX2 float64 37 DOX2_quality_control float32 38 DOX1 float64 39 DOX1_quality_control float32 40 CPHL float64 41 CPHL_quality_control float32 42 CDOM float64 43 CDOM_quality_control float32 44 VBSC float64 45 VBSC_quality_control float32 46 NTRA float64 47 NTRA_quality_control float32 48 filename object 49 IRRAD443 float64 50 IRRAD443_quality_control float32 51 IRRAD490 float64 52 IRRAD490_quality_control float32 53 IRRAD555 float64 54 IRRAD555_quality_control float32 55 IRRAD670 float64 56 IRRAD670_quality_control float32 57 BBP float64 58 BBP_quality_control int32 59 deployment_code category 60 timestamp category 61 polygon category dtypes: category(3), datetime64[ns](1), float32(26), float64(24), int32(1), object(7) memory usage: 2.4+ GB CPU times: user 10.3 s, sys: 11.3 s, total: 21.6 s Wall time: 52.4 s
# Build a combined predicate: a single deployment code intersected with a
# time window (2024-01-01 to 2024-05-01).
filter_time = create_time_filter(
    parquet_ds,
    date_start='2024-01-01',
    date_end='2024-05-01',
)
expr_1 = pc.field('deployment_code') == pa.scalar("MissionBeach20240120")
# NOTE(review): `filter` shadows the Python builtin; the name is kept because
# the following cells read it.
filter = expr_1 & filter_time
%%time
# using pandas instead of pyarrow so that filters can directly be applied to the data, and not just the partition
df = pd.read_parquet(dname, engine='pyarrow',filters=filter)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 228684 entries, 0 to 228683 Data columns (total 62 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 PLATFORM 228684 non-null object 1 DEPLOYMENT 228684 non-null object 2 SENSOR1 228684 non-null object 3 SENSOR2 228684 non-null object 4 SENSOR3 228684 non-null object 5 SENSOR4 228684 non-null object 6 LATITUDE 228684 non-null float64 7 LATITUDE_quality_control 228684 non-null float32 8 LONGITUDE 228684 non-null float64 9 LONGITUDE_quality_control 228684 non-null float32 10 TIME 228684 non-null datetime64[ns] 11 TIME_quality_control 228684 non-null float32 12 HEAD 228478 non-null float64 13 HEAD_quality_control 228684 non-null float32 14 UCUR 356 non-null float64 15 UCUR_quality_control 228684 non-null float32 16 VCUR 356 non-null float64 17 VCUR_quality_control 228684 non-null float32 18 UCUR_GPS 356 non-null float64 19 UCUR_GPS_quality_control 228684 non-null float32 20 VCUR_GPS 356 non-null float64 21 VCUR_GPS_quality_control 228684 non-null float32 22 PHASE 226879 non-null float32 23 PHASE_quality_control 228684 non-null float32 24 PROFILE 226879 non-null float64 25 PROFILE_quality_control 228684 non-null float32 26 PRES 228683 non-null float64 27 PRES_quality_control 228684 non-null float32 28 DEPTH 228683 non-null float64 29 DEPTH_quality_control 228684 non-null float32 30 TEMP 228683 non-null float64 31 TEMP_quality_control 228684 non-null float32 32 CNDC 228683 non-null float64 33 CNDC_quality_control 228684 non-null float32 34 PSAL 228683 non-null float64 35 PSAL_quality_control 228684 non-null float32 36 DOX2 191168 non-null float64 37 DOX2_quality_control 228684 non-null float32 38 DOX1 191168 non-null float64 39 DOX1_quality_control 228684 non-null float32 40 CPHL 228684 non-null float64 41 CPHL_quality_control 228684 non-null float32 42 CDOM 228684 non-null float64 43 CDOM_quality_control 228684 non-null float32 44 VBSC 228684 non-null float64 45 VBSC_quality_control 228684 
non-null float32 46 NTRA 0 non-null float64 47 NTRA_quality_control 0 non-null float32 48 filename 228684 non-null object 49 IRRAD443 228684 non-null float64 50 IRRAD443_quality_control 228684 non-null float32 51 IRRAD490 228684 non-null float64 52 IRRAD490_quality_control 228684 non-null float32 53 IRRAD555 228684 non-null float64 54 IRRAD555_quality_control 228684 non-null float32 55 IRRAD670 228684 non-null float64 56 IRRAD670_quality_control 228684 non-null float32 57 BBP 228683 non-null float64 58 BBP_quality_control 228684 non-null int32 59 deployment_code 228684 non-null category 60 timestamp 228684 non-null category 61 polygon 228684 non-null category dtypes: category(3), datetime64[ns](1), float32(26), float64(24), int32(1), object(7) memory usage: 80.3+ MB CPU times: user 546 ms, sys: 1.21 s, total: 1.75 s Wall time: 5.32 s
# Map view of the loaded rows: one '+' per sample at its longitude/latitude,
# coloured by temperature.
location_plot_options = {
    'x': 'LONGITUDE',
    'y': 'LATITUDE',
    'c': 'TEMP',
    'marker': '+',
    'linestyle': "None",
    'cmap': 'RdYlBu_r',
    'title': 'Temperature for each location',
}
df.plot.scatter(**location_plot_options)
<Axes: title={'center': 'Temperature for each location'}, xlabel='LONGITUDE', ylabel='LATITUDE'>
import matplotlib.pyplot as plt

# Depth-versus-time section of the loaded rows, coloured by temperature.
ax = df.plot.scatter(
    x='TIME',
    y='DEPTH',
    c='TEMP',
    marker='+',
    linestyle="None",
    cmap='RdYlBu_r',
    title='Temperature timeseries Profile',
)
# Flip the y-axis so that larger DEPTH values are drawn lower on the plot.
ax.invert_yaxis()
# Rotate x-axis labels at 45 degrees. tick_params changes only the label
# rotation and leaves the tick locator/formatter untouched, which avoids the
# "set_ticklabels() should only be used with a fixed number of ticks"
# UserWarning raised by ax.set_xticklabels(ax.get_xticklabels(), ...).
ax.tick_params(axis='x', labelrotation=45)
# Show the plot
plt.show()
/tmp/ipykernel_2863934/1403522444.py:7: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
# Keep only rows whose salinity QC flag equals 1 and whose salinity is at
# least 25, then draw the temperature-salinity diagram for that subset.
psal_flag_is_one = df['PSAL_quality_control'] == 1
psal_at_least_25 = df['PSAL'] >= 25
filtered_df = df[psal_flag_is_one & psal_at_least_25]
plot_ts_diagram(
    filtered_df,
    temp_col='TEMP',
    psal_col='PSAL',
    depth_col='DEPTH',
)