import os
import pandas as pd
import geopandas as gpd
import xarray as xr
import rioxarray as rxr
from osgeo import gdal
datadir = 'C:/Users/jtrum/world_bank/data/'
os.chdir(datadir)
pluvial = ['P_1in5', 'P_1in10', 'P_1in20', 'P_1in50', 'P_1in75', 'P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000']
fluvial_undefined = ['FU_1in5', 'FU_1in10', 'FU_1in20', 'FU_1in50', 'FU_1in75', 'FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000']
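# The same lists can also be built from the return periods; an equivalent,
# illustrative construction:
return_periods = [5, 10, 20, 50, 75, 100, 200, 250, 500, 1000]
pluvial = [f'P_1in{rp}' for rp in return_periods]
fluvial_undefined = [f'FU_1in{rp}' for rp in return_periods]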
data_dict = {}
for i in pluvial:
    data_dict[i] = xr.open_dataset(datadir + f'AngolaFathom/Angola/pluvial/{i}.tif')
for i in fluvial_undefined:
    data_dict[i] = xr.open_dataset(datadir + f'AngolaFathom/Angola/fluvial_undefended/{i}.tif')
# crop the 20 rasters to the extent of the catchment basin
catchment = gpd.read_file(datadir + 'catchment.geojson')
clip = catchment.geometry
# set the CRS of each raster to match the catchment
for i in data_dict:
    data_dict[i] = data_dict[i].rio.write_crs(catchment.crs)
    # print the raster CRS, then the catchment CRS, to confirm they match
    print(data_dict[i].rio.crs)
    print(catchment.crs)
# clip rasters to the catchment extent
for i in data_dict:
    data_dict[i] = data_dict[i].rio.clip(clip, from_disk=True)
    print(i, 'clipped')
GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 
84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 
84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 
84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] P_1in5 clipped P_1in10 clipped P_1in20 clipped P_1in50 clipped P_1in75 clipped P_1in100 clipped P_1in200 clipped P_1in250 clipped P_1in500 clipped P_1in1000 clipped FU_1in5 clipped FU_1in10 clipped FU_1in20 clipped FU_1in50 clipped FU_1in75 clipped FU_1in100 clipped FU_1in200 clipped FU_1in250 clipped FU_1in500 clipped FU_1in1000 clipped
def binarize_band(raster_path, threshold=0.15):
    # open in update mode so the binarized band overwrites the file in place
    raster_dataset = gdal.Open(raster_path, gdal.GA_Update)
    if raster_dataset is None:
        print(f"Unable to open {raster_path}")
        return
    band = raster_dataset.GetRasterBand(1)
    band_data = band.ReadAsArray()
    # cells with depth above the threshold become 1, all others 0
    binary_data = (band_data > threshold).astype(int)
    band.WriteArray(binary_data)
    raster_dataset.FlushCache()
    raster_dataset = None
    print(f"Binarized {raster_path}")
# Define the threshold value
threshold = 0.15
# Clip rasters to catchment extent, write each to disk, and binarize
for i in data_dict:
    data_dict[i] = data_dict[i].rio.clip(clip, from_disk=True)
    print(i, 'clipped')
    # Write the clipped raster to disk as GeoTIFF so GDAL can open and update it
    clipped_raster_path = datadir + i + '_clipped.tif'
    data_dict[i].rio.to_raster(clipped_raster_path)
    # Binarize the band data in the clipped raster
    binarize_band(clipped_raster_path, threshold)
P_1in5 clipped ... FU_1in1000 clipped, each followed by "Binarized C:/Users/jtrum/world_bank/data/<name>_clipped.tif" (all 20 rasters)
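# Quick verification (illustrative): after binarization the band should contain
# only 0 and 1, plus any nodata value.
import numpy as np
check = gdal.Open(datadir + 'P_1in5_clipped.tif')
print(np.unique(check.GetRasterBand(1).ReadAsArray()))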
# load the clipped rasters into a dictionary
clipped_dict = {}
for i in pluvial + fluvial_undefined:
    clipped_dict[i] = xr.open_dataset(datadir + f'{i}_clipped.tif')
from scipy import stats
# Define the groups by return-period range
groups = [
    ['P_1in5', 'P_1in10'],
    ['P_1in20', 'P_1in50', 'P_1in75'],
    ['P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000'],
    ['FU_1in5', 'FU_1in10'],
    ['FU_1in20', 'FU_1in50', 'FU_1in75'],
    ['FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000']
]
def compute_mode_value(rasters):
    # Stack the rasters along a new 'raster' dimension
    stacked_data = xr.concat([r['band_data'] for r in rasters], dim='raster')
    # Compute the mode along the 'raster' axis; keepdims=False avoids the SciPy FutureWarning
    mode_data = stats.mode(stacked_data.values, axis=0, nan_policy='omit', keepdims=False)
    # Wrap the mode values back into a DataArray on the original grid
    dims = stacked_data.dims[1:]
    coords = {d: stacked_data.coords[d] for d in dims}
    return xr.DataArray(mode_data.mode, dims=dims, coords=coords)
# Create a dictionary to store mode values for each group
mode_values_dict = {}
# Compute mode values for each group
for group_index, group_names in enumerate(groups):
    group_rasters = [clipped_dict[i] for i in group_names]
    mode_values_dict[f'Group_{group_index + 1}'] = compute_mode_value(group_rasters)
# The mode values for each group can be accessed as mode_values_dict['Group_1'], mode_values_dict['Group_2'], etc.
# now plot the 6 groups post-binarization
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import matplotlib.gridspec as gridspec
# convert each raster to a dataframe
for i in data_dict:
    data_dict[i] = data_dict[i].to_dataframe()
    # bind each dataframe to its own variable so we don't have to index the dictionary every time
    globals()[i] = data_dict[i]
    print(i, 'converted to dataframe')
AttributeError: 'DataFrame' object has no attribute 'to_dataframe' (raised on re-running the cell once the dictionary entries were already DataFrames; traceback trimmed)
# use the x and y columns to build point GeoDataFrames
for i in data_dict:
    data_dict[i] = data_dict[i].reset_index()
    data_dict[i] = gpd.GeoDataFrame(data_dict[i], geometry=gpd.points_from_xy(data_dict[i].x, data_dict[i].y))
    print(i, 'converted to geodataframe')
P_1in5 converted to geodataframe ... FU_1in500 converted to geodataframe (19 of the 20 layers, before the error below)
TypeError: reset_index() missing 1 required positional argument: 'dims_or_levels' (FU_1in1000 was still an xarray Dataset, whose reset_index has a different signature than pandas'; traceback trimmed)
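# points_from_xy assigns no CRS to the new geometries; once every layer has been
# converted, the catchment's CRS can be copied over (a sketch):
for i in data_dict:
    data_dict[i] = data_dict[i].set_crs(catchment.crs)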
# make a dictionary of the dataframes
df_dict = {name: globals()[name] for name in pluvial + fluvial_undefined}
# For each dataframe, flag cells whose 'band_data' value is at or above 0.15 with a 1
# in a new 'threshold' column (0 otherwise), working on the per-layer variables rather
# than the dictionary.
for name in pluvial + fluvial_undefined:
    df = globals()[name]
    df['threshold'] = (df['band_data'] >= 0.15).astype(int)
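# Quick check (illustrative): distribution of the binary flag in one layer
print(P_1in5['threshold'].value_counts())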
# Build 6 group dataframes: each keeps the band/x/y columns from its first member and
# collects each member's threshold values in a column named 'threshold_<name>'.
group_defs = {
    'G1': ['P_1in5', 'P_1in10'],
    'G2': ['P_1in20', 'P_1in50', 'P_1in75'],
    'G3': ['P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000'],
    'G4': ['FU_1in5', 'FU_1in10'],
    'G5': ['FU_1in20', 'FU_1in50', 'FU_1in75'],
    'G6': ['FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000'],
}
for gname, members in group_defs.items():
    g = pd.DataFrame(globals()[members[0]], columns=['band', 'x', 'y'])
    for m in members:
        g[f'threshold_{m}'] = globals()[m]['threshold']
    globals()[gname] = g
# Take the row-wise mode of each group's threshold columns as the group's 'threshold'
# column. (DataFrame.mode(axis=1) is very slow at this row count; G2-G6 were commented
# out after G1 hung.)
G1['threshold'] = G1[['threshold_P_1in5', 'threshold_P_1in10']].mode(axis=1)[0]
# print("G1 threshold column created")
# G2['threshold'] = G2[['threshold_P_1in20', 'threshold_P_1in50', 'threshold_P_1in75']].mode(axis=1)[0]
# print("G2 threshold column created")
# G3['threshold'] = G3[['threshold_P_1in100', 'threshold_P_1in200', 'threshold_P_1in250', 'threshold_P_1in500', 'threshold_P_1in1000']].mode(axis=1)[0]
# print("G3 threshold column created")
# G4['threshold'] = G4[['threshold_FU_1in5', 'threshold_FU_1in10']].mode(axis=1)[0]
# print("G4 threshold column created")
# G5['threshold'] = G5[['threshold_FU_1in20', 'threshold_FU_1in50', 'threshold_FU_1in75']].mode(axis=1)[0]
# print("G5 threshold column created")
# G6['threshold'] = G6[['threshold_FU_1in100', 'threshold_FU_1in200', 'threshold_FU_1in250', 'threshold_FU_1in500', 'threshold_FU_1in1000']].mode(axis=1)[0]
# print("G6 threshold column created")
KeyboardInterrupt: the G1 mode computation was interrupted inside DataFrame.mode / Series.mode (traceback trimmed)
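# Since the threshold columns are binary, a vectorized majority vote is far faster than
# the row-wise mode above (a sketch; note that a 0/1 tie resolves to 1 here, whereas
# mode(axis=1)[0] resolves it to 0):
cols = ['threshold_P_1in5', 'threshold_P_1in10']
G1['threshold'] = (G1[cols].sum(axis=1) >= len(cols) / 2).astype(int)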
# bind each dataframe in the dictionary to its own variable
for i in data_dict:
    globals()[i] = data_dict[i]
    print(i, 'converted to variable')
P_1in5 converted to variable ... FU_1in1000 converted to variable (all 20 layers)
# drop NA values
for i in data_dict:
    data_dict[i] = data_dict[i].dropna()
    print(i, 'dropped na values')
len(P_1in5)
P_1in5 dropped na values ... FU_1in1000 dropped na values (all 20 layers)
6627410
# import mode function
from scipy.stats import mode
# create grouping categories per return period per flood type
P_under1 = ['P_1in1000', 'P_1in500', 'P_1in250', 'P_1in200', 'P_1in100']
P_1to10 = ['P_1in75', 'P_1in50', 'P_1in20']
P_over10 = ['P_1in10', 'P_1in5']
FU_under1 = ['FU_1in1000', 'FU_1in500', 'FU_1in250', 'FU_1in200', 'FU_1in100']
FU_1to10 = ['FU_1in75', 'FU_1in50', 'FU_1in20']
FU_over10 = ['FU_1in10', 'FU_1in5']
# create empty dictionary to store mode values
mode_dict = {}
# Calculate mode for each return period per flood type
for i in P_under1 + P_1to10 + P_over10 + FU_under1 + FU_1to10 + FU_over10:
    # Get the data array from the xarray Dataset
    data_array = data_dict[i].to_array()
    # Calculate the mode along axis 0; keepdims=True keeps the indexing below valid
    # and silences the SciPy FutureWarning
    mode_values = mode(data_array, axis=0, nan_policy='omit', keepdims=True)
    # Store the mode values in the dictionary
    mode_dict[i] = mode_values.mode[0]
    print(i, 'mode calculated:', mode_values.mode[0])
KeyboardInterrupt: the masked-array mode over millions of cells was interrupted inside scipy.stats.mstats find_repeats (traceback trimmed)
# set rasters to the same CRS as the catchment
for i in data_dict:
    data_dict[i] = data_dict[i].rio.reproject(catchment.crs, resampling=0)  # 0 = nearest neighbour
    print(i, 'reprojected')
P_1in5 reprojected ... FU_1in1000 reprojected (all 20 layers)
import geopandas as gpd
from scipy.stats import mode
# Set the data directory
datadir = 'C:/Users/jtrum/world_bank/data/'
# Load the catchment boundary
catchment = gpd.read_file(datadir + 'catchment.geojson')
# Define the probability groups
groups = {
    'low_prob': ['P_1in1000', 'P_1in500', 'P_1in250', 'P_1in200', 'P_1in100'],
    'medium_prob': ['P_1in75', 'P_1in50', 'P_1in20'],
    'high_prob': ['P_1in10', 'P_1in5']
}
# Build binary classifications per group
binary_classifications = {}
for group_name, group_rasters in groups.items():
    # binarize each raster at the 0.15 depth threshold used earlier, then sum the 0/1 layers
    binary_data = (xr.open_dataarray(datadir + f'AngolaFathom/Angola/pluvial/{group_rasters[0]}.tif') >= 0.15).astype(float)
    for raster_name in group_rasters[1:]:
        data = (xr.open_dataarray(datadir + f'AngolaFathom/Angola/pluvial/{raster_name}.tif') >= 0.15).astype(float)
        binary_data = binary_data + data
    # Set values to 1 where at least half of the group's rasters flag the cell as flooded
    binary_data = (binary_data >= len(group_rasters) / 2).astype(float)
    binary_classifications[group_name] = binary_data
    print(group_name, 'done')
# Now there are three binary classifications in the 'binary_classifications' dictionary
low_prob done medium_prob done high_prob done
# reproject low_prob, medium_prob, and high_prob to match the catchment CRS
for i in binary_classifications:
    binary_classifications[i] = binary_classifications[i].rio.reproject(catchment.crs, resampling=0)
    print(i, 'reprojected')
low_prob reprojected medium_prob reprojected high_prob reprojected
# crop the low_prob, medium_prob, and high_prob rasters to the extent of the catchment basin
clip = catchment.geometry
for i in binary_classifications:
    binary_classifications[i] = binary_classifications[i].rio.clip(clip, from_disk=True)
    print(i, 'clipped')
low_prob clipped medium_prob clipped high_prob clipped
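# Sanity check (illustrative): fraction of valid cells flagged as flooded in each
# clipped group (xarray's mean skips the NaN cells outside the catchment).
for name, da in binary_classifications.items():
    print(name, float(da.mean()))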
# view data as a dataframe
df = binary_classifications['low_prob'].to_dataframe()
df.head()
DataFrame indexed by (band, y, x) with columns 'spatial_ref' and 'band_data' (table rendering trimmed)
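# The three binary layers can also be written out as GeoTIFFs (a sketch; the output
# file names are assumptions):
for name, da in binary_classifications.items():
    da.rio.to_raster(datadir + f'{name}_binary.tif')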
import numpy as np
# Create groups for probability thresholds
thresholds = {
    'low_prob': (0, 0.01),       # <1% annual probability
    'medium_prob': (0.01, 0.1),  # 1-10% annual probability
    'high_prob': (0.1, 1)        # >10% annual probability
}
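# For reference, a '1inN' name corresponds to an annual exceedance probability of 1/N
# (e.g. 'P_1in100' -> 0.01); a small helper (illustrative):
def annual_exceedance_probability(name):
    return 1 / int(name.split('1in')[1])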
# Create a dictionary to store the grouped data
grouped_data = {key: {} for key in thresholds}
# Load and crop the rasters (pluvial and fluvial layers live in different folders)
for i in pluvial + fluvial_undefined:
    folder = 'pluvial' if i.startswith('P_') else 'fluvial_undefended'
    data = xr.open_dataset(datadir + f'AngolaFathom/Angola/{folder}/{i}.tif')
    # Crop the data to the extent of the catchment (note: if y is stored descending,
    # as is typical for GeoTIFFs, the y slice bounds must run max -> min)
    data = data.sel(x=slice(catchment.total_bounds[0], catchment.total_bounds[2]),
                    y=slice(catchment.total_bounds[3], catchment.total_bounds[1]))
    # Group the data based on probability thresholds
    for key, (min_prob, max_prob) in thresholds.items():
        mask = (data['probability'] >= min_prob) & (data['probability'] < max_prob)
        grouped_data[key][i] = data.where(mask, drop=True)
# Create a dictionary to store the binary classifications
binary_classification = {key: {} for key in thresholds}
# Create a binary classification based on majority values
for group, group_data in grouped_data.items():
    for i in pluvial + fluvial_undefined:
        binary_data = group_data[i].copy()
        # Replace NaN values with -1 to handle them separately
        binary_data = binary_data.fillna(-1)
        # Compute the mode (majority value) for each cell
        mode_result = mode(binary_data, axis=0, nan_policy='omit', keepdims=True)
        # Set cells to 1 where the majority value is 1, 0 otherwise
        majority = np.where(mode_result.mode == 1, 1, 0).astype(float)
        # np.where returns a bare ndarray, so wrap it back onto the original grid
        binary_classification[group][i] = xr.DataArray(majority, coords=binary_data.coords, dims=binary_data.dims)
# Now the `binary_classification` dictionary holds binary data for each group.
KeyError: 'probability' (the GeoTIFFs expose a 'band_data' variable holding water depth, not a 'probability' variable; traceback trimmed)
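# The KeyError above suggests one fix (a sketch, assuming the 0.15 depth threshold used
# earlier): mask on the 'band_data' depth variable, and take group membership from the
# return-period name (via the 'groups' dict above) rather than a per-cell probability.
def depth_mask(dataset, threshold=0.15):
    # flag cells whose modelled water depth meets the threshold
    return dataset['band_data'] >= threshold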