import os
import pandas as pd
import geopandas as gpd
import xarray as xr
import rioxarray as rxr
from osgeo import gdal
datadir = 'C:/Users/jtrum/world_bank/data/'
os.chdir(datadir)
pluvial = ['P_1in5', 'P_1in10', 'P_1in20', 'P_1in50', 'P_1in75', 'P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000']
fluvial_undefined = ['FU_1in5', 'FU_1in10', 'FU_1in20', 'FU_1in50', 'FU_1in75', 'FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000']
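# The same lists can also be built from the return periods; an equivalent,
# illustrative construction:
return_periods = [5, 10, 20, 50, 75, 100, 200, 250, 500, 1000]
pluvial = [f'P_1in{rp}' for rp in return_periods]
fluvial_undefined = [f'FU_1in{rp}' for rp in return_periods]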
data_dict = {}
for i in pluvial:
    data_dict[i] = xr.open_dataset(datadir + f'AngolaFathom/Angola/pluvial/{i}.tif')
for i in fluvial_undefined:
    data_dict[i] = xr.open_dataset(datadir + f'AngolaFathom/Angola/fluvial_undefended/{i}.tif')
# crop the 20 rasters to the extent of the catchment basin
catchment = gpd.read_file(datadir + 'catchment.geojson')
clip = catchment.geometry
# set the CRS of each raster to match the catchment
for i in data_dict:
    data_dict[i] = data_dict[i].rio.write_crs(catchment.crs)
    # print the raster CRS, then the catchment CRS, to confirm they match
    print(data_dict[i].rio.crs)
    print(catchment.crs)
# clip rasters to the catchment extent
for i in data_dict:
    data_dict[i] = data_dict[i].rio.clip(clip, from_disk=True)
    print(i, 'clipped')
GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 
84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 
84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 
84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] P_1in5 clipped P_1in10 clipped P_1in20 clipped P_1in50 clipped P_1in75 clipped P_1in100 clipped P_1in200 clipped P_1in250 clipped P_1in500 clipped P_1in1000 clipped FU_1in5 clipped FU_1in10 clipped FU_1in20 clipped FU_1in50 clipped FU_1in75 clipped FU_1in100 clipped FU_1in200 clipped FU_1in250 clipped FU_1in500 clipped FU_1in1000 clipped
def binarize_band(raster_path, threshold=0.15):
    # open in update mode so the binarized band overwrites the file in place
    raster_dataset = gdal.Open(raster_path, gdal.GA_Update)
    if raster_dataset is None:
        print(f"Unable to open {raster_path}")
        return
    band = raster_dataset.GetRasterBand(1)
    band_data = band.ReadAsArray()
    # cells with depth above the threshold become 1, all others 0
    binary_data = (band_data > threshold).astype(int)
    band.WriteArray(binary_data)
    raster_dataset.FlushCache()
    raster_dataset = None
    print(f"Binarized {raster_path}")
# Define the threshold value
threshold = 0.15
# Clip rasters to catchment extent, write each to disk, and binarize
for i in data_dict:
    data_dict[i] = data_dict[i].rio.clip(clip, from_disk=True)
    print(i, 'clipped')
    # Write the clipped raster to disk as GeoTIFF so GDAL can open and update it
    clipped_raster_path = datadir + i + '_clipped.tif'
    data_dict[i].rio.to_raster(clipped_raster_path)
    # Binarize the band data in the clipped raster
    binarize_band(clipped_raster_path, threshold)
P_1in5 clipped ... FU_1in1000 clipped, each followed by "Binarized C:/Users/jtrum/world_bank/data/<name>_clipped.tif" (all 20 rasters)
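# Quick verification (illustrative): after binarization the band should contain
# only 0 and 1, plus any nodata value.
import numpy as np
check = gdal.Open(datadir + 'P_1in5_clipped.tif')
print(np.unique(check.GetRasterBand(1).ReadAsArray()))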
# load the clipped rasters into a dictionary
clipped_dict = {}
for i in pluvial + fluvial_undefined:
    clipped_dict[i] = xr.open_dataset(datadir + f'{i}_clipped.tif')
from scipy import stats
# Define the groups by return-period range
groups = [
    ['P_1in5', 'P_1in10'],
    ['P_1in20', 'P_1in50', 'P_1in75'],
    ['P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000'],
    ['FU_1in5', 'FU_1in10'],
    ['FU_1in20', 'FU_1in50', 'FU_1in75'],
    ['FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000']
]
def compute_mode_value(rasters):
    # Stack the rasters along a new 'raster' dimension
    stacked_data = xr.concat([r['band_data'] for r in rasters], dim='raster')
    # Compute the mode along the 'raster' axis; keepdims=False avoids the SciPy FutureWarning
    mode_data = stats.mode(stacked_data.values, axis=0, nan_policy='omit', keepdims=False)
    # Wrap the mode values back into a DataArray on the original grid
    dims = stacked_data.dims[1:]
    coords = {d: stacked_data.coords[d] for d in dims}
    return xr.DataArray(mode_data.mode, dims=dims, coords=coords)
# Create a dictionary to store mode values for each group
mode_values_dict = {}
# Compute mode values for each group
for group_index, group_names in enumerate(groups):
    group_rasters = [clipped_dict[i] for i in group_names]
    mode_values_dict[f'Group_{group_index + 1}'] = compute_mode_value(group_rasters)
# The mode values for each group can be accessed as mode_values_dict['Group_1'], mode_values_dict['Group_2'], etc.
# now plot the 6 groups post-binarization
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import matplotlib.gridspec as gridspec
# convert each raster to a dataframe
for i in data_dict:
    data_dict[i] = data_dict[i].to_dataframe()
    # bind each dataframe to its own variable so we don't have to index the dictionary every time
    globals()[i] = data_dict[i]
    print(i, 'converted to dataframe')
AttributeError: 'DataFrame' object has no attribute 'to_dataframe' (raised on re-running the cell once the dictionary entries were already DataFrames; traceback trimmed)
# use the x and y columns to build point GeoDataFrames
for i in data_dict:
    data_dict[i] = data_dict[i].reset_index()
    data_dict[i] = gpd.GeoDataFrame(data_dict[i], geometry=gpd.points_from_xy(data_dict[i].x, data_dict[i].y))
    print(i, 'converted to geodataframe')
P_1in5 converted to geodataframe ... FU_1in500 converted to geodataframe (19 of the 20 layers, before the error below)
TypeError: reset_index() missing 1 required positional argument: 'dims_or_levels' (FU_1in1000 was still an xarray Dataset, whose reset_index has a different signature than pandas'; traceback trimmed)
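# points_from_xy assigns no CRS to the new geometries; once every layer has been
# converted, the catchment's CRS can be copied over (a sketch):
for i in data_dict:
    data_dict[i] = data_dict[i].set_crs(catchment.crs)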
# make a dictionary of the dataframes
df_dict = {name: globals()[name] for name in pluvial + fluvial_undefined}
# For each dataframe, flag cells whose 'band_data' value is at or above 0.15 with a 1
# in a new 'threshold' column (0 otherwise), working on the per-layer variables rather
# than the dictionary.
for name in pluvial + fluvial_undefined:
    df = globals()[name]
    df['threshold'] = (df['band_data'] >= 0.15).astype(int)
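# Quick check (illustrative): distribution of the binary flag in one layer
print(P_1in5['threshold'].value_counts())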
# Build 6 group dataframes: each keeps the band/x/y columns from its first member and
# collects each member's threshold values in a column named 'threshold_<name>'.
group_defs = {
    'G1': ['P_1in5', 'P_1in10'],
    'G2': ['P_1in20', 'P_1in50', 'P_1in75'],
    'G3': ['P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000'],
    'G4': ['FU_1in5', 'FU_1in10'],
    'G5': ['FU_1in20', 'FU_1in50', 'FU_1in75'],
    'G6': ['FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000'],
}
for gname, members in group_defs.items():
    g = pd.DataFrame(globals()[members[0]], columns=['band', 'x', 'y'])
    for m in members:
        g[f'threshold_{m}'] = globals()[m]['threshold']
    globals()[gname] = g
# Take the row-wise mode of each group's threshold columns as the group's 'threshold'
# column. (DataFrame.mode(axis=1) is very slow at this row count; G2-G6 were commented
# out after G1 hung.)
G1['threshold'] = G1[['threshold_P_1in5', 'threshold_P_1in10']].mode(axis=1)[0]
# print("G1 threshold column created")
# G2['threshold'] = G2[['threshold_P_1in20', 'threshold_P_1in50', 'threshold_P_1in75']].mode(axis=1)[0]
# print("G2 threshold column created")
# G3['threshold'] = G3[['threshold_P_1in100', 'threshold_P_1in200', 'threshold_P_1in250', 'threshold_P_1in500', 'threshold_P_1in1000']].mode(axis=1)[0]
# print("G3 threshold column created")
# G4['threshold'] = G4[['threshold_FU_1in5', 'threshold_FU_1in10']].mode(axis=1)[0]
# print("G4 threshold column created")
# G5['threshold'] = G5[['threshold_FU_1in20', 'threshold_FU_1in50', 'threshold_FU_1in75']].mode(axis=1)[0]
# print("G5 threshold column created")
# G6['threshold'] = G6[['threshold_FU_1in100', 'threshold_FU_1in200', 'threshold_FU_1in250', 'threshold_FU_1in500', 'threshold_FU_1in1000']].mode(axis=1)[0]
# print("G6 threshold column created")
KeyboardInterrupt: the G1 mode computation was interrupted inside DataFrame.mode / Series.mode (traceback trimmed)
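# Since the threshold columns are binary, a vectorized majority vote is far faster than
# the row-wise mode above (a sketch; note that a 0/1 tie resolves to 1 here, whereas
# mode(axis=1)[0] resolves it to 0):
cols = ['threshold_P_1in5', 'threshold_P_1in10']
G1['threshold'] = (G1[cols].sum(axis=1) >= len(cols) / 2).astype(int)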
# bind each dataframe in the dictionary to its own variable
for i in data_dict:
    globals()[i] = data_dict[i]
    print(i, 'converted to variable')
P_1in5 converted to variable ... FU_1in1000 converted to variable (all 20 layers)
# drop NA values
for i in data_dict:
    data_dict[i] = data_dict[i].dropna()
    print(i, 'dropped na values')
len(P_1in5)
P_1in5 dropped na values ... FU_1in1000 dropped na values (all 20 layers)
6627410
# import mode function
from scipy.stats import mode
# create grouping categories per return period per flood type
P_under1 = ['P_1in1000', 'P_1in500', 'P_1in250', 'P_1in200', 'P_1in100']
P_1to10 = ['P_1in75', 'P_1in50', 'P_1in20']
P_over10 = ['P_1in10', 'P_1in5']
FU_under1 = ['FU_1in1000', 'FU_1in500', 'FU_1in250', 'FU_1in200', 'FU_1in100']
FU_1to10 = ['FU_1in75', 'FU_1in50', 'FU_1in20']
FU_over10 = ['FU_1in10', 'FU_1in5']
# create empty dictionary to store mode values
mode_dict = {}
# Calculate mode for each return period per flood type
for i in P_under1 + P_1to10 + P_over10 + FU_under1 + FU_1to10 + FU_over10:
    # Get the data array from the xarray Dataset
    data_array = data_dict[i].to_array()
    # Calculate the mode along axis 0; keepdims=True keeps the indexing below valid
    # and silences the SciPy FutureWarning
    mode_values = mode(data_array, axis=0, nan_policy='omit', keepdims=True)
    # Store the mode values in the dictionary
    mode_dict[i] = mode_values.mode[0]
    print(i, 'mode calculated:', mode_values.mode[0])
KeyboardInterrupt: the masked-array mode over millions of cells was interrupted inside scipy.stats.mstats find_repeats (traceback trimmed)
# set rasters to the same CRS as the catchment
for i in data_dict:
    data_dict[i] = data_dict[i].rio.reproject(catchment.crs, resampling=0)  # 0 = nearest neighbour
    print(i, 'reprojected')
P_1in5 reprojected ... FU_1in1000 reprojected (all 20 layers)
import geopandas as gpd
from scipy.stats import mode
# Set the data directory
datadir = 'C:/Users/jtrum/world_bank/data/'
# Load the catchment boundary
catchment = gpd.read_file(datadir + 'catchment.geojson')
# Define the probability groups
groups = {
    'low_prob': ['P_1in1000', 'P_1in500', 'P_1in250', 'P_1in200', 'P_1in100'],
    'medium_prob': ['P_1in75', 'P_1in50', 'P_1in20'],
    'high_prob': ['P_1in10', 'P_1in5']
}
# Build binary classifications per group
binary_classifications = {}
for group_name, group_rasters in groups.items():
    # binarize each raster at the 0.15 depth threshold used earlier, then sum the 0/1 layers
    binary_data = (xr.open_dataarray(datadir + f'AngolaFathom/Angola/pluvial/{group_rasters[0]}.tif') >= 0.15).astype(float)
    for raster_name in group_rasters[1:]:
        data = (xr.open_dataarray(datadir + f'AngolaFathom/Angola/pluvial/{raster_name}.tif') >= 0.15).astype(float)
        binary_data = binary_data + data
    # Set values to 1 where at least half of the group's rasters flag the cell as flooded
    binary_data = (binary_data >= len(group_rasters) / 2).astype(float)
    binary_classifications[group_name] = binary_data
    print(group_name, 'done')
# Now there are three binary classifications in the 'binary_classifications' dictionary
low_prob done medium_prob done high_prob done
# reproject low_prob, medium_prob, and high_prob to match the catchment CRS
for i in binary_classifications:
    binary_classifications[i] = binary_classifications[i].rio.reproject(catchment.crs, resampling=0)
    print(i, 'reprojected')
low_prob reprojected medium_prob reprojected high_prob reprojected
# crop the low_prob, medium_prob, and high_prob rasters to the extent of the catchment basin
clip = catchment.geometry
for i in binary_classifications:
    binary_classifications[i] = binary_classifications[i].rio.clip(clip, from_disk=True)
    print(i, 'clipped')
low_prob clipped medium_prob clipped high_prob clipped
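# Sanity check (illustrative): fraction of valid cells flagged as flooded in each
# clipped group (xarray's mean skips the NaN cells outside the catchment).
for name, da in binary_classifications.items():
    print(name, float(da.mean()))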
# view data as a dataframe
df = binary_classifications['low_prob'].to_dataframe()
df.head()
DataFrame indexed by (band, y, x) with columns 'spatial_ref' and 'band_data' (table rendering trimmed)
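# The three binary layers can also be written out as GeoTIFFs (a sketch; the output
# file names are assumptions):
for name, da in binary_classifications.items():
    da.rio.to_raster(datadir + f'{name}_binary.tif')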
import numpy as np
# Create groups for probability thresholds
thresholds = {
    'low_prob': (0, 0.01),       # <1% annual probability
    'medium_prob': (0.01, 0.1),  # 1-10% annual probability
    'high_prob': (0.1, 1)        # >10% annual probability
}
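# For reference, a '1inN' name corresponds to an annual exceedance probability of 1/N
# (e.g. 'P_1in100' -> 0.01); a small helper (illustrative):
def annual_exceedance_probability(name):
    return 1 / int(name.split('1in')[1])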
# Create a dictionary to store the grouped data
grouped_data = {key: {} for key in thresholds}
# Load and crop the rasters (pluvial and fluvial layers live in different folders)
for i in pluvial + fluvial_undefined:
    folder = 'pluvial' if i.startswith('P_') else 'fluvial_undefended'
    data = xr.open_dataset(datadir + f'AngolaFathom/Angola/{folder}/{i}.tif')
    # Crop the data to the extent of the catchment (note: if y is stored descending,
    # as is typical for GeoTIFFs, the y slice bounds must run max -> min)
    data = data.sel(x=slice(catchment.total_bounds[0], catchment.total_bounds[2]),
                    y=slice(catchment.total_bounds[3], catchment.total_bounds[1]))
    # Group the data based on probability thresholds
    for key, (min_prob, max_prob) in thresholds.items():
        mask = (data['probability'] >= min_prob) & (data['probability'] < max_prob)
        grouped_data[key][i] = data.where(mask, drop=True)
# Create a dictionary to store the binary classifications
binary_classification = {key: {} for key in thresholds}
# Create a binary classification based on majority values
for group, group_data in grouped_data.items():
    for i in pluvial + fluvial_undefined:
        binary_data = group_data[i].copy()
        # Replace NaN values with -1 to handle them separately
        binary_data = binary_data.fillna(-1)
        # Compute the mode (majority value) for each cell
        mode_result = mode(binary_data, axis=0, nan_policy='omit', keepdims=True)
        # Set cells to 1 where the majority value is 1, 0 otherwise
        majority = np.where(mode_result.mode == 1, 1, 0).astype(float)
        # np.where returns a bare ndarray, so wrap it back onto the original grid
        binary_classification[group][i] = xr.DataArray(majority, coords=binary_data.coords, dims=binary_data.dims)
# Now the `binary_classification` dictionary holds binary data for each group.
KeyError: 'probability' (the GeoTIFFs expose a 'band_data' variable holding water depth, not a 'probability' variable; traceback trimmed)
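# The KeyError above suggests one fix (a sketch, assuming the 0.15 depth threshold used
# earlier): mask on the 'band_data' depth variable, and take group membership from the
# return-period name (via the 'groups' dict above) rather than a per-cell probability.
def depth_mask(dataset, threshold=0.15):
    # flag cells whose modelled water depth meets the threshold
    return dataset['band_data'] >= threshold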