This code chunk will process your raster and turn it into a shapefile, then re-import the shapefile and clip it to the extent of AOI (or catchment) and export that as a geojson as the final output for vectorization. The code beneath is another method to automatically process the raster as a geojson instead of as a shapefile then re-importing, clipping to extent, and exporting, but I have found the first method does take less time.
from osgeo import gdal, ogr, osr
import geopandas as gpd
raster = gdal.Open(r"C:/Users/jtrum/world_bank/data/IHME_LMIC_WASH_2000_2017_S_OD_PERCENT_MEAN_2017_Y2020M06D02.tif")
band = raster.GetRasterBand(1) # can use whichever band, but 1 works fine in most cases
band.ReadAsArray()
proj = raster.GetProjection()
shp_proj = osr.SpatialReference()
shp_proj.ImportFromWkt(proj)
output_file = 'C:/Users/jtrum/Desktop/wb_outputs/sanitation.shp' # change to your output path
call_drive = ogr.GetDriverByName('ESRI Shapefile') # exports as shp
create_shp = call_drive.CreateDataSource(output_file)
shp_layer = create_shp.CreateLayer('pct', srs=shp_proj) # name of layer in shp
new_field = ogr.FieldDefn(str('ID'), ogr.OFTInteger)# will become the column name in geodataframe that raster values are extrapolated from
shp_layer.CreateField(new_field)
gdal.Polygonize(band, None, shp_layer, 0, [], callback=None)
create_shp.Destroy()
raster = None
# re-import the shapefile as a geodataframe
gdf = gpd.read_file(r"C:/Users/jtrum/Desktop/wb_outputs/sanitation.shp")
# # load in aoi as geodataframe
aoi = gpd.read_file('C:/Users/jtrum/Desktop/wb_outputs/aoiLuanda.geojson')
# ensure matching crs
gdf.crs = aoi.crs
# clip the shapefile to the aoi
gdf = gpd.clip(gdf, aoi)
# export to geojson
gdf.to_file(r"C:/Users/jtrum/Desktop/wb_outputs/sanitation.geojson", driver='GeoJSON')
from osgeo import gdal, ogr, osr
raster = gdal.Open(r"C:/Users/jtrum/world_bank/data/IHME_LMIC_WASH_2000_2017_S_OD_PERCENT_MEAN_2017_Y2020M06D02.tif")
band = raster.GetRasterBand(1)
band_array = band.ReadAsArray()
proj = raster.GetProjection()
shp_proj = osr.SpatialReference()
shp_proj.ImportFromWkt(proj)
output_file = 'C:/Users/jtrum/Desktop/wb_outputs/sanitation.geojson'
driver = ogr.GetDriverByName('GeoJSON')
create_geojson = driver.CreateDataSource(output_file)
geojson_layer = create_geojson.CreateLayer('pct', srs=shp_proj, geom_type=ogr.wkbPolygon)
new_field = ogr.FieldDefn(str('ID'), ogr.OFTInteger)
geojson_layer.CreateField(new_field)
gdal.Polygonize(band, None, geojson_layer, 0, [], callback=None)
create_geojson.Destroy()
raster = None