#!/usr/bin/env python # coding: utf-8 # In[19]: # raster to geodataframe # import geopandas as gpd import rasterio as rio from rasterio.plot import show from rasterio.features import geometry_mask from rasterio.mask import mask import matplotlib.pyplot as plt import numpy as np import pandas as pd import os import glob import xarray as xr import rioxarray as rxr # path for output outputPath = r"C:/Users/jtrum/Desktop/wb_outputs" os.chdir(r'C:/Users/jtrum/world_bank/data/') aoi = gpd.read_file('aoiLuanda.geojson') # Fathom # **IHME (access to improved water sources)** # **IHME (access to improved sanitation)** # **IHME (reliance on open defecation)** # HDSL (children) # HDSL (elderly) # HDSL (women of reproductive age) # **WSF 2019 (built-up area)** # ### IHME (water) # In[90]: improved_water_sources = xr.open_rasterio(r"C:/Users/jtrum/world_bank/data/IHME_LMIC_WASH_2000_2017_W_IMP_PERCENT_MEAN_2017_Y2020M06D02.tif") improved_water_sources = improved_water_sources.squeeze().drop(labels="band") improved_water_sources = improved_water_sources.rename({"x":"longitude", "y":"latitude"}) #convert to geodataframe improved_water_sources = improved_water_sources.to_dataframe(name="pct") #turn latitude and longitude into columns improved_water_sources.reset_index(inplace=True) improved_water_sources_gdf = gpd.GeoDataFrame(improved_water_sources, geometry=gpd.points_from_xy(improved_water_sources.longitude, improved_water_sources.latitude)) improved_water_sources_gdf.set_crs(epsg=4326, inplace=True) improved_water_sources_gdf_aoi = gpd.clip(improved_water_sources_gdf, aoi) improved_water_sources_gdf_aoi.to_file(os.path.join(outputPath, "improved_water_sources_gdf_aoi.geojson")) print("File saved to improved_water_sources_gdf_aoi.geojson") improved_water_sources_gdf_aoi # In[ ]: improved_water_sources = xr.open_rasterio(r"C:/Users/jtrum/world_bank/data/IHME_LMIC_WASH_2000_2017_W_IMP_PERCENT_MEAN_2017_Y2020M06D02.tif") improved_water_sources = improved_water_sources.squeeze().drop(labels="band") improved_water_sources = improved_water_sources.rename({"x":"longitude", "y":"latitude"}) #convert to geodataframe improved_water_sources = improved_water_sources.to_dataframe(name="pct") #turn latitude and longitude into columns improved_water_sources.reset_index(inplace=True) improved_water_sources_gdf = gpd.GeoDataFrame(improved_water_sources, geometry=gpd.points_from_xy(improved_water_sources.longitude, improved_water_sources.latitude)) improved_water_sources_gdf.set_crs(epsg=4326, inplace=True) improved_water_sources_gdf_aoi = gpd.clip(improved_water_sources_gdf, aoi) improved_water_sources_gdf_aoi.to_file(os.path.join(outputPath, "improved_water_sources_gdf_aoi.geojson")) print("File saved to improved_water_sources_gdf_aoi.geojson") improved_water_sources_gdf_aoi # ### IHME (sanitation) # In[3]: improved_sanitation_sources = xr.open_rasterio(r"C:/Users/jtrum/world_bank/data/IHME_LMIC_WASH_2000_2017_S_IMP_PERCENT_MEAN_2017_Y2020M06D02.tif") improved_sanitation_sources = improved_sanitation_sources.squeeze().drop(labels="band") improved_sanitation_sources = improved_sanitation_sources.rename({"x":"longitude", "y":"latitude"}) #convert to geodataframe improved_sanitation_sources = improved_sanitation_sources.to_dataframe(name="pct") #turn latitude and longitude into columns improved_sanitation_sources.reset_index(inplace=True) improved_sanitation_sources_gdf = gpd.GeoDataFrame(improved_sanitation_sources, geometry=gpd.points_from_xy(improved_sanitation_sources.longitude, improved_sanitation_sources.latitude)) improved_sanitation_sources_gdf.set_crs(epsg=4326, inplace=True) improved_sanitation_sources_gdf_aoi = gpd.clip(improved_sanitation_sources_gdf, aoi) improved_sanitation_sources_gdf_aoi.to_file(os.path.join(outputPath, "improved_sanitation_sources_gdf_aoi.geojson")) print("File saved to improved_sanitation_sources_gdf_aoi.geojson") improved_sanitation_sources_gdf_aoi = improved_sanitation_sources_gdf_aoi[['pct', 'geometry']] # ### IHME (open defacation) # In[7]: open_def_sources = xr.open_rasterio(r"C:/Users/jtrum/world_bank/data/IHME_LMIC_WASH_2000_2017_S_OD_PERCENT_MEAN_2017_Y2020M06D02.tif") open_def_sources = open_def_sources.squeeze().drop(labels="band") open_def_sources = open_def_sources.rename({"x":"longitude", "y":"latitude"}) #convert to geodataframe open_def_sources = open_def_sources.to_dataframe(name="pct") #turn latitude and longitude into columns open_def_sources.reset_index(inplace=True) open_def_sources_gdf = gpd.GeoDataFrame(open_def_sources, geometry=gpd.points_from_xy(open_def_sources.longitude, open_def_sources.latitude)) open_def_sources_gdf.set_crs(epsg=4326, inplace=True) open_def_sources_gdf_aoi = gpd.clip(open_def_sources_gdf, aoi) open_def_sources_gdf_aoi.to_file(os.path.join(outputPath, "open_def_sources_gdf_aoi.geojson")) print("File saved to open_def_sources_gdf_aoi.geojson") open_def_sources_gdf_aoi = open_def_sources_gdf_aoi[['pct', 'geometry']] open_def_sources_gdf_aoi # ### Children under 5 # In[ ]: raster = rxr.open_rasterio(r"C:/Users/jtrum/world_bank/data/ago_children_under_five_2020.tif") clip_bound = aoi.geometry clip_raster = raster.rio.clip(clip_bound, from_disk=True) children_under_five = clip_raster.squeeze().drop(labels="band") children_under_five = children_under_five.rename({"x":"longitude", "y":"latitude"}) children_under_five = children_under_five.to_dataframe(name="pct") children_under_five = children_under_five.reset_index() children_under_five_gdf = gpd.GeoDataFrame(children_under_five, geometry=gpd.points_from_xy(children_under_five.longitude, children_under_five.latitude)) children_under_five_gdf.set_crs(epsg=4326, inplace=True) children_under_five_gdf_aoi = gpd.clip(children_under_five_gdf, aoi) children_under_five_gdf_aoi.dropna(inplace=True) children_under_five_gdf_aoi[['pct', 'geometry']] children_under_five_gdf_aoi.to_file(os.path.join(outputPath, "children_under_five_gdf_aoi.geojson")) # ### Elderly # In[34]: raster = rxr.open_rasterio(r"C:/Users/jtrum/world_bank/data/ago_elderly_60_plus_2020.tif") clip_bound = aoi.geometry clip_raster = raster.rio.clip(clip_bound, from_disk=True) children_under_five = clip_raster.squeeze().drop(labels="band") children_under_five = children_under_five.rename({"x":"longitude", "y":"latitude"}) children_under_five = children_under_five.to_dataframe(name="pct") children_under_five = children_under_five.reset_index() children_under_five_gdf = gpd.GeoDataFrame(children_under_five, geometry=gpd.points_from_xy(children_under_five.longitude, children_under_five.latitude)) children_under_five_gdf.set_crs(epsg=4326, inplace=True) children_under_five_gdf_aoi = gpd.clip(children_under_five_gdf, aoi) children_under_five_gdf_aoi.dropna(inplace=True) children_under_five_gdf_aoi[['pct', 'geometry']] children_under_five_gdf_aoi.to_file(os.path.join(outputPath, "elderly_over_60_gdf_aoi.geojson")) # ### Women # In[37]: raster = rxr.open_rasterio(r"C:/Users/jtrum/world_bank/data/ago_women_of_reproductive_age_15_49_2020.tif") clip_bound = aoi.geometry clip_raster = raster.rio.clip(clip_bound, from_disk=True) children_under_five = clip_raster.squeeze().drop(labels="band") children_under_five = children_under_five.rename({"x":"longitude", "y":"latitude"}) children_under_five = children_under_five.to_dataframe(name="pct") children_under_five = children_under_five.reset_index() children_under_five_gdf = gpd.GeoDataFrame(children_under_five, geometry=gpd.points_from_xy(children_under_five.longitude, children_under_five.latitude)) children_under_five_gdf.set_crs(epsg=4326, inplace=True) children_under_five_gdf_aoi = gpd.clip(children_under_five_gdf, aoi) children_under_five_gdf_aoi.dropna(inplace=True) children_under_five_gdf_aoi[['pct', 'geometry']] children_under_five_gdf_aoi.to_file(os.path.join(outputPath, "women_reproductive_age_gdf_aoi.geojson")) # In[38]: children_under_five_gdf_aoi['pct'].hist() # In[39]: len(children_under_five_gdf_aoi) # ### WSF 2019 # In[12]: #open wsf2019 raster built_up_area = xr.open_rasterio(r"C:/Users/jtrum/world_bank/data/wsf2019.tif") built_up_area = built_up_area.squeeze().drop(labels="band") built_up_area = built_up_area.rename({"x":"longitude", "y":"latitude"}) built_up_area # In[16]: import numpy as np # Assuming 'built_up_area' is the raster you want to crop built_up_area_data = built_up_area.read(1) # Read the raster data # Apply the mask to the raster data cropped_raster_data = np.ma.masked_array(built_up_area_data, mask) # Create a new rasterio dataset with the cropped data cropped_raster = rio.open( "C:/Users/jtrum/Desktop/wb_outputs/cropped_wsf2019.tif", "w", driver="GTiff", height=cropped_raster_data.shape[0], width=cropped_raster_data.shape[1], count=1, # Assuming a single band raster dtype=str(cropped_raster_data.dtype), crs=built_up_area.crs, transform=built_up_area.transform, ) # Write the cropped data to the new raster dataset cropped_raster.write(cropped_raster_data.filled(fill_value=0), 1) # Fill masked values with 0 or another suitable value cropped_raster.close() # In[26]: built_up_area = xr.open_rasterio(r"C:/Users/jtrum/world_bank/data/wsf2019.tif") built_up_area = built_up_area.squeeze().drop(labels="band") built_up_area = built_up_area.rename({"x":"longitude", "y":"latitude"}) built_up_area # In[27]: #convert to geodataframe built_up_area = built_up_area.to_dataframe(name="built_up_area") # In[28]: #drop zero values built_up_area_255 = built_up_area[built_up_area.built_up_area != 0] built_up_area_0 = built_up_area[built_up_area.built_up_area == 0] built_up_area_255 # In[29]: #turn latitude and longitude into columns built_up_area_255.reset_index(inplace=True) built_up_area_255 # In[42]: built_up_area_255_gdf = gpd.GeoDataFrame(built_up_area_255, geometry=gpd.points_from_xy(built_up_area_255.longitude, built_up_area_255.latitude)) # In[43]: built_up_area_255_gdf['built'] = "True" built_up_area_255_gdf = built_up_area_255_gdf.drop(columns=['built_up_area', 'latitude', 'longitude']) built_up_area_255_gdf # In[44]: #set up built_up_are_255_gdf to crs of aoi built_up_area_255_gdf = built_up_area_255_gdf.set_crs(epsg=4326) # In[40]: built_up_area_255_gdf # In[41]: aoi # In[45]: #only keep points within aoi built_up_area_255_gdf_aoi = gpd.sjoin(built_up_area_255_gdf, aoi, how="inner", op='intersects') # In[46]: print(len(built_up_area_255_gdf_aoi)) # In[47]: built_up_area_255_gdf_aoi # In[48]: #export as geojson to output folder built_up_area_255_gdf_aoi.to_file(r"C:/Users/jtrum/Desktop/wb_outputs/built_up_area_255_gdf_aoi.geojson", driver='GeoJSON')