'''
Data can be accessed
from the following link:
https://data.humdata.org/dataset?dataseries_name=Data+for+Good+at+Meta+-+High+Resolution+Population+Density+Maps+and+Demographic+Estimates
'''
# Access to improved water sources with school and hospital overlay and built-up underlay
# package import
import pandas as pd
import geopandas as gpd
import os
import osmnx as ox
import xarray as xr
import rioxarray as rxr
os.chdir(r'C:/Users/jtrum/world_bank/data/')
# read in improved water sources data
water = xr.open_rasterio('IHME_LMIC_WASH_2000_2017_W_IMP_PERCENT_MEAN_2017_Y2020M06D02.TIF')
# read in built-up data raster
built = xr.open_rasterio('WSF2019_v1_12_-10.tif')
# read in AOI
aoi = gpd.read_file('aoiLuanda.geojson')
# clip raster to AOI
clip = aoi.geometry
water_clip = water.rio.clip(clip, from_disk=True)
built_clip = built.rio.clip(clip, from_disk=True)
built = xr.open_dataset('WSF2019_v1_12_-10.tif')
built_clip = built.rio.clip(clip, from_disk=True)
built_gdf = built_clip.to_dataframe().reset_index()
# drop columns 'band' and 'spatial_ref'
built_gdf = built_gdf.drop(['band', 'spatial_ref'], axis=1)
# rename 'band_data' to 'value'
built_gdf = built_gdf.rename(columns={'band_data': 'value'})
# drop NaN values
built_gdf = built_gdf.dropna()
# convert 'x' and 'y' columns to geometry
built_gdf = gpd.GeoDataFrame(built_gdf, geometry=gpd.points_from_xy(built_gdf.x, built_gdf.y))
# set CRS to WGS84
built_gdf.crs = 'EPSG:4326'
# drop 'x' and 'y' columns
built_gdf = built_gdf.drop(['x', 'y'], axis=1)
# rename 0 values in 'value' to 'unbuilt' and 255 values to 'built'
built_gdf
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Untitled-1.ipynb Cell 5 line 2 <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W5sdW50aXRsZWQ%3D?line=0'>1</a> # drop columns 'band' and 'spatial_ref' ----> <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W5sdW50aXRsZWQ%3D?line=1'>2</a> built_gdf = built_gdf.drop(['band', 'spatial_ref'], axis=1) <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W5sdW50aXRsZWQ%3D?line=2'>3</a> # rename 'band_data' to 'value' <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W5sdW50aXRsZWQ%3D?line=3'>4</a> built_gdf = built_gdf.rename(columns={'band_data': 'value'}) File c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\pandas\core\frame.py:5258, in DataFrame.drop(self, labels, axis, index, columns, level, inplace, errors) 5110 def drop( 5111 self, 5112 labels: IndexLabel = None, (...) 5119 errors: IgnoreRaise = "raise", 5120 ) -> DataFrame | None: 5121 """ 5122 Drop specified labels from rows or columns. 5123 (...) 5256 weight 1.0 0.8 5257 """ -> 5258 return super().drop( 5259 labels=labels, 5260 axis=axis, 5261 index=index, 5262 columns=columns, 5263 level=level, 5264 inplace=inplace, 5265 errors=errors, 5266 ) File c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\pandas\core\generic.py:4549, in NDFrame.drop(self, labels, axis, index, columns, level, inplace, errors) 4547 for axis, labels in axes.items(): 4548 if labels is not None: -> 4549 obj = obj._drop_axis(labels, axis, level=level, errors=errors) 4551 if inplace: 4552 self._update_inplace(obj) File c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\pandas\core\generic.py:4591, in NDFrame._drop_axis(self, labels, axis, level, errors, only_slice) 4589 new_axis = axis.drop(labels, level=level, errors=errors) 4590 else: -> 4591 new_axis = axis.drop(labels, errors=errors) 4592 indexer = axis.get_indexer(new_axis) 4594 # Case for non-unique axis 4595 else: File c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\pandas\core\indexes\base.py:6699, in Index.drop(self, labels, errors) 6697 if mask.any(): 6698 if errors != "ignore": -> 6699 raise KeyError(f"{list(labels[mask])} not found in axis") 6700 indexer = indexer[~mask] 6701 return self.delete(indexer) KeyError: "['band', 'spatial_ref'] not found in axis"
built_gdf['value'] = built_gdf['value'].replace({0: 'unbuilt', 255: 'built'})
from sklearn.cluster import DBSCAN
cluster = DBSCAN(eps=0.1, min_samples=5).fit(built_gdf.geometry).fit(built_gdf.geometry.values.reshape(-1, 1))
built_gdf['cluster'] = cluster.labels_
polygons = built_gdf.dissolve(by='cluster')
built_gdf
value | geometry | |
---|---|---|
4814 | unbuilt | POINT (12.99256 -9.06890) |
4815 | unbuilt | POINT (12.99256 -9.06899) |
4816 | unbuilt | POINT (12.99256 -9.06908) |
4817 | unbuilt | POINT (12.99256 -9.06917) |
4818 | unbuilt | POINT (12.99256 -9.06926) |
... | ... | ... |
56257733 | unbuilt | POINT (13.63387 -8.85321) |
56257734 | unbuilt | POINT (13.63387 -8.85330) |
56265612 | unbuilt | POINT (13.63396 -8.85312) |
56265613 | unbuilt | POINT (13.63396 -8.85321) |
56265614 | unbuilt | POINT (13.63396 -8.85330) |
24865520 rows × 2 columns
# binarize built clip so that 0's = not developed and 255's = developed
# built_clip = built_clip.where(built_clip == 255, 'developed')
# built_clip = built_clip.where(built_clip == 0, 'undeveloped')
# convert built_clip to geodataframe
built_gdf = built_clip.to_gdf()
# built_clip = built_clip.reset_index()
# built_clip = built_clip.drop(columns=['band'])
# built_clip = gpd.GeoDataFrame(built_clip, geometry=gpd.points_from_xy(built_clip.x, built_clip.y))
# built_clip = built_clip.drop(columns=['x', 'y'])
# built_clip = built_clip.set_crs('EPSG:4326')
built_clip
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Untitled-1.ipynb Cell 5 line 6 <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=0'>1</a> # binarize built clip so that 0's = not developed and 255's = developed <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=1'>2</a> # built_clip = built_clip.where(built_clip == 255, 'developed') <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=2'>3</a> # built_clip = built_clip.where(built_clip == 0, 'undeveloped') <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=3'>4</a> <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=4'>5</a> # convert built_clip to geodataframe ----> <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=5'>6</a> built_gdf = built_clip.to_gdf() <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=6'>7</a> # built_clip = built_clip.reset_index() <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=7'>8</a> # built_clip = built_clip.drop(columns=['band']) <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=8'>9</a> # built_clip = gpd.GeoDataFrame(built_clip, geometry=gpd.points_from_xy(built_clip.x, built_clip.y)) <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=9'>10</a> # built_clip = built_clip.drop(columns=['x', 'y']) <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=10'>11</a> # built_clip = built_clip.set_crs('EPSG:4326') <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W3sdW50aXRsZWQ%3D?line=11'>12</a> built_clip File c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\xarray\core\common.py:278, in AttrAccessMixin.__getattr__(self, name) 276 with suppress(KeyError): 277 return source[name] --> 278 raise AttributeError( 279 f"{type(self).__name__!r} object has no attribute {name!r}" 280 ) AttributeError: 'DataArray' object has no attribute 'to_gdf'