import tifffile as tf
import numpy as np
import rasterio as rio
import xarray as xr
datadir = 'C:/Users/jtrum/world_bank/data/'
wsf = tf.imread(datadir + 'WSF2019_v1_12_-10.tif')
# Open the GeoTIFF file
with rio.open('C:/Users/jtrum/world_bank/data/WSF2019_v1_12_-10.tif') as src:
# You can now work with the raster data using 'src'
# For example, you can access metadata like CRS, transform, and more:
crs = src.crs
transform = src.transform
width = src.width
height = src.height
count = src.count # Number of bands
dtype = src.dtypes[0] # Data type of the first band
# Read a specific band from the raster (e.g., band 1)
band1 = src.read(1)
# Read the entire raster as a NumPy array (all bands)
full_raster = src.read()
full_raster.shape
(1, 22487, 22487)
full_raster[0, 0, 0]
0
#see unique values in raster and how many of each
unique, counts = np.unique(full_raster, return_counts=True)
counts
array([499966267, 5698902], dtype=int64)
#make a plot of the raster
import matplotlib.pyplot as plt
plt.imshow(band1)
plt.show()
# remove 9000 pixels from the left side of the raster, 2000 pixels from the top, and 7000 pixels from the bottom
band1 = band1[2000:-7000, 9000:]
#see shape of raster
band1.shape
(13487, 13487)
13487*13487
181899169
#convert to pandas
import pandas as pd
df = pd.DataFrame(band1)
import georasters as gr
--------------------------------------------------------------------------- ImportError Traceback (most recent call last) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\osgeo\__init__.py:30, in swig_import_helper() 29 try: ---> 30 return importlib.import_module(mname) 31 except ImportError: File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\importlib\__init__.py:127, in import_module(name, package) 126 level += 1 --> 127 return _bootstrap._gcd_import(name[level:], package, level) File <frozen importlib._bootstrap>:1014, in _gcd_import(name, package, level) File <frozen importlib._bootstrap>:991, in _find_and_load(name, import_) File <frozen importlib._bootstrap>:975, in _find_and_load_unlocked(name, import_) File <frozen importlib._bootstrap>:657, in _load_unlocked(spec) File <frozen importlib._bootstrap>:556, in module_from_spec(spec) File <frozen importlib._bootstrap_external>:1166, in create_module(self, spec) File <frozen importlib._bootstrap>:219, in _call_with_frames_removed(f, *args, **kwds) ImportError: DLL load failed while importing _gdal: The specified module could not be found. During handling of the above exception, another exception occurred: ImportError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 14 line 1 ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y106sZmlsZQ%3D%3D?line=0'>1</a> import georasters as gr File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\georasters\__init__.py:3 1 # This file allows all subdirectories in this directory to be loaded by Python 2 # -*- coding: utf-8 -*- ----> 3 from .georasters import get_geo_info, map_pixel, map_pixel_inv, aggregate, create_geotiff, align_rasters, \ 4 load_tiff, union, GeoRaster, RasterGeoError, RasterGeoTError, RasterGeoTWarning, merge, \ 5 from_file, to_pandas, to_geopandas, from_pandas, raster_weights, map_vector, \ 6 align_georasters 8 __all__ = (['get_geo_info','map_pixel', 'map_pixel_inv','aggregate','create_geotiff','align_rasters', \ 9 'load_tiff', 'union', 'GeoRaster', 'RasterGeoError', 'RasterGeoTError', \ 10 'RasterGeoTWarning', 'merge', 'from_file','to_pandas','to_geopandas', 'from_pandas', \ 11 'raster_weights', 'map_vector', 'align_georasters']) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\georasters\georasters.py:33 31 from __future__ import division 32 import numpy as np ---> 33 from osgeo import gdal, gdalnumeric, ogr, osr, gdal_array 34 try: 35 from gdalconst import GA_ReadOnly File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\osgeo\__init__.py:46 42 raise ImportError(traceback_string + '\n' + msg) 43 return importlib.import_module('_gdal') ---> 46 _gdal = swig_import_helper() 47 del swig_import_helper 49 __version__ = _gdal.__version__ = _gdal.VersionInfo("RELEASE_NAME") File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\osgeo\__init__.py:42, in swig_import_helper() 40 import traceback 41 traceback_string = ''.join(traceback.format_exception(*sys.exc_info())) ---> 42 raise ImportError(traceback_string + '\n' + msg) 43 return importlib.import_module('_gdal') ImportError: Traceback (most recent call last): File "c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\osgeo\__init__.py", line 30, in swig_import_helper return importlib.import_module(mname) File "c:\Users\jtrum\miniconda3\envs\wash_scan\lib\importlib\__init__.py", line 127, in import_module return _bootstrap._gcd_import(name[level:], package, level) File "<frozen importlib._bootstrap>", line 1014, in _gcd_import File "<frozen importlib._bootstrap>", line 991, in _find_and_load File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked File "<frozen importlib._bootstrap>", line 657, in _load_unlocked File "<frozen importlib._bootstrap>", line 556, in module_from_spec File "<frozen importlib._bootstrap_external>", line 1166, in create_module File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed ImportError: DLL load failed while importing _gdal: The specified module could not be found. On Windows, with Python >= 3.8, DLLs are no longer imported from the PATH. If gdalXXX.dll is in the PATH, then set the USE_PATH_FOR_GDAL_PYTHON=YES environment variable to feed the PATH into os.add_dll_directory().
df
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 13477 | 13478 | 13479 | 13480 | 13481 | 13482 | 13483 | 13484 | 13485 | 13486 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
13482 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
13483 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
13484 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
13485 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
13486 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
13487 rows × 13487 columns
# Open the TIFF image
with rio.open('C:/Users/jtrum/world_bank/data/WSF2019_v1_12_-10.tif') as src:
luanda_bbox = aoi.total_bounds # Get the bounding box of Luanda
window = src.window(*luanda_bbox) # Calculate the window for cropping
cropped_data = src.read(1, window=window) # Read the data from the window
# Ensure that only 0 and 255 values are retained
cropped_data[cropped_data != 0] = 255
--------------------------------------------------------------------------- NameError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 5 line 3 <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X62sZmlsZQ%3D%3D?line=0'>1</a> # Open the TIFF image <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X62sZmlsZQ%3D%3D?line=1'>2</a> with rio.open('C:/Users/jtrum/world_bank/data/WSF2019_v1_12_-10.tif') as src: ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X62sZmlsZQ%3D%3D?line=2'>3</a> luanda_bbox = aoi.total_bounds # Get the bounding box of Luanda <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X62sZmlsZQ%3D%3D?line=3'>4</a> window = src.window(*luanda_bbox) # Calculate the window for cropping <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X62sZmlsZQ%3D%3D?line=4'>5</a> cropped_data = src.read(1, window=window) # Read the data from the window NameError: name 'aoi' is not defined
import tifffile as tf
import numpy as np
import geopandas as gpd
import pandas as pd
import rasterio as rio
from rasterio.mask import mask
from rasterio.plot import show
from shapely.geometry import mapping
from rasterio.windows import Window
datadir = 'C:/Users/jtrum/world_bank/data/'
wsf = tf.imread(datadir + 'WSF2019_v1_12_-10.tif')
aoi = gpd.read_file(datadir + 'luanda2clean.shp')
c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\geopandas\_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow. warnings.warn( C:\Users\jtrum\AppData\Local\Temp\ipykernel_8512\2310863835.py:3: DeprecationWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas: import os os.environ['USE_PYGEOS'] = '0' import geopandas In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html). import geopandas as gpd
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 2 line 3 <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W0sZmlsZQ%3D%3D?line=0'>1</a> import tifffile as tf <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W0sZmlsZQ%3D%3D?line=1'>2</a> import numpy as np ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W0sZmlsZQ%3D%3D?line=2'>3</a> import geopandas as gpd <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W0sZmlsZQ%3D%3D?line=3'>4</a> import pandas as pd <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W0sZmlsZQ%3D%3D?line=4'>5</a> import rasterio as rio File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\geopandas\__init__.py:1 ----> 1 from geopandas._config import options # noqa 3 from geopandas.geoseries import GeoSeries # noqa 4 from geopandas.geodataframe import GeoDataFrame # noqa File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\geopandas\_config.py:109 102 import geopandas._compat as compat 104 compat.set_use_pygeos(value) 107 use_pygeos = Option( 108 key="use_pygeos", --> 109 default_value=_default_use_pygeos(), 110 doc=( 111 "Whether to use PyGEOS to speed up spatial operations. The default is True " 112 "if PyGEOS is installed, and follows the USE_PYGEOS environment variable " 113 "if set." 114 ), 115 validator=_validate_bool, 116 callback=_callback_use_pygeos, 117 ) 120 options = Options({"display_precision": display_precision, "use_pygeos": use_pygeos}) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\geopandas\_config.py:95, in _default_use_pygeos() 94 def _default_use_pygeos(): ---> 95 import geopandas._compat as compat 97 return compat.USE_PYGEOS File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\geopandas\_compat.py:262 255 HAS_RTREE = False 258 # ----------------------------------------------------------------------------- 259 # pyproj compat 260 # ----------------------------------------------------------------------------- --> 262 PYPROJ_GE_31 = Version(pyproj.__version__) >= Version("3.1") 263 PYPROJ_GE_32 = Version(pyproj.__version__) >= Version("3.2") AttributeError: module 'pyproj' has no attribute '__version__'
--------------------------------------------------------------------------- NameError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 5 line 3 <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W1sZmlsZQ%3D%3D?line=0'>1</a> # Open the TIFF image <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W1sZmlsZQ%3D%3D?line=1'>2</a> with rio.open('C:/Users/jtrum/world_bank/data/WSF2019_v1_12_-10.tif') as src: ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W1sZmlsZQ%3D%3D?line=2'>3</a> luanda_bbox = aoi.total_bounds # Get the bounding box of Luanda <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W1sZmlsZQ%3D%3D?line=3'>4</a> window = src.window(*luanda_bbox) # Calculate the window for cropping <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#W1sZmlsZQ%3D%3D?line=4'>5</a> cropped_data = src.read(1, window=window) # Read the data from the window NameError: name 'aoi' is not defined
cropped_data
array([[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
# Replace 'input.tif' with your input TIFF file and 'output.tif' with the desired output file name.
input_tiff_path = datadir + 'wsf2019.tif'
output_tiff_path = datadir + 'output.tif'
# Load the input TIFF file
with rio.open(input_tiff_path) as src:
# Define CRS (Coordinate Reference System) for the output file
crs = src.crs
# Define geotransform parameters (affine transformation)
# You may need to adjust these based on your specific data
# (left, top), (x_resolution, y_resolution)
transform = from_origin(src.bounds.left, src.bounds.top, src.res[0], src.res[1])
# Create a new GeoTIFF file with CRS and transform
with rio.open(output_tiff_path, 'w', driver='GTiff', width=src.width, height=src.height,
count=src.count, dtype=src.dtypes[0], crs=crs, transform=transform) as dst:
# Copy the data from the input TIFF to the output TIFF
dst.write(src.read())
print(f"GeoTIFF file with CRS and transform created: {output_tiff_path}")
GeoTIFF file with CRS and transform created: C:/Users/jtrum/world_bank/data/output.tif
# Count the occurrences of each unique value
unique_values, value_counts = np.unique(ws, return_counts=True)
# Print the unique values and their counts
for value, count in zip(unique_values, value_counts):
print(f"Value: {value}, Count: {count}")
Value: 0, Count: 50776044 Value: 255, Count: 5495036
wsf2019 = tf.imread(datadir + 'wsf2019.tif')
wsf2019.shape
(850, 771)
import matplotlib.pyplot as plt
wsf2019
array([[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
# make a plot of the aoi with the tiff data overlaid
fig, ax = plt.subplots(figsize=(10, 10))
aoi.plot(ax=ax, color='red')
show(wsf2019, ax=ax, cmap='gray', alpha=0.5)
water_infrastructure.plot(ax=ax, color='blue', alpha=0.5)
<Axes: >
# export the cropped data as a GeoTIFF
with rio.open('C:/Users/jtrum/world_bank/data/wsf2019.tif', 'w', **src.profile) as dest:
dest.write(cropped_data, 1)
import osmnx as ox
tags_list = [
{'landuse': ['reservoir', 'basin']},
{'amenity': ['drinking_water', 'watering_place', 'water_point']},
{'man_made': ['water_well', 'water_tower', 'water_works', 'reservoir_covered', 'storage_tank', 'monitoring_station', 'wastewater_plant', 'watermill', 'pipeline']}
]
water_infrastructure = pd.DataFrame(columns=['feature', 'geometry'])
for tags in tags_list:
data = ox.geometries_from_polygon(aoi.geometry[0], tags=tags)
data = data[['geometry']]
data['feature'] = list(tags.keys())[0] # Extract the feature type from the tags
water_infrastructure = water_infrastructure.append(data)
water_infrastructure = water_infrastructure.reset_index(drop=True)
water_infrastructure = gpd.GeoDataFrame(water_infrastructure, geometry='geometry', crs=aoi.crs)
C:\Users\jtrum\AppData\Local\Temp\ipykernel_25404\1104742602.py:10: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release. data = ox.geometries_from_polygon(aoi.geometry[0], tags=tags) C:\Users\jtrum\AppData\Local\Temp\ipykernel_25404\1104742602.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. water_infrastructure = water_infrastructure.append(data) c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\geopandas\array.py:1406: UserWarning: CRS not set for some of the concatenation inputs. Setting output's CRS as WGS 84 (the single non-null crs provided). warnings.warn( C:\Users\jtrum\AppData\Local\Temp\ipykernel_25404\1104742602.py:10: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release. data = ox.geometries_from_polygon(aoi.geometry[0], tags=tags) C:\Users\jtrum\AppData\Local\Temp\ipykernel_25404\1104742602.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. water_infrastructure = water_infrastructure.append(data) C:\Users\jtrum\AppData\Local\Temp\ipykernel_25404\1104742602.py:10: UserWarning: The `geometries` module and `geometries_from_X` functions have been renamed the `features` module and `features_from_X` functions. Use these instead. The `geometries` module and function names are deprecated and will be removed in a future release. data = ox.geometries_from_polygon(aoi.geometry[0], tags=tags) C:\Users\jtrum\AppData\Local\Temp\ipykernel_25404\1104742602.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. water_infrastructure = water_infrastructure.append(data)
water_infrastructure
feature | geometry | |
---|---|---|
0 | landuse | POLYGON ((13.37171 -8.96650, 13.37154 -8.96654... |
1 | landuse | POLYGON ((13.42644 -8.96246, 13.42685 -8.96349... |
2 | landuse | POLYGON ((13.32465 -8.87447, 13.32464 -8.87460... |
3 | landuse | POLYGON ((13.40856 -9.12585, 13.40885 -9.12606... |
4 | landuse | POLYGON ((13.41248 -9.12853, 13.41237 -9.12871... |
... | ... | ... |
67 | man_made | POINT (13.41021 -9.02977) |
68 | man_made | POINT (13.40010 -9.06025) |
69 | man_made | POLYGON ((13.41069 -9.04171, 13.41066 -9.04166... |
70 | man_made | POLYGON ((13.41041 -9.15145, 13.40960 -9.15160... |
71 | man_made | POLYGON ((13.44903 -9.07548, 13.44872 -9.07420... |
72 rows × 2 columns
import rasterio
from rasterio.transform import from_origin
from rasterio.crs import CRS
GeoTIFF file with CRS and transform created: C:/Users/jtrum/world_bank/data/output.tif
# Load the TIFF file
with rasterio.open(datadir + 'output.tif') as src:
# Display the TIFF file using rasterio's show function
show(src)
# Overlay the GeoDataFrame on the plot
aoi.plot(ax=plt.gca(), color='red', markersize=5)
# Add titles and labels if needed
plt.title("GeoTIFF and GeoDataFrame Overlay")
plt.xlabel("Longitude")
plt.ylabel("Latitude")
# Show the plot
plt.show()
# Load the GeoTIFF file
with rasterio.open(datadir + 'output.tif') as src:
# Plot the GeoTIFF in grayscale
show(src, cmap='gray', ax=plt.gca())
# Overlay the AOI outline in white with transparent background
aoi = aoi.to_crs(src.crs)
aoi.boundary.plot(ax=plt.gca(), color='white', linewidth=2, alpha=0.7)
# Plot the 'water_infrastructure' GeoDataFrame in blue
water_infrastructure.plot(ax=plt.gca(), color='blue', markersize=5)
<Axes: >
show(src)
--------------------------------------------------------------------------- RasterioIOError Traceback (most recent call last) Cell In[26], line 1 ----> 1 show(src) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\rasterio\plot.py:95, in show(source, with_bounds, contour, contour_label_kws, ax, title, transform, adjust, **kwargs) 92 kwargs['extent'] = plotting_extent(source) 94 if source.count == 1: ---> 95 arr = source.read(1, masked=True) 96 else: 97 try: 98 # Lookup table for the color space in the source file. 99 # This will allow us to re-order it to RGB if needed File rasterio\_io.pyx:639, in rasterio._io.DatasetReaderBase.read() File rasterio\_base.pyx:648, in rasterio._base.DatasetBase.mask_flag_enums.__get__() File rasterio\_base.pyx:650, in genexpr() File rasterio\_base.pyx:612, in genexpr() File rasterio\_base.pyx:612, in genexpr() File rasterio\_base.pyx:367, in rasterio._base.DatasetBase.band() File rasterio\_base.pyx:360, in rasterio._base.DatasetBase.handle() RasterioIOError: Dataset is closed: C:/Users/jtrum/world_bank/data/output.tif
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from rasterio.features import geometry_mask
import numpy as np
# Load the GeoJSON as a GeoDataFrame
aoi = gpd.read_file(datadir + 'luanda2clean.geojson')
# Open the GeoTIFF file using rasterio
with rasterio.open(datadir + 'WSF2019_v1_12_-10.tif') as src:
# Crop the GeoTIFF to the GeoJSON extent
out_image, out_transform = mask(src, aoi.geometry, crop=True)
out_meta = src.meta.copy()
# Create a binary mask for the GeoTIFF values (0 or 255)
binary_mask = (out_image == 0) | (out_image == 255)
# Update the metadata to reflect the new cropped extent
out_meta.update({"driver": "GTiff",
"height": out_image.shape[1],
"width": out_image.shape[2],
"transform": out_transform,
"nodata": np.nan})
# Save the cropped GeoTIFF with preserved binarized values
with rasterio.open('cropped_wsf.tif', 'w', **out_meta) as dst:
dst.write(out_image)
# Now you have 'cropped_wsf.tif' with the cropped extent and NaN values for masked areas.
# You can use this GeoTIFF for further processing and overlay it with other data.
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[34], line 27 20 out_meta.update({"driver": "GTiff", 21 "height": out_image.shape[1], 22 "width": out_image.shape[2], 23 "transform": out_transform, 24 "nodata": np.nan}) 26 # Save the cropped GeoTIFF with preserved binarized values ---> 27 with rasterio.open('cropped_wsf.tif', 'w', **out_meta) as dst: 28 dst.write(out_image) 30 # Now you have 'cropped_wsf.tif' with the cropped extent and NaN values for masked areas. 31 # You can use this GeoTIFF for further processing and overlay it with other data. File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\rasterio\env.py:451, in ensure_env_with_credentials.<locals>.wrapper(*args, **kwds) 448 session = DummySession() 450 with env_ctor(session=session): --> 451 return f(*args, **kwds) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\rasterio\__init__.py:343, in open(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, **kwargs) 341 writer = get_writer_for_driver(driver) 342 if writer is not None: --> 343 dataset = writer( 344 path, 345 mode, 346 driver=driver, 347 width=width, 348 height=height, 349 count=count, 350 crs=crs, 351 transform=transform, 352 dtype=dtype, 353 nodata=nodata, 354 sharing=sharing, 355 **kwargs 356 ) 357 else: 358 raise DriverCapabilityError( 359 "Writer does not exist for driver: %s" % str(driver) 360 ) File rasterio\_io.pyx:1502, in rasterio._io.DatasetWriterBase.__init__() ValueError: Given nodata value, nan, is beyond the valid range of its data type, uint8.
BELOW
import os
os.environ['USE_PATH_FOR_GDAL_PYTHON'] = 'YES'
import pandas as pd
import numpy as np
import georasters as gr
c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\geopandas\_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow. warnings.warn(
import brotli
print(brotli.__file__)
None
import shapely
print(shapely.__file__)
c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\shapely\__init__.py
import pyproj
pyproj.__version__
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 42 line 1 ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y111sZmlsZQ%3D%3D?line=0'>1</a> pyproj.__version__ AttributeError: module 'pyproj' has no attribute '__version__'
dir(pyproj)
['__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__']
print(pyproj.__path__)
_NamespacePath(['c:\\Users\\jtrum\\miniconda3\\envs\\wash_scan\\lib\\site-packages\\pyproj'])
pyproj
<module 'pyproj' (namespace)>
wsf = gdal.Open(datadir + 'WSF2019_v1_12_-10.tif')
# crop first 7500 from the left side off the image
# crop first 5000 from the top of the image
wsf_crop = wsf.ReadAsArray(7500, 5000, wsf.RasterXSize - 7500, wsf.RasterYSize - 5000)
datadir = 'C:/Users/jtrum/world_bank/data/'
wsf = gr.from_file(datadir + 'WSF2019_v1_12_-10.tif')
'''
this step may not be necessary, but because of how large the shape is, it may be useful to reduce the size of the shapefile to only the area of interest
'''
x_min = wsf.bounds.left
x_max = wsf.bounds.left + 7500
y_min = wsf.bounds.bottom
y_max = wsf.bounds.top
wsf_crop = wsf.crop(x_min, y_min, x_max, y_max)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 43 line 2 <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X60sZmlsZQ%3D%3D?line=0'>1</a> datadir = 'C:/Users/jtrum/world_bank/data/' ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X60sZmlsZQ%3D%3D?line=1'>2</a> wsf = gr.from_file(datadir + 'WSF2019_v1_12_-10.tif') <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X60sZmlsZQ%3D%3D?line=3'>4</a> ''' <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X60sZmlsZQ%3D%3D?line=4'>5</a> this step may not be necessary, but because of how large the shape is, it may be useful to reduce the size of the shapefile to only the area of interest <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X60sZmlsZQ%3D%3D?line=5'>6</a> ''' <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#X60sZmlsZQ%3D%3D?line=6'>7</a> x_min = wsf.bounds.left NameError: name 'gr' is not defined
wsf.plot()
<Axes: >
wsf.shape
(22487, 22487)
wsf = wsf.to_pandas()
wsf.head()
--------------------------------------------------------------------------- MemoryError Traceback (most recent call last) Cell In[7], line 1 ----> 1 wsf = wsf.to_pandas() 2 wsf.head() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\georasters\georasters.py:550, in GeoRaster.to_pandas(self, dropna, **kwargs) 545 def to_pandas(self, dropna=True, **kwargs): 546 """ 547 Convert GeoRaster to Pandas DataFrame, which can be easily exported to other types of files 548 The DataFrame has the row, col, value, x, and y values for each cell 549 """ --> 550 df = to_pandas(self, dropna, **kwargs) 551 return df File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\georasters\georasters.py:1454, in to_pandas(raster, name, dropna) 1452 df = df.reset_index() 1453 df.columns = ['row', 'col', name] -> 1454 df['x'] = df.col.apply(lambda col: raster.geot[0]+(col)*raster.geot[1]) 1455 df['y'] = df.row.apply(lambda row: raster.geot[3]+(row)*raster.geot[-1]) 1456 return df File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\series.py:4771, in Series.apply(self, func, convert_dtype, args, **kwargs) 4661 def apply( 4662 self, 4663 func: AggFuncType, (...) 4666 **kwargs, 4667 ) -> DataFrame | Series: 4668 """ 4669 Invoke function on values of Series. 4670 (...) 4769 dtype: float64 4770 """ -> 4771 return SeriesApply(self, func, convert_dtype, args, kwargs).apply() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:1123, in SeriesApply.apply(self) 1120 return self.apply_str() 1122 # self.f is Callable -> 1123 return self.apply_standard() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:1174, in SeriesApply.apply_standard(self) 1172 else: 1173 values = obj.astype(object)._values -> 1174 mapped = lib.map_infer( 1175 values, 1176 f, 1177 convert=self.convert_dtype, 1178 ) 1180 if len(mapped) and isinstance(mapped[0], ABCSeries): 1181 # GH#43986 Need to do list(mapped) in order to get treated as nested 1182 # See also GH#25959 regarding EA support 1183 return obj._constructor_expanddim(list(mapped), index=obj.index) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\_libs\lib.pyx:2933, in pandas._libs.lib.map_infer() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\_libs\lib.pyx:2512, in pandas._libs.lib.maybe_convert_objects() MemoryError: Unable to allocate 3.77 GiB for an array with shape (505665169,) and data type int64
import rioxarray as rxr
import xarray as xr
import pandas as pd
datadir = 'C:/Users/jtrum/world_bank/data/'
array = rxr.open_rasterio(datadir + 'WSF2019_v1_12_-10.tif')
array = xr.open_rasterio(datadir + 'WSF2019_v1_12_-10.tif')
df = array[0].to_pandas()
df
C:\Users\jtrum\AppData\Local\Temp\ipykernel_24052\2192282450.py:2: DeprecationWarning: open_rasterio is Deprecated in favor of rioxarray. For information about transitioning, see: https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html array = xr.open_rasterio(datadir + 'WSF2019_v1_12_-10.tif')
x | 11.990039 | 11.990129 | 11.990218 | 11.990308 | 11.990398 | 11.990488 | 11.990578 | 11.990667 | 11.990757 | 11.990847 | ... | 14.009182 | 14.009272 | 14.009362 | 14.009451 | 14.009541 | 14.009631 | 14.009721 | 14.009811 | 14.009901 | 14.009990 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
y | |||||||||||||||||||||
-7.990020 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-7.990110 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-7.990200 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-7.990290 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-7.990380 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
-10.009613 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-10.009703 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-10.009792 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-10.009882 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-10.009972 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
22487 rows × 22487 columns
# Get the x and y coordinates as separate DataFrames
x_coords = pd.DataFrame({'x': df.columns})
y_coords = pd.DataFrame({'y': df.index})
# Melt the original DataFrame into a long format
melted_df = pd.melt(df.reset_index(), id_vars=['y'], var_name='x', value_name='value')
# Merge the x and y coordinates back into the melted DataFrame
final_df = melted_df.merge(x_coords, on='x').merge(y_coords, on='y')
final_df
y | x | value | |
---|---|---|---|
0 | -7.990020 | 11.990039 | 0 |
1 | -7.990020 | 11.990129 | 0 |
2 | -7.990020 | 11.990218 | 0 |
3 | -7.990020 | 11.990308 | 0 |
4 | -7.990020 | 11.990398 | 0 |
... | ... | ... | ... |
505665164 | -10.009972 | 14.009631 | 0 |
505665165 | -10.009972 | 14.009721 | 0 |
505665166 | -10.009972 | 14.009811 | 0 |
505665167 | -10.009972 | 14.009901 | 0 |
505665168 | -10.009972 | 14.00999 | 0 |
505665169 rows × 3 columns
final_df.to_csv(datadir + 'wsf2019.csv')
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Cell In[30], line 1 ----> 1 final_df.to_csv(datadir + 'wsf2019.csv') File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs) 209 else: 210 kwargs[new_arg_name] = new_arg_value --> 211 return func(*args, **kwargs) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\generic.py:3720, in NDFrame.to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options) 3709 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 3711 formatter = DataFrameFormatter( 3712 frame=df, 3713 header=header, (...) 3717 decimal=decimal, 3718 ) -> 3720 return DataFrameRenderer(formatter).to_csv( 3721 path_or_buf, 3722 lineterminator=lineterminator, 3723 sep=sep, 3724 encoding=encoding, 3725 errors=errors, 3726 compression=compression, 3727 quoting=quoting, 3728 columns=columns, 3729 index_label=index_label, 3730 mode=mode, 3731 chunksize=chunksize, 3732 quotechar=quotechar, 3733 date_format=date_format, 3734 doublequote=doublequote, 3735 escapechar=escapechar, 3736 storage_options=storage_options, 3737 ) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs) 209 else: 210 kwargs[new_arg_name] = new_arg_value --> 211 return func(*args, **kwargs) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\io\formats\format.py:1189, in DataFrameRenderer.to_csv(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, errors, storage_options) 1168 created_buffer = False 1170 csv_formatter = CSVFormatter( 1171 path_or_buf=path_or_buf, 1172 lineterminator=lineterminator, (...) 1187 formatter=self.fmt, 1188 ) -> 1189 csv_formatter.save() 1191 if created_buffer: 1192 assert isinstance(path_or_buf, StringIO) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\io\formats\csvs.py:261, in CSVFormatter.save(self) 241 with get_handle( 242 self.filepath_or_buffer, 243 self.mode, (...) 249 250 # Note: self.encoding is irrelevant here 251 self.writer = csvlib.writer( 252 handles.handle, 253 lineterminator=self.lineterminator, (...) 258 quotechar=self.quotechar, 259 ) --> 261 self._save() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\io\formats\csvs.py:266, in CSVFormatter._save(self) 264 if self._need_to_save_header: 265 self._save_header() --> 266 self._save_body() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\io\formats\csvs.py:304, in CSVFormatter._save_body(self) 302 if start_i >= end_i: 303 break --> 304 self._save_chunk(start_i, end_i) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\io\formats\csvs.py:315, in CSVFormatter._save_chunk(self, start_i, end_i) 312 data = [res.iget_values(i) for i in range(len(res.items))] 314 ix = self.data_index[slicer]._format_native_types(**self._number_format) --> 315 libwriters.write_csv_rows( 316 data, 317 ix, 318 self.nlevels, 319 self.cols, 320 self.writer, 321 ) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\_libs\writers.pyx:55, in pandas._libs.writers.write_csv_rows() KeyboardInterrupt:
#convert 'final_df' to a geodataframe
import geopandas as gpd
from shapely.geometry import Point
# Create a geometry column from the x and y coordinates
final_df['geometry'] = final_df.apply(lambda row: Point(row['x'], row['y']), axis=1)
final_df
--------------------------------------------------------------------------- MemoryError Traceback (most recent call last) Cell In[29], line 6 3 from shapely.geometry import Point 5 # Create a geometry column from the x and y coordinates ----> 6 final_df['geometry'] = final_df.apply(lambda row: Point(row['x'], row['y']), axis=1) 7 final_df File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\frame.py:9568, in DataFrame.apply(self, func, axis, raw, result_type, args, **kwargs) 9557 from pandas.core.apply import frame_apply 9559 op = frame_apply( 9560 self, 9561 func=func, (...) 9566 kwargs=kwargs, 9567 ) -> 9568 return op.apply().__finalize__(self, method="apply") File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:764, in FrameApply.apply(self) 761 elif self.raw: 762 return self.apply_raw() --> 764 return self.apply_standard() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:891, in FrameApply.apply_standard(self) 890 def apply_standard(self): --> 891 results, res_index = self.apply_series_generator() 893 # wrap results 894 return self.wrap_results(results, res_index) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:905, in FrameApply.apply_series_generator(self) 902 results = {} 904 with option_context("mode.chained_assignment", None): --> 905 for i, v in enumerate(series_gen): 906 # ignore SettingWithCopy here in case the user mutates 907 results[i] = self.f(v) 908 if isinstance(results[i], ABCSeries): 909 # If we have a view on v, we need to make a copy because 910 # series_generator will swap out the underlying data File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:1018, in FrameColumnApply.series_generator(self) 1016 @property 1017 def series_generator(self): -> 1018 values = self.values 1019 values = ensure_wrapped_if_datetimelike(values) 1020 assert len(values) > 0 File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\_libs\properties.pyx:36, in pandas._libs.properties.CachedProperty.__get__() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:725, in FrameApply.values(self) 723 @cache_readonly 724 def values(self): --> 725 return self.obj.values File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\frame.py:11739, in DataFrame.values(self) 11666 """ 11667 Return a Numpy representation of the DataFrame. 11668 (...) 11736 ['monkey', nan, None]], dtype=object) 11737 """ 11738 self._consolidate_inplace() > 11739 return self._mgr.as_array() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\internals\managers.py:1770, in BlockManager.as_array(self, dtype, copy, na_value) 1768 arr = arr.astype(dtype, copy=False) 1769 else: -> 1770 arr = self._interleave(dtype=dtype, na_value=na_value) 1771 # The underlying data was copied within _interleave 1772 copy = False File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\internals\managers.py:1817, in BlockManager._interleave(self, dtype, na_value) 1815 for blk in self.blocks: 1816 rl = blk.mgr_locs -> 1817 arr = blk.get_values(dtype) 1818 result[rl.indexer] = arr 1819 itemmask[rl.indexer] = 1 File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\internals\blocks.py:1914, in NumpyBlock.get_values(self, dtype) 1912 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: 1913 if dtype == _dtype_obj: -> 1914 return self.values.astype(_dtype_obj) 1915 return self.values MemoryError:
array.shape
(1, 22487, 22487)
df = array[0].to_pandas()
df
x | 11.990039 | 11.990129 | 11.990218 | 11.990308 | 11.990398 | 11.990488 | 11.990578 | 11.990667 | 11.990757 | 11.990847 | ... | 14.009182 | 14.009272 | 14.009362 | 14.009451 | 14.009541 | 14.009631 | 14.009721 | 14.009811 | 14.009901 | 14.009990 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
y | |||||||||||||||||||||
-7.990020 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-7.990110 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-7.990200 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-7.990290 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-7.990380 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
-10.009613 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-10.009703 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-10.009792 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-10.009882 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
-10.009972 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
22487 rows × 22487 columns
from shapely.geometry import Point
import geopandas as gpd
c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\geopandas\_compat.py:123: UserWarning: The Shapely GEOS version (3.8.0-CAPI-1.13.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow. warnings.warn( C:\Users\jtrum\AppData\Local\Temp\ipykernel_24052\1691430808.py:2: UserWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas will still use PyGEOS by default for now. To force to use and test Shapely 2.0, you have to set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas: import os os.environ['USE_PYGEOS'] = '0' import geopandas In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html). import geopandas as gpd
# turn 'df' into a geodataframe
df = df.reset_index()
df = df.rename(columns={'index': 'x', 'level_0': 'y', 0: 'value'})
df['geometry'] = df.apply(lambda row: Point(row['x'], row['y']), axis=1)
df = gpd.GeoDataFrame(df, geometry='geometry')
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[23], line 4 2 df = df.reset_index() 3 df = df.rename(columns={'index': 'x', 'level_0': 'y', 0: 'value'}) ----> 4 df['geometry'] = df.apply(lambda row: Point(row['x'], row['y']), axis=1) 5 df = gpd.GeoDataFrame(df, geometry='geometry') File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\frame.py:9568, in DataFrame.apply(self, func, axis, raw, result_type, args, **kwargs) 9557 from pandas.core.apply import frame_apply 9559 op = frame_apply( 9560 self, 9561 func=func, (...) 9566 kwargs=kwargs, 9567 ) -> 9568 return op.apply().__finalize__(self, method="apply") File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:764, in FrameApply.apply(self) 761 elif self.raw: 762 return self.apply_raw() --> 764 return self.apply_standard() File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:891, in FrameApply.apply_standard(self) 890 def apply_standard(self): --> 891 results, res_index = self.apply_series_generator() 893 # wrap results 894 return self.wrap_results(results, res_index) File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\pandas\core\apply.py:907, in FrameApply.apply_series_generator(self) 904 with option_context("mode.chained_assignment", None): 905 for i, v in enumerate(series_gen): 906 # ignore SettingWithCopy here in case the user mutates --> 907 results[i] = self.f(v) 908 if isinstance(results[i], ABCSeries): 909 # If we have a view on v, we need to make a copy because 910 # series_generator will swap out the underlying data 911 results[i] = results[i].copy(deep=False) Cell In[23], line 4, in <lambda>(row) 2 df = df.reset_index() 3 df = df.rename(columns={'index': 'x', 'level_0': 'y', 0: 'value'}) ----> 4 df['geometry'] = df.apply(lambda row: Point(row['x'], row['y']), axis=1) 5 df = gpd.GeoDataFrame(df, geometry='geometry') File c:\Users\jtrum\miniconda3\envs\wash_scan\lib\site-packages\shapely\geometry\point.py:70, in Point.__new__(self, *args) 67 coords = np.asarray(coords).squeeze() 68 else: 69 # 2 or 3 args ---> 70 coords = np.array(args).squeeze() 72 if coords.ndim > 1: 73 raise ValueError( 74 f"Point() takes only scalar or 1-size vector arguments, got {args}" 75 ) ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.
# import georasters as gr
import os
os.environ['USE_PATH_FOR_GDAL_PYTHON'] = 'YES'
import georaster as gr
import matplotlib.pyplot as plt
datadir = 'C:/Users/jtrum/world_bank/data/'
image = gr.SingleBandRaster(datadir + 'WSF2019_v1_12_-10.tif')
plt.imshow(image.r, cmap='gray')
<matplotlib.image.AxesImage at 0x2542f716e50>
#crop 'image' to the area of interest
#load the shapefile
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio as rio
datadir = 'C:/Users/jtrum/world_bank/data/'
aoi = gpd.read_file(datadir + 'aoiLuanda.geojson')
#crop the raster to the area of interest
with rio.open(datadir + 'WSF2019_v1_12_-10.tif') as src:
# You can now work with the raster data using 'src'
# For example, you can access metadata like CRS, transform, and more:
crs = src.crs
transform = src.transform
width = src.width
height = src.height
count = src.count # Number of bands
dtype = src.dtypes[0] # Data type of the first band
# Read a specific band from the raster (e.g., band 1)
band1 = src.read(1)
# Read the entire raster as a NumPy array (all bands)
full_raster = src.read()
full_raster.shape
(1, 22487, 22487)
full_raster
array([[[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8)
#now crop 'full_raster' to the area of interest
image.shape
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 67 line 1 ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y125sZmlsZQ%3D%3D?line=0'>1</a> image.shape AttributeError: 'SingleBandRaster' object has no attribute 'shape'
image.proj()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 67 line 1 ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y123sZmlsZQ%3D%3D?line=0'>1</a> image.proj() TypeError: 'NoneType' object is not callable
import geopandas as gpd
import rasterio
from rasterio.features import geometry_mask
# Load your GeoRaster using Rasterio
with rasterio.open(datadir + 'WSF2019_v1_12_-10.tif') as src:
# Load your AOI GeoDataFrame (replace "aoi.shp" with the path to your AOI file)
aoi_gdf = gpd.read_file(datadir + "aoiLuanda.geojson")
# Use the total_bounds property of the AOI GeoDataFrame to get its bounding box
aoi_bounds = aoi_gdf.total_bounds
# Create a mask for the AOI within the GeoRaster's extent
mask = geometry_mask(aoi_gdf.geometry, out_shape=src.shape, transform=src.transform, invert=True)
# Read the GeoRaster data within the AOI
cropped_data = src.read(masked=True)
# Update the metadata for the cropped raster
src.meta.update({
'transform': src.window_transform(aoi_bounds)
})
# Save the cropped GeoRaster to a new file
with rasterio.open("cropped_georaster.tif", 'w', **src.meta) as dst:
dst.write(cropped_data)
# You now have the cropped GeoRaster in "cropped_georaster.tif"
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 75 line 2 <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y133sZmlsZQ%3D%3D?line=16'>17</a> cropped_data = src.read(masked=True) <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y133sZmlsZQ%3D%3D?line=18'>19</a> # Update the metadata for the cropped raster <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y133sZmlsZQ%3D%3D?line=19'>20</a> src.meta.update({ ---> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y133sZmlsZQ%3D%3D?line=20'>21</a> 'transform': src.window_transform(aoi_bounds) <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y133sZmlsZQ%3D%3D?line=21'>22</a> }) <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y133sZmlsZQ%3D%3D?line=23'>24</a> # Save the cropped GeoRaster to a new file <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y133sZmlsZQ%3D%3D?line=24'>25</a> with rasterio.open("cropped_georaster.tif", 'w', **src.meta) as dst: File c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\rasterio\windows.py:98, in WindowMethodsMixin.window_transform(self, window) 84 """Get the affine transform for a dataset window. 85 86 Parameters (...) 95 96 """ 97 gtransform = guard_transform(self.transform) ---> 98 return transform(window, gtransform) File c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\rasterio\windows.py:359, in transform(window, transform) 343 def transform(window, transform): 344 """Construct an affine transform matrix relative to a window. 345 346 Parameters (...) 357 358 """ --> 359 window = evaluate(window, height=0, width=0) 360 x, y = transform * (window.col_off or 0.0, window.row_off or 0.0) 361 return Affine.translation( 362 x - transform.c, y - transform.f) * transform File c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\rasterio\windows.py:440, in evaluate(window, height, width, boundless) 438 return window 439 else: --> 440 rows, cols = window 441 return Window.from_slices(rows=rows, cols=cols, height=height, 442 width=width, boundless=boundless) ValueError: too many values to unpack (expected 2)
import os
os.environ['USE_PATH_FOR_GDAL_PYTHON'] = 'YES'
#folder location
os.chdir(r'C:/Users/jtrum/world_bank/data/')
#read raster file
from osgeo import gdal, ogr, osr
raster = gdal.Open('WSF2019_v1_12_-10.tif')
raster
<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x0000025470A8B870> >
raster.RasterYSize
22487
band = raster.GetRasterBand(1)
band
<osgeo.gdal.Band; proxy of <Swig Object of type 'GDALRasterBandShadow *' at 0x000002545A529060> >
band.ReadAsArray()
array([[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
# converting raster to shapefile
proj = raster.GetProjection()
shp_proj = osr.SpatialReference()
shp_proj.ImportFromWkt(proj)
out = 'output/WSF_Luanda.shp'
call_drive = ogr.GetDriverByName('ESRI Shapefile')
create_shp = call_drive.CreateDataSource(out)
shp = create_shp.CreateLayer('layername', srs=shp_proj)
new_field = ogr.FieldDefn(str('ID'), ogr.OFTInteger)
shp.CreateField(new_field)
gdal.Polygonize(band, None, shp, 0, [], callback=None)
create_shp.Destroy()
raster = None
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) c:\Users\jtrum\world_bank\notebooks\wsf-2019.ipynb Cell 80 line 9 <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y142sZmlsZQ%3D%3D?line=6'>7</a> call_drive = ogr.GetDriverByName('ESRI Shapefile') <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y142sZmlsZQ%3D%3D?line=7'>8</a> create_shp = call_drive.CreateDataSource(out) ----> <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y142sZmlsZQ%3D%3D?line=8'>9</a> shp = create_shp.CreateLayer('layername', srs=shp_proj) <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y142sZmlsZQ%3D%3D?line=9'>10</a> new_field = ogr.FieldDefn(str('ID'), ogr.OFTInteger) <a href='vscode-notebook-cell:/c%3A/Users/jtrum/world_bank/notebooks/wsf-2019.ipynb#Y142sZmlsZQ%3D%3D?line=10'>11</a> shp.CreateField(new_field) AttributeError: 'NoneType' object has no attribute 'CreateLayer'
import os
from osgeo import gdal, ogr, osr
# Path to the input raster
raster_path = 'WSF2019_v1_12_-10.tif'
# Specify the output shapefile path
out = 'output/WSF_Luanda.shp'
# Check if the output directory exists, and create it if necessary
output_dir = os.path.dirname(out)
if not os.path.exists(output_dir):
os.makedirs(output_dir, exist_ok=True)
# Open the raster dataset
raster = gdal.Open(raster_path)
if raster is None:
print("Failed to open the raster dataset.")
else:
proj = raster.GetProjection()
shp_proj = osr.SpatialReference()
shp_proj.ImportFromWkt(proj)
call_drive = ogr.GetDriverByName('ESRI Shapefile')
create_shp = call_drive.CreateDataSource(out)
if create_shp is None:
print("Failed to create the shapefile.")
else:
shp = create_shp.CreateLayer('layername', srs=shp_proj)
new_field = ogr.FieldDefn('ID', ogr.OFTInteger)
shp.CreateField(new_field)
# Assuming 'band' is correctly defined
gdal.Polygonize(band, None, shp, 0, [], callback=None)
# Clean up
create_shp.Destroy()
raster = None
print("Shapefile created successfully.")
Shapefile created successfully.
import geopandas as gpd
import os
c:\Users\jtrum\miniconda3\envs\wash\lib\site-packages\geopandas\_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow. warnings.warn( C:\Users\jtrum\AppData\Local\Temp\ipykernel_20824\3379007050.py:1: DeprecationWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas: import os os.environ['USE_PYGEOS'] = '0' import geopandas In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html). import geopandas as gpd
os.chdir(r'C:/Users/jtrum/world_bank/data/')
wsf = gpd.read_file('output/WSF_Luanda.shp')
aoi = gpd.read_file('aoiLuanda.geojson')
#set wsf crs to match aoi crs
wsf = wsf.to_crs(aoi.crs)
print(wsf.crs)
print(aoi.crs)
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
join = gpd.sjoin(wsf, aoi, how='inner', predicate='intersects')
join = join[join.geometry.intersects(aoi.unary_union)]
len(join)
383504
join['index_right'].value_counts()
index_right 0 383504 Name: count, dtype: int64
#make a plot of within, with aoi underneath and colored by column 'ID'
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 10))
#aoi underlain, transparent background but outlined in red
aoi.plot(ax=ax, edgecolor='red', alpha=0.5)
within.plot(ax=ax, column='ID', cmap='gray', alpha=0.5)
#add colorbar
sm = plt.cm.ScalarMappable(cmap='gray', norm=plt.Normalize(vmin=within['ID'].min(), vmax=within['ID'].max()))
plt.show()
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Define a custom colormap with green for 0 and blue for 255
cmap = ListedColormap(['green', 'blue'])
fig, ax = plt.subplots(figsize=(10, 10))
# Plot the AOI with a red outline and transparent background
#aoi.plot(ax=ax, edgecolor='red', alpha=0.5)
# Plot the 'within' GeoDataFrame using the custom colormap and column 'ID'
within.plot(ax=ax, column='ID', cmap=cmap, alpha=1)
# Create a custom legend
legend_labels = {0: 'Not Developed', 255: 'Developed'}
handles = [plt.Line2D([0], [0], marker='o', color='w', label=legend_labels[label], markersize=10, markerfacecolor=color)
for label, color in zip(legend_labels.keys(), ['green', 'blue'])]
ax.legend(handles=handles, title='Legend', loc='lower right')
plt.show()
within
result = gpd.GeoDataFrame([merge], columns=['geometry'])
result.reset_index(drop=True, inplace=True)
luanda = gpd.read_file('aoiLuanda.geojson')
#read shapefile
import geopandas as gpd
import pandas as pd
#read shapefile
wsf_shp = gpd.read_file('output/WSF_Luanda.shp')
wsf_shp
geometry |
---|