import os
import glob
import csv
import requests
from pathlib import Path
import xarray as xr
import rioxarray as rio
import pandas as pd
import geopandas as gpd
import numpy as np
import rasterio
city = 'Luanda'
data_folder = Path(r"D:\World Bank\CRP\data\WaterGAP")
output_folder = Path('output')
int_output_folder = Path('intermediate_output')
# read catchment AOI
aoi = gpd.read_file('AOI/luanda_catchment_level4.shp').to_crs(epsg = 4326)
var_list = ['ncrun', 'qs']
year_list = range(1987, 2017)
for var in var_list:
if not os.path.exists(data_folder / f'{var}_raster2d'):
os.mkdir(data_folder / f'{var}_raster2d')
# open netcdf
nc = rio.open_rasterio(data_folder / f'watergap_22d_WFDEI-GPCC_histsoc_{var}_monthly_1901_2016.nc4',
decode_times = False)
# set time dimension
units, reference_date = nc.time.attrs['units'].split('since')
nc['time'] = pd.date_range(start = reference_date, periods = nc.sizes['time'], freq = 'MS')
# output raster 2D if it does not already exist
for year in year_list:
for month in range(1, 13):
output_raster = data_folder / f'{var}_raster2d' / f'{var}_{str(year)}{str(month).zfill(2)}.tif'
if not os.path.exists(output_raster):
nc.rio.write_crs("epsg:4326", inplace = True).sel(time = f'{str(year)}-{str(month).zfill(2)}-01').rio.to_raster(output_raster)
# clip each raster
for var in var_list:
for year in year_list:
for month in range(1, 13):
input_raster = data_folder / f'{var}_raster2d' / f'{var}_{str(year)}{str(month).zfill(2)}.tif'
with rasterio.open(input_raster) as src:
# shapely presumes all operations on two or more features exist in the same Cartesian plane.
out_image, out_transform = rasterio.mask.mask(
src, aoi.geometry, crop = True)
if year == year_list[0] and month == 1:
out_meta = src.meta.copy()
out_meta.update({"driver": "GTiff",
"height": out_image.shape[1],
"width": out_image.shape[2],
"transform": out_transform})
output_file = f'{var}_{str(year)}{str(month).zfill(2)}.tif'
with rasterio.open(int_output_folder / output_file, "w", **out_meta) as dest:
dest.write(out_image)
# average raster (or other summary stats) and save
for var in var_list:
raster_list = []
for year in year_list:
for month in range(1, 13):
with rasterio.open(int_output_folder / f'{var}_{str(year)}{str(month).zfill(2)}.tif') as src:
raster_list.append(src.read(1))
if year == year_list[0] and month == 1:
meta = src.meta
output_raster_mean = np.nanmean(raster_list, axis = 0)
output_raster_std = np.nanstd(raster_list, axis = 0)
with rasterio.open(output_folder / f'{city.lower()}_{var}_mean.tif', 'w', **meta) as dst:
dst.write(output_raster_mean, 1)
with rasterio.open(output_folder / f'{city.lower()}_{var}_std.tif', 'w', **meta) as dst:
dst.write(output_raster_std, 1)
# generate time series stats
for var in var_list:
time_series = {'mean': {}, 'std': {},
'max' : {}, 'min': {}}
for year in year_list:
for month in range(1, 13):
with rasterio.open(int_output_folder / f'{var}_{str(year)}{str(month).zfill(2)}.tif') as src:
src_array = src.read(1)
src_array = src_array[src_array != src.meta['nodata']]
time_series['mean'][f'{str(year)}{str(month).zfill(2)}'] = np.nanmean(src_array)
time_series['std'][f'{str(year)}{str(month).zfill(2)}'] = np.nanstd(src_array)
time_series['min'][f'{str(year)}{str(month).zfill(2)}'] = np.nanmin(src_array)
time_series['max'][f'{str(year)}{str(month).zfill(2)}'] = np.nanmax(src_array)
with open(output_folder / f'{city.lower()}_{var}.csv', 'w') as f:
f.write('year,month,mean,std,min,max\n')
for year in year_list:
for month in range(1, 13):
f.write('%s,%s,%s,%s,%s,%s\n' % (str(year), str(month).zfill(2),
time_series['mean'][f'{str(year)}{str(month).zfill(2)}'],
time_series['std'][f'{str(year)}{str(month).zfill(2)}'],
time_series['min'][f'{str(year)}{str(month).zfill(2)}'],
time_series['max'][f'{str(year)}{str(month).zfill(2)}']))
# delete intermediate outputs
for var in var_list:
for year in year_list:
for month in range(1, 13):
os.remove(int_output_folder / f'{var}_{str(year)}{str(month).zfill(2)}.tif')