import numpy as np
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import dask
dask.config.set(scheduler='synchronous')
#from dask.distributed import Client, LocalCluster
#cluster = LocalCluster(processes=True)
#client = Client(cluster) # memory_limit='16GB',
import xarray as xr
from dask.diagnostics import ProgressBar
import numpy as np
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#import joblib
from sklearn.pipeline import Pipeline
#from dask_ml.preprocessing import StandardScaler
#from dask_ml.decomposition import PCA
#from dask_ml.xgboost import XGBRegressor
#from dask_ml.linear_model import LogisticRegression
#from dask_ml.linear_model import LinearRegression
#from sklearn.linear_model import Ridge
import keras
from keras.layers.core import Dropout
Using TensorFlow backend.
--------------------------------------------------------------------------- ImportError Traceback (most recent call last) ~/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/pywrap_tensorflow.py in <module> 57 ---> 58 from tensorflow.python.pywrap_tensorflow_internal import * 59 from tensorflow.python.pywrap_tensorflow_internal import __version__ ~/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/pywrap_tensorflow_internal.py in <module> 27 return _mod ---> 28 _pywrap_tensorflow_internal = swig_import_helper() 29 del swig_import_helper ~/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/pywrap_tensorflow_internal.py in swig_import_helper() 23 try: ---> 24 _mod = imp.load_module('_pywrap_tensorflow_internal', fp, pathname, description) 25 finally: ~/.conda/envs/ml_flood/lib/python3.7/imp.py in load_module(name, file, filename, details) 241 else: --> 242 return load_dynamic(name, filename, file) 243 elif type_ == PKG_DIRECTORY: ~/.conda/envs/ml_flood/lib/python3.7/imp.py in load_dynamic(name, path, file) 341 name=name, loader=loader, origin=path) --> 342 return _load(spec) 343 ImportError: /home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/../../_solib_k8/_U@mkl_Ulinux_S_S_Cmkl_Ulibs_Ulinux___Uexternal_Smkl_Ulinux_Slib/libmklml_intel.so: symbol __kmpc_omp_task_with_deps, version VERSION not defined in file libiomp5.so with link time reference During handling of the above exception, another exception occurred: ImportError Traceback (most recent call last) <ipython-input-17-7d97a8021eb8> in <module> 31 #from sklearn.linear_model import Ridge 32 ---> 33 import keras 34 from keras.layers.core import Dropout ~/.conda/envs/ml_flood/lib/python3.7/site-packages/keras/__init__.py in <module> 1 from __future__ import absolute_import 2 ----> 3 from . import utils 4 from . import activations 5 from . import applications ~/.conda/envs/ml_flood/lib/python3.7/site-packages/keras/utils/__init__.py in <module> 4 from . import data_utils 5 from . import io_utils ----> 6 from . import conv_utils 7 8 # Globally-importable utils. ~/.conda/envs/ml_flood/lib/python3.7/site-packages/keras/utils/conv_utils.py in <module> 7 from six.moves import range 8 import numpy as np ----> 9 from .. import backend as K 10 11 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/keras/backend/__init__.py in <module> 87 elif _BACKEND == 'tensorflow': 88 sys.stderr.write('Using TensorFlow backend.\n') ---> 89 from .tensorflow_backend import * 90 else: 91 # Try and load external backend. ~/.conda/envs/ml_flood/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py in <module> 3 from __future__ import print_function 4 ----> 5 import tensorflow as tf 6 from tensorflow.python.framework import ops as tf_ops 7 from tensorflow.python.training import moving_averages ~/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/__init__.py in <module> 32 33 # pylint: disable=g-bad-import-order ---> 34 from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import 35 from tensorflow.python.tools import module_util as _module_util 36 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/__init__.py in <module> 47 import numpy as np 48 ---> 49 from tensorflow.python import pywrap_tensorflow 50 51 # Protocol buffers ~/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/pywrap_tensorflow.py in <module> 72 for some common reasons and solutions. Include the entire stack trace 73 above this error message when asking for help.""" % traceback.format_exc() ---> 74 raise ImportError(msg) 75 76 # pylint: enable=wildcard-import,g-import-not-at-top,unused-import,line-too-long ImportError: Traceback (most recent call last): File "/home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/pywrap_tensorflow.py", line 58, in <module> from tensorflow.python.pywrap_tensorflow_internal import * File "/home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/pywrap_tensorflow_internal.py", line 28, in <module> _pywrap_tensorflow_internal = swig_import_helper() File "/home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/pywrap_tensorflow_internal.py", line 24, in swig_import_helper _mod = imp.load_module('_pywrap_tensorflow_internal', fp, pathname, description) File "/home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/imp.py", line 242, in load_module return load_dynamic(name, filename, file) File "/home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/imp.py", line 342, in load_dynamic return _load(spec) ImportError: /home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/site-packages/tensorflow/python/../../_solib_k8/_U@mkl_Ulinux_S_S_Cmkl_Ulibs_Ulinux___Uexternal_Smkl_Ulinux_Slib/libmklml_intel.so: symbol __kmpc_omp_task_with_deps, version VERSION not defined in file libiomp5.so with link time reference Failed to load the native TensorFlow runtime. See https://www.tensorflow.org/install/errors for some common reasons and solutions. Include the entire stack trace above this error message when asking for help.
import sys
print(sys.executable)
def shift_time(ds, value):
ds.coords['time'].values = pd.to_datetime(ds.coords['time'].values) + value
return ds
static = xr.open_dataset('../data/danube/era5_slt_z_slor_lsm_stationary_field.nc')
print(static)
glofas = xr.open_dataset('../data/danube/glofas_reanalysis_danube_1981-2002.nc')
glofas = glofas.rename({'lat': 'latitude', 'lon': 'longitude'}) # to have the same name like in era5
glofas = shift_time(glofas, -dt.timedelta(days=1))
z_glofas = static['z'].isel(time=0)/9.81 # converting to m approx.
z_glofas = z_glofas.interp(latitude=glofas.latitude,
longitude=glofas.longitude)
print(z_glofas)
dis = glofas['dis']
print(dis)
plt.figure(figsize=(26,6))
i = 48.45
j = 13.75
z_point = z_glofas.sel(latitude=slice(i, i-0.01)).sel(longitude=slice(j, j+0.01))
z_glofas.plot()
# river mask
river = dis.min('time') > 5
river.name = 'river mask [0/1]'
dis.mean('time').where(river).plot(cmap='RdBu')
#dis.mean('time').plot(cmap='RdBu')
ax = plt.gca()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
plt.figure(figsize=(26,6))
i = 48.45
j = 13.75
z_point = z_glofas.sel(latitude=slice(i, i-0.01)).sel(longitude=slice(j, j+0.01))
z_glofas.where(z_glofas.values >= z_point.values).plot()
dis.mean('time').where(river).plot(cmap='RdBu')
ax = plt.gca()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
plt.figure(figsize=(26,6))
i = 48.45
j = 13.75
z_point = z_glofas.sel(latitude=slice(i, i-0.01)).sel(longitude=slice(j, j+0.01))
z_glofas.where(z_glofas.values >= z_point.values-150).plot()
dis.mean('time').where(river).plot(cmap='RdBu')
ax = plt.gca()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
dis_point = dis.where(z_point).squeeze()
dis_point
def correlate(da_3d, da_timeseries, timelag=False):
a = da_3d - da_3d.mean('time')
b = da_timeseries - da_timeseries.mean('time')
N = len(b.coords['time'])
if timelag:
b = b.drop('time')
a = a.drop('time')
# out = b.dot(a)/a.std('time')/b.std()/N
out = np.zeros(a.isel(time=1).shape)*np.nan
i = 0
for lat in a.latitude:
j = 0
for lon in a.longitude:
a_iter = a.sel(latitude=lat, longitude=lon)
out[i,j] = np.corrcoef(a_iter, b)[0,1]
j += 1
i += 1
out = xr.DataArray(out, dims=['latitude', 'longitude'], coords=[a.latitude, a.longitude])
out.name = 'correlation coefficient'
return out
plt.figure(figsize=(26,6))
i = 48.45
j = 13.75
z_point = z_glofas.sel(latitude=slice(i, i-0.01)).sel(longitude=slice(j, j+0.01))
z_glofas.where(z_glofas.values >= z_point.values-150).plot()
#dis.mean('time').where(river).plot(cmap='RdBu')
ax = plt.gca()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
a = correlate(dis.where(river), dis_point)
a.plot(cmap='rainbow')
def correlate(da_3d, da_timeseries, timelag=False):
a = da_3d - da_3d.mean('time')
# a = da_3d
b = da_timeseries - da_timeseries.mean('time')
# b = da_timeseries
N = len(b.coords['time'])
if timelag:
b = b.drop('time')
a = a.drop('time')
# out = b.dot(a)/a.std('time')/b.std()/N
out = np.zeros(a.isel(time=1).shape)*np.nan
i = 0
for lat in a.latitude:
j = 0
for lon in a.longitude:
a_iter = a.sel(latitude=lat, longitude=lon)
out[i,j] = np.corrcoef(a_iter, b)[0,1]
j += 1
i += 1
out = xr.DataArray(out, dims=['latitude', 'longitude'], coords=[a.latitude, a.longitude])
out.name = 'correlation coefficient'
return out
lags = [-1, 1]
timelag_corrs = np.full((len(lags), dis.latitude.shape[0], dis.longitude.shape[0]), np.nan)
for t, lag in enumerate(lags):
if lag > 0: # dis_box with data from previous timesteps
dis_point_lag = dis.where(z_point).squeeze()[lag:]
dis_box = dis.where(river)[:-lag]
elif lag < 0: # dis_box with data from future timesteps
dis_point_lag = dis.where(z_point).squeeze()[:lag]
dis_box = dis.where(river)[-lag:]
timelag_corrs[t,:,:] = correlate(dis_box, dis_point_lag, timelag=True)
lag_influencing = timelag_corrs[1,:,:] > timelag_corrs[0,:,:]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-19-dba18a7b13e1> in <module> 7 dis_box = dis.where(river)[:-lag] 8 elif lag < 0: # dis_box with data from future timesteps ----> 9 dis_point_lag = dis.where(z_point).squeeze()[:lag] 10 dis_box = dis.where(river)[-lag:] 11 NameError: name 'z_point' is not defined
plt.figure(figsize=(26,6))
plt.imshow(timelag_corrs[0,:,:])
plt.colorbar()
plt.figure(figsize=(26,6))
plt.imshow(timelag_corrs[1,:,:])
plt.colorbar()
plt.figure(figsize=(26,6))
plt.imshow(timelag_corrs[0,:,:] - timelag_corrs[1,:,:])
plt.colorbar()
lag_influencing = timelag_corrs[1,:,:] > timelag_corrs[0,:,:]
plt.figure(figsize=(26,6))
plt.imshow(lag_influencing)
lag_influencing = timelag_corrs[1,:,:] < timelag_corrs[0,:,:]
plt.figure(figsize=(26,6))
plt.imshow(lag_influencing)
/home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/site-packages/ipykernel_launcher.py:13: RuntimeWarning: invalid value encountered in greater del sys.path[0] /home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/site-packages/ipykernel_launcher.py:17: RuntimeWarning: invalid value encountered in less
<matplotlib.image.AxesImage at 0x7fe2e64c8828>
lag_influencing
array([[False, False, False, ..., False, False, False], [False, False, False, ..., False, False, False], [False, False, False, ..., False, False, False], ..., [False, False, False, ..., False, False, False], [False, False, False, ..., False, False, False], [False, False, False, ..., False, False, False]])
plt.figure(figsize=(26,6))
dis_box_mean = dis.mean('time')
dis_box_mean.where(dis_box_mean > 50).plot()
mask_box_mean_greater = (~np.isnan(dis_box_mean.where(dis_box_mean > 50))).astype(bool)
--------------------------------------------------------------------------- MemoryError Traceback (most recent call last) <ipython-input-22-ebd4d4e05922> in <module> 1 plt.figure(figsize=(26,6)) ----> 2 dis_box_mean = dis.mean('time') 3 dis_box_mean.where(dis_box_mean > 50).plot() 4 5 mask_box_mean_greater = (~np.isnan(dis_box_mean.where(dis_box_mean > 50))).astype(bool) ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/common.py in wrapped_func(self, dim, axis, skipna, **kwargs) 32 **kwargs): 33 return self.reduce(func, dim, axis, ---> 34 skipna=skipna, allow_lazy=True, **kwargs) 35 else: 36 def wrapped_func(self, dim=None, axis=None, # type: ignore ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/dataarray.py in reduce(self, func, dim, axis, keep_attrs, keepdims, **kwargs) 1891 1892 var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims, -> 1893 **kwargs) 1894 return self._replace_maybe_drop_dims(var) 1895 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in reduce(self, func, dim, axis, keep_attrs, keepdims, allow_lazy, **kwargs) 1385 if dim is not None: 1386 axis = self.get_axis_num(dim) -> 1387 input_data = self.data if allow_lazy else self.values 1388 if axis is not None: 1389 data = func(input_data, axis=axis, **kwargs) ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in data(self) 292 return self._data 293 else: --> 294 return self.values 295 296 @data.setter ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in values(self) 387 def values(self): 388 """The variable's data as a numpy.ndarray""" --> 389 return _as_array_or_item(self._data) 390 391 @values.setter ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in _as_array_or_item(data) 208 TODO: remove this (replace with np.asarray) once these issues are fixed 209 """ --> 210 data = np.asarray(data) 211 if data.ndim == 0: 212 if data.dtype.kind == 'M': ~/.conda/envs/ml_flood/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order) 536 537 """ --> 538 return array(a, dtype, copy=False, order=order) 539 540 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype) 630 631 def __array__(self, dtype=None): --> 632 self._ensure_cached() 633 return np.asarray(self.array, dtype=dtype) 634 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/indexing.py in _ensure_cached(self) 627 def _ensure_cached(self): 628 if not isinstance(self.array, NumpyIndexingAdapter): --> 629 self.array = NumpyIndexingAdapter(np.asarray(self.array)) 630 631 def __array__(self, dtype=None): ~/.conda/envs/ml_flood/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order) 536 537 """ --> 538 return array(a, dtype, copy=False, order=order) 539 540 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype) 608 609 def __array__(self, dtype=None): --> 610 return np.asarray(self.array, dtype=dtype) 611 612 def __getitem__(self, key): ~/.conda/envs/ml_flood/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order) 536 537 """ --> 538 return array(a, dtype, copy=False, order=order) 539 540 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype) 514 def __array__(self, dtype=None): 515 array = as_indexable(self.array) --> 516 return np.asarray(array[self.key], dtype=None) 517 518 def transpose(self, order): ~/.conda/envs/ml_flood/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order) 536 537 """ --> 538 return array(a, dtype, copy=False, order=order) 539 540 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/coding/variables.py in __array__(self, dtype) 67 68 def __array__(self, dtype=None): ---> 69 return self.func(self.array) 70 71 def __repr__(self): ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/coding/variables.py in _apply_mask(data, encoded_fill_values, decoded_fill_value, dtype) 138 for fv in encoded_fill_values: 139 condition |= data == fv --> 140 return np.where(condition, decoded_fill_value, data) 141 142 MemoryError:
<Figure size 1872x432 with 0 Axes>
plt.figure(figsize=(26,6))
z_river = z_glofas.where(river)
z_river.where(z_river > z_point.squeeze()-100).plot()
mask_river_altitude_greater = (~np.isnan(z_river.where(z_river > z_point.squeeze()-100))).astype(bool)
plt.figure(figsize=(26,6))
(dis_box_mean > pct*dis_box_mean.sel(latitude=dis_point.latitude, longitude=dis_point.longitude)).plot()
mask_mean_greater_pct_point = (dis_box_mean > pct*dis_box_mean.sel(latitude=dis_point.latitude, longitude=dis_point.longitude)).astype(bool)
plt.figure(figsize=(26,6))
plt.imshow(lag_influencing > 0.9)
mask_lag_greater = (lag_influencing > 0.9).astype(bool)
pct = 0.2
# select feature gridpoints
plt.figure(figsize=(26,6))
influencer = (dis_box_mean > pct*dis_box_mean.sel(latitude=dis_point.latitude, longitude=dis_point.longitude)) \
&(river==1) & (lag_influencing > 0.5) \
&(z_glofas.where(z_glofas >= z_point.squeeze()-50)).astype(bool)
influencer.name = 'gridpoints influencing discharge [0/1]'
influencer.plot()
plt.figure(figsize=(26,6))
z_glofas.where(z_glofas > z_point.squeeze()-100).plot()
mask_z_greater = (~np.isnan(z_glofas.where(z_glofas > z_point.squeeze()-100))).astype(bool)
mask_box_mean_greater
mask_river_altitude_greater
mask_mean_greater_pct_point
mask_lag_greater
mask_z_greater
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-23-e1225d0fe6d6> in <module> 1 mask_box_mean_greater ----> 2 mask_river_altitude_greater 3 mask_mean_greater_pct_point 4 mask_lag_greater 5 mask_z_greater NameError: name 'mask_river_altitude_greater' is not defined
plt.figure(figsize=(26,6))
dis_box_mean.where(mask_box_mean_greater).plot()
plt.figure(figsize=(26,6))
dis_box_mean.where(mask_river_altitude_greater).plot()
plt.figure(figsize=(26,6))
dis_box_mean.where(mask_mean_greater_pct_point).plot()
plt.figure(figsize=(26,6))
dis_box_mean.where(mask_lag_greater).plot()
plt.figure(figsize=(26,6))
dis_box_mean.where(mask_z_greater).plot()
plt.figure(figsize=(26,6))
dis_box_mean.where(mask_box_mean_greater & mask_river_altitude_greater \
&mask_lag_greater & mask_z_greater & mask_mean_greater_pct_point).plot()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
plt.figure(figsize=(26,6))
z_river = z_glofas.where(river)
z_river.where(z_river > z_point.squeeze()-100).plot()
dis_point
import geopandas
from rasterio import features
from affine import Affine
def filter_data_inside_single_basin(da, kw_basins='Danube'):
def transform_from_latlon(lat, lon):
lat = np.asarray(lat)
lon = np.asarray(lon)
trans = Affine.translation(lon[0], lat[0])
scale = Affine.scale(lon[1] - lon[0], lat[1] - lat[0])
return trans * scale
def rasterize(shapes, coords, fill=np.nan, **kwargs):
"""Rasterize a list of (geometry, fill_value) tuples onto the given
xray coordinates. This only works for 1d latitude and longitude
arrays.
"""
transform = transform_from_latlon(coords['latitude'], coords['longitude'])
out_shape = (len(coords['latitude']), len(coords['longitude']))
raster = features.rasterize(shapes, out_shape=out_shape,
fill=fill, transform=transform,
dtype=float, **kwargs)
return xr.DataArray(raster, coords=coords, dims=('latitude', 'longitude'))
# this shapefile is from natural earth data
# http://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-1-states-provinces/
shp2 = '/raid/home/srvx7/lehre/users/a1303583/ipython/ml_flood/data/drainage_basins/Major_Basins_of_the_World.shp'
basins = geopandas.read_file(shp2)
# print(basins)
single_basin = basins.query("NAME == '"+kw_basins+"'").reset_index(drop=True)
# print(single_basin)
shapes = [(shape, n) for n, shape in enumerate(single_basin.geometry)]
da['basins'] = rasterize(shapes, da.coords)
return da.where(da.basins == 0)
z_danube = filter_data_inside_single_basin(z_glofas, kw_basins='Danube')
mask_danube_basin = ~np.isnan(z_danube).astype(bool)
plt.figure(figsize=(26,6))
z_danube.plot()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
z_downstream = z_glofas.where(z_glofas.longitude <= j)
mask_downstream = ~np.isnan(z_downstream).astype(bool)
plt.figure(figsize=(26,6))
z_glofas.where(mask_downstream).plot()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
z_downstream = z_glofas.where(z_glofas >= z_point.squeeze())
z_downstream = z_downstream.where(z_glofas.longitude >= j)
mask_downstream = ~np.isnan(z_downstream).astype(bool)
plt.figure(figsize=(26,6))
z_glofas.where(mask_downstream).plot()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
plt.figure(figsize=(26,6))
dis_box_mean.where(mask_box_mean_greater).plot()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
plt.figure(figsize=(26,6))
z_danube.plot()
dis_box_mean.where(mask_box_mean_greater & mask_danube_basin & mask_downstream).plot(cmap='rainbow')
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
dis_point.plot()
static = xr.open_dataset('../data/usa/era5_slt_z_slor_lsm_stationary_field.nc')
print(static)
alti = static['z'].isel(time=0)/9.81
glofas = xr.open_dataset('../data/usa/glofas_reanalysis_usa_1981-2002.nc')
glofas = glofas.rename({'lat': 'latitude', 'lon': 'longitude'}) # to have the same name like in era5
glofas = shift_time(glofas, -dt.timedelta(days=1))
z_glofas = static['z'].isel(time=0)/9.81 # converting to m approx.
z_glofas = z_glofas.interp(latitude=glofas.latitude,
longitude=glofas.longitude)
print(z_glofas)
dis = glofas['dis']
print(dis)
alti.where(alti > 0).plot.imshow()
<xarray.Dataset> Dimensions: (latitude: 101, longitude: 221, time: 744) Coordinates: * longitude (longitude) float32 -125.0 -124.75 -124.5 ... -70.5 -70.25 -70.0 * latitude (latitude) float32 50.0 49.75 49.5 49.25 ... 25.5 25.25 25.0 * time (time) datetime64[ns] 2017-01-01 ... 2017-01-31T23:00:00 Data variables: slor (time, latitude, longitude) float32 ... lsm (time, latitude, longitude) float32 ... slt (time, latitude, longitude) float32 ... z (time, latitude, longitude) float32 ... Attributes: Conventions: CF-1.6 history: 2019-05-29 15:32:37 GMT by grib_to_netcdf-2.10.0: /opt/ecmw... <xarray.DataArray 'z' (latitude: 250, longitude: 550)> array([[ 1.033807e+02, 1.295250e+02, 1.556693e+02, ..., 4.951622e+02, 4.973871e+02, 4.996119e+02], [ 1.442023e+02, 1.641796e+02, 1.841568e+02, ..., 4.795543e+02, 4.817030e+02, 4.838518e+02], [ 1.850239e+02, 1.988342e+02, 2.126444e+02, ..., 4.639464e+02, 4.660190e+02, 4.680916e+02], ..., [ 1.189793e-01, -1.559314e-01, -4.308422e-01, ..., -3.778829e-01, -6.951612e-01, -1.012439e+00], [ 1.824191e-01, -5.018794e-02, -2.827950e-01, ..., -1.664437e-01, -3.991144e-01, -6.317851e-01], [ 2.458588e-01, 5.555556e-02, -1.347477e-01, ..., 4.499553e-02, -1.030677e-01, -2.511309e-01]]) Coordinates: time datetime64[ns] 2017-01-01 * latitude (latitude) float64 49.95 49.85 49.75 49.65 ... 25.25 25.15 25.05 * longitude (longitude) float64 -124.9 -124.8 -124.7 ... -70.25 -70.15 -70.05 <xarray.DataArray 'dis' (time: 8035, latitude: 250, longitude: 550)> [1104812500 values with dtype=float32] Coordinates: * longitude (longitude) float64 -124.9 -124.8 -124.7 ... -70.25 -70.15 -70.05 * latitude (latitude) float64 49.95 49.85 49.75 49.65 ... 25.25 25.15 25.05 * time (time) datetime64[ns] 1981-01-01 1981-01-02 ... 2002-12-31 Attributes: long_name: discharge units: m3/s
<matplotlib.image.AxesImage at 0x7fe2f5abe550>
j = -92
i = 31
point = (i, j)
alti.sel(latitude=point[0], longitude=point[1])
<xarray.DataArray 'z' ()> array(20.876736, dtype=float32) Coordinates: longitude float32 -92.0 latitude float32 31.0 time datetime64[ns] 2017-01-01
import geopandas
from rasterio import features
from affine import Affine
def filter_data_inside_single_basin(da, kw_basins='Danube'):
def transform_from_latlon(lat, lon):
lat = np.asarray(lat)
lon = np.asarray(lon)
trans = Affine.translation(lon[0], lat[0])
scale = Affine.scale(lon[1] - lon[0], lat[1] - lat[0])
return trans * scale
def rasterize(shapes, coords, fill=np.nan, **kwargs):
"""Rasterize a list of (geometry, fill_value) tuples onto the given
xray coordinates. This only works for 1d latitude and longitude
arrays.
"""
transform = transform_from_latlon(coords['latitude'], coords['longitude'])
out_shape = (len(coords['latitude']), len(coords['longitude']))
raster = features.rasterize(shapes, out_shape=out_shape,
fill=fill, transform=transform,
dtype=float, **kwargs)
return xr.DataArray(raster, coords=coords, dims=('latitude', 'longitude'))
# this shapefile is from natural earth data
# http://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-1-states-provinces/
shp2 = '/raid/home/srvx7/lehre/users/a1303583/ipython/ml_flood/data/drainage_basins/Major_Basins_of_the_World.shp'
basins = geopandas.read_file(shp2)
# print(basins)
single_basin = basins.query("NAME == '"+kw_basins+"'").reset_index(drop=True)
# print(single_basin)
shapes = [(shape, n) for n, shape in enumerate(single_basin.geometry)]
da['basins'] = rasterize(shapes, da.coords)
return da.where(da.basins == 0)
z_mississippi = filter_data_inside_single_basin(z_glofas, kw_basins='Mississippi')
mask_mississippi_basin = ~np.isnan(z_mississippi).astype(bool)
plt.figure(figsize=(15,6))
z_mississippi.plot(cmap='viridis')
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
z_downstream = z_glofas.where(z_glofas.latitude >= i)
mask_downstream = ~np.isnan(z_downstream).astype(bool)
plt.figure(figsize=(15,6))
z_glofas.where(mask_downstream).plot(cmap='viridis')
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
dis_usa = glofas['dis']
dis_usa_mean = dis_usa[:1000].mean('time')
/home/srvx11/lehre/users/a1303583/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/nanops.py:160: RuntimeWarning: Mean of empty slice return np.nanmean(a, axis=axis, dtype=dtype)
plt.figure(figsize=(15,6))
#dis_usa_mean.where(dis_usa_mean > 100).plot(cmap='RdBu')
mask_box_mean_greater = (~np.isnan(dis_usa_mean.where(dis_usa_mean > 5))).astype(bool)
dis_usa_mean.where(mask_box_mean_greater).plot(cmap='viridis')
p = plt.scatter(j, i, s=500, marker='o', lw=3, facecolor='none', color='cyan')
plt.figure(figsize=(15,6))
dis_usa_mean.where(mask_box_mean_greater & mask_mississippi_basin & mask_downstream).plot()
p = plt.scatter(j, i, s=1500, marker='o', lw=10, facecolor='none', color='cyan')
def add_shifted_predictors(ds, shifts, variables='all'):
"""Adds additional variables to an array which are shifted in time.
Parameters
----------
ds : xr.Dataset
shifts : list of integers
variables : str or list
"""
if variables == 'all':
variables = ds.data_vars
for var in variables:
for i in shifts:
if i == 0: continue # makes no sense to shift by zero
newvar = var+'-'+str(i)
ds[newvar] = ds[var].shift(time=i)
return ds
shifts = range(1,4)
X_dis = add_shifted_predictors(glofas, shifts, variables='all')
X_dis = X_dis.drop('dis') # we actually want to predict (t) with (t-1, t-2, t-3)
y_dis = glofas['dis']
--------------------------------------------------------------------------- MemoryError Traceback (most recent call last) <ipython-input-15-7b9511c281e3> in <module> 1 shifts = range(1,4) ----> 2 X_dis = add_shifted_predictors(glofas, shifts, variables='all') 3 X_dis = X_dis.drop('dis') # we actually want to predict (t) with (t-1, t-2, t-3) 4 y_dis = glofas['dis'] <ipython-input-14-4330aa0c7fa9> in add_shifted_predictors(ds, shifts, variables) 15 if i == 0: continue # makes no sense to shift by zero 16 newvar = var+'-'+str(i) ---> 17 ds[newvar] = ds[var].shift(time=i) 18 return ds ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/dataarray.py in shift(self, shifts, fill_value, **shifts_kwargs) 2399 """ 2400 variable = self.variable.shift( -> 2401 shifts=shifts, fill_value=fill_value, **shifts_kwargs) 2402 return self._replace(variable=variable) 2403 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in shift(self, shifts, fill_value, **shifts_kwargs) 1029 result = self 1030 for dim, count in shifts.items(): -> 1031 result = result._shift_one_dim(dim, count, fill_value=fill_value) 1032 return result 1033 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in _shift_one_dim(self, dim, count, fill_value) 971 keep = slice(None) 972 --> 973 trimmed_data = self[(slice(None),) * axis + (keep,)].data 974 975 if fill_value is dtypes.NA: ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in data(self) 292 return self._data 293 else: --> 294 return self.values 295 296 @data.setter ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in values(self) 387 def values(self): 388 """The variable's data as a numpy.ndarray""" --> 389 return _as_array_or_item(self._data) 390 391 @values.setter ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/variable.py in _as_array_or_item(data) 208 TODO: remove this (replace with np.asarray) once these issues are fixed 209 """ --> 210 data = np.asarray(data) 211 if data.ndim == 0: 212 if data.dtype.kind == 'M': ~/.conda/envs/ml_flood/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order) 536 537 """ --> 538 return array(a, dtype, copy=False, order=order) 539 540 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype) 630 631 def __array__(self, dtype=None): --> 632 self._ensure_cached() 633 return np.asarray(self.array, dtype=dtype) 634 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/indexing.py in _ensure_cached(self) 627 def _ensure_cached(self): 628 if not isinstance(self.array, NumpyIndexingAdapter): --> 629 self.array = NumpyIndexingAdapter(np.asarray(self.array)) 630 631 def __array__(self, dtype=None): ~/.conda/envs/ml_flood/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order) 536 537 """ --> 538 return array(a, dtype, copy=False, order=order) 539 540 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype) 608 609 def __array__(self, dtype=None): --> 610 return np.asarray(self.array, dtype=dtype) 611 612 def __getitem__(self, key): ~/.conda/envs/ml_flood/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order) 536 537 """ --> 538 return array(a, dtype, copy=False, order=order) 539 540 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype) 514 def __array__(self, dtype=None): 515 array = as_indexable(self.array) --> 516 return np.asarray(array[self.key], dtype=None) 517 518 def transpose(self, order): ~/.conda/envs/ml_flood/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order) 536 537 """ --> 538 return array(a, dtype, copy=False, order=order) 539 540 ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/coding/variables.py in __array__(self, dtype) 67 68 def __array__(self, dtype=None): ---> 69 return self.func(self.array) 70 71 def __repr__(self): ~/.conda/envs/ml_flood/lib/python3.7/site-packages/xarray/coding/variables.py in _apply_mask(data, encoded_fill_values, decoded_fill_value, dtype) 138 for fv in encoded_fill_values: 139 condition |= data == fv --> 140 return np.where(condition, decoded_fill_value, data) 141 142 MemoryError:
def preprocess_reshape(X_dis, y_dis, i, j):
"""Reshape, merge predictor/predictand in time, drop nans."""
X_dis = X_dis.to_array(dim='time_feature')
X_dis = X_dis.stack(features=['latitude', 'longitude', 'time_feature'])
Xar = X_dis.dropna('features', how='all')
yar = y_dis[:,i,j]
yar = yar.drop(['latitude', 'longitude'])
yar.coords['features'] = 'dis'
Xy = xr.concat([Xar, yar], dim='features')
Xyt = Xy.dropna('time', how='any') # drop them as we cannot train on nan values
time = Xyt.time
Xda = Xyt[:,:-1]
yda = Xyt[:,-1]
return Xda, yda, time
# space subset, dimensionality reduction
#X_dis = X_dis.where(upstream)
# time subset
X_dis = X_dis.where(noprecip)
y_dis = y_dis.where(noprecip)
Xda, yda, time = preprocess_reshape(X_dis, y_dis, i,j)
N_train = 365*3
N_valid = 365
X_train = Xda[:N_train,:]
y_train = yda[:N_train]
X_valid = Xda[N_train:N_train+N_valid,:]
y_valid = yda[N_train:N_train+N_valid]
print(Xda.shape)
print('train shapes:', X_train.shape, y_train.shape)
print('valid shapes:', X_valid.shape, y_valid.shape)
(2392, 78) train shapes: (1095, 78) (1095,) valid shapes: (365, 78) (365,)
def add_time(vector, time, name=None):
"""Converts arrays to xarrays with a time coordinate."""
return xr.DataArray(vector, dims=('time'), coords={'time': time}, name=name)
class KerasDenseNN(object):
def __init__(self, **kwargs):
model = keras.models.Sequential()
self.cfg = kwargs
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(8,
kernel_initializer='normal',
bias_initializer='zeros',
activation='relu')) #('sigmoid'))
#model.add(Dropout(self.cfg.get('dropout')))
#model.add(keras.layers.Dense(32))
#model.add(keras.layers.Activation('sigmoid'))
#model.add(Dropout(self.cfg.get('dropout')))
#model.add(keras.layers.Dense(16))
#model.add(keras.layers.Activation('sigmoid'))
#model.add(Dropout(self.cfg.get('dropout')))
#model.add(keras.layers.Dense(8))
#model.add(keras.layers.Activation('sigmoid'))
#model.add(Dropout(self.cfg.get('dropout')))
model.add(keras.layers.Dense(1, activation='linear'))
# bias_initializer=keras.initializers.Constant(value=9000)))
#ha = self.cfg.get('hidden_activation')
#for N_nodes in self.cfg.get('N_hidden_nodes'):
#
# model.add(hidden)
# model.add(ha.copy())
#
# if self.cfg.get('dropout'):
# model.add(Dropout(self.cfg.get('dropout')))#
#outputlayer = keras.layers.Dense(1, activation='linear')
#optimizer_name, options_dict = self.cfg.get('optimizer')
#optimizer = getattr(keras.optimizers, optimizer_name)(**options_dict)
#optimizer = keras.optimizers.SGD(lr=0.01)
rmsprop = keras.optimizers.RMSprop(lr=.1)
sgd = keras.optimizers.SGD(lr=0.1, decay=1e-6, momentum=0.5, nesterov=True)
model.compile(loss=self.cfg.get('loss'),
optimizer=rmsprop)
self.model = model
self.callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss',
min_delta=1, patience=100, verbose=0, mode='auto',
baseline=None, restore_best_weights=True),]
def predict(self, Xda, name=None):
a = self.model.predict(Xda.values).squeeze()
return add_time(a, Xda.time, name=name)
def fit(self, Xda, yda, **kwargs):
return self.model.fit(Xda.values, yda.values.reshape(-1,1),
epochs=self.cfg.get('epochs', None),
batch_size=self.cfg.get('batch_size', None),
callbacks=self.callbacks,
verbose=0,
**kwargs)
mlp_kws = dict(optimizer=('sgd', dict(lr=1)),
loss='mean_squared_error',
#N_hidden_nodes=(4,4),
#hidden_activation=keras.layers.Activation('sigmoid'), #keras.layers.ReLU(), #-LeakyReLU(alpha=0.3), #'relu',
#output_activation='linear',
#bias_initializer='random_uniform',
batch_size=128,
dropout=0., #.25,
epochs=1000,
)
linear_kws = dict(C=.1, n_jobs=-1, max_iter=10000, verbose=True)
if False:
pipe = Pipeline([('scaler', StandardScaler()),
('pca', PCA(n_components=4)),
('model', LinearRegression(**linear_kws)),],
verbose=True)
if True:
pipe = Pipeline([#('scaler', StandardScaler()),
#('pca', PCA(n_components=2)),
('model', KerasDenseNN(**mlp_kws)),],
verbose=False)
pipe
Pipeline(memory=None, steps=[('model', <__main__.KerasDenseNN object at 0x7f5b722e1da0>)], verbose=False)
history = pipe.fit(X_train, y_train,
model__validation_data=(X_valid, #.values,
y_valid)) #.values.reshape(-1,1)))
keras.utils.print_summary(pipe.named_steps['model'].model)
_________________________________________________________________ Layer (type) Output Shape Param # ================================================================= batch_normalization_1 (Batch (None, 78) 312 _________________________________________________________________ dense_1 (Dense) (None, 8) 632 _________________________________________________________________ dense_2 (Dense) (None, 1) 9 ================================================================= Total params: 953 Trainable params: 797 Non-trainable params: 156 _________________________________________________________________
h = history.named_steps['model'].model.history
# Plot training & validation loss values
plt.plot(h.history['loss'], label='loss')
plt.plot(h.history['val_loss'], label='val_loss')
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend() #['Train', 'Test'], loc='upper left')
plt.gca().set_yscale('log')
plt.show()
y_train_pred = pipe.predict(X_train, name='dis-fcst-train')
y_valid_pred = pipe.predict(X_valid, name='dis-fcst-valid')
X_valid.values.shape
(365, 78)
fig, ax = plt.subplots(figsize=(24,5))
minpred = X_train.min('features')
maxpred = X_train.max('features')
minpred.plot(ax=ax, linestyle='--', label='predictor-min')
maxpred.plot(ax=ax, linestyle='--', label='predictor-max')
dis[:,i,j].to_pandas().plot(ax=ax, label='dis-reanalysis')
y_train_pred.plot(ax=ax, marker='.', lw=0)
plt.legend()
plt.gca().set_xlim(dt.datetime(1981,1,1), y_train_pred.time.values[-1])
(723181.0, 726629.0)
fig, ax = plt.subplots(figsize=(24,5))
minpred = add_time(Xda.min(axis=1), time)
maxpred = add_time(Xda.max(axis=1), time)
minpred.plot(ax=ax, linestyle='--', label='predictor-min')
maxpred.plot(ax=ax, linestyle='--', label='predictor-max')
dis[:,i,j].to_pandas().plot(ax=ax, label='dis-reanalysis')
y_valid_pred.plot(ax=ax, marker='.', lw=0)
plt.legend()
plt.gca().set_xlim(y_valid_pred.time.values[0], y_valid_pred.time.values[-1])
(726630.0, 727717.0)