
Comparison of Recurrent Neural Network Variants for Streamflow Discharge Prediction

This notebook compares streamflow discharge predictions from conceptual models (NRECA and F.J. Mock) against several deep learning models: an Artificial Neural Network (a Multilayer Perceptron, MLP) and Recurrent Neural Network variants (vanilla RNN, Long Short-Term Memory (LSTM), and Gated Recurrent Unit (GRU)). The emphasis is on practical implementation and on presenting (visualizing) the results of each model.

This notebook also demonstrates the use of hidrokit version 0.3.5. Information about hidrokit is available at hidrokit.github.io/hidrokit.

NOTEBOOK SETUP

This notebook was developed in Google Colab. The Google Colab runtime was used for model training, while a local runtime was used for code development and evaluation.

In [ ]:
#@title RUNTIME SELECTION (GOOGLE COLAB/LOCAL) {display-mode:"form", run:"auto"}
# Using Google Colab or Local System
_IS_LOCAL = True #@param {type:"boolean"}

print('RUNTIME: LOCAL' if _IS_LOCAL else 'RUNTIME: GOOGLE COLAB')
RUNTIME: LOCAL
In [ ]:
#@title USE CALIBRATED MODELS {display-mode:"form", run:"auto"}
#@markdown NRECA & F.J. Mock models
_USE_CALIBRATED_MODEL = True #@param {type:"boolean"}

_CALD_NRECA_BULANAN = 'nreca_res_bulanan.csv' #@param {type:"string"}
_CALD_NRECA_2p = 'nreca_res_2p.csv' #@param {type:"string"}

_CALD_FJMOCK_BULANAN = 'fjmock_res_bulanan.csv' #@param {type:"string"}
_CALD_FJMOCK_2p = 'fjmock_res_2p.csv' #@param {type:"string"}

print(f'USE CALIBRATED MODEL: {_USE_CALIBRATED_MODEL}')
USE CALIBRATED MODEL: True
In [ ]:
#@title USE TRAINED MODELS {display-mode:"form", run:"auto"}
#@markdown ANN, RNN, LSTM, and GRU models
_USE_TRAINED_MODEL = True #@param {type:"boolean"}

#@markdown ## ANN
# ANN
_TRAINED_ANN_MODEL_TS5 = 'ann_model_ts5.h5' #@param {type:"string"}
_ANN_HISTORY_TS5 = 'ann_history_ts5.csv' #@param {type:"string"}
_TRAINED_ANN_MODEL_TS10 = 'ann_model_ts10.h5' #@param {type:"string"}
_ANN_HISTORY_TS10 = 'ann_history_ts10.csv' #@param {type:"string"}
_TRAINED_ANN_MODEL_TS365 = 'ann_model_ts365.h5' #@param {type:"string"}
_ANN_HISTORY_TS365 = 'ann_history_ts365.csv' #@param {type:"string"}

#@markdown ## RNN
# RNN
_TRAINED_RNN_MODEL_TS5 = 'rnn_model_ts5.h5' #@param {type:"string"}
_RNN_HISTORY_TS5 = 'rnn_history_ts5.csv' #@param {type:"string"}
_TRAINED_RNN_MODEL_TS10 = 'rnn_model_ts10.h5' #@param {type:"string"}
_RNN_HISTORY_TS10 = 'rnn_history_ts10.csv' #@param {type:"string"}
_TRAINED_RNN_MODEL_TS365 = 'rnn_model_ts365.h5' #@param {type:"string"}
_RNN_HISTORY_TS365 = 'rnn_history_ts365.csv' #@param {type:"string"}

#@markdown ## LSTM
# LSTM
_TRAINED_LSTM_MODEL_TS5 = 'lstm_model_ts5.h5' #@param {type:"string"}
_LSTM_HISTORY_TS5 = 'lstm_history_ts5.csv' #@param {type:"string"}
_TRAINED_LSTM_MODEL_TS10 = 'lstm_model_ts10.h5' #@param {type:"string"}
_LSTM_HISTORY_TS10 = 'lstm_history_ts10.csv' #@param {type:"string"}
_TRAINED_LSTM_MODEL_TS365 = 'lstm_model_ts365.h5' #@param {type:"string"}
_LSTM_HISTORY_TS365 = 'lstm_history_ts365.csv' #@param {type:"string"}

#@markdown ## GRU
# GRU
_TRAINED_GRU_MODEL_TS5 = 'gru_model_ts5.h5' #@param {type:"string"}
_GRU_HISTORY_TS5 = 'gru_history_ts5.csv' #@param {type:"string"}
_TRAINED_GRU_MODEL_TS10 = 'gru_model_ts10.h5' #@param {type:"string"}
_GRU_HISTORY_TS10 = 'gru_history_ts10.csv' #@param {type:"string"}
_TRAINED_GRU_MODEL_TS365 = 'gru_model_ts365.h5' #@param {type:"string"}
_GRU_HISTORY_TS365 = 'gru_history_ts365.csv' #@param {type:"string"}

#@markdown ## RECORD RUNNING
# RECORD RUNNING
_RECORDED_RUN = 'recorded_run.csv' #@param {type:"string"}

print(f'USE TRAINED MODEL: {_USE_TRAINED_MODEL}')
USE TRAINED MODEL: True
In [ ]:
#@title SHOW/SAVE FIGURES {display-mode:"form", run:"auto"}
_SHOW_IMAGES = True #@param {type:"boolean"}
_SAVE_IMAGE = True #@param {type:"boolean"}

INITIALIZATION

In [ ]:
if _IS_LOCAL:
    pass
else:
    %tensorflow_version 2.x
In [ ]:
from pathlib import Path
if _IS_LOCAL:
    _LOCAL_DIRECTORY = './_dropbox/vivaldi' #@param {type:"string"}
    _DIRECTORY = Path(_LOCAL_DIRECTORY) 
    _DIRIMAGE = _DIRECTORY / 'img'
else:
    from google.colab import drive
    drive.mount('/content/gdrive')
    _CLOUD_DIRECTORY = '/content/gdrive/My Drive/Colab Notebooks/_dropbox/vivaldi' #@param {type:"string"}
    _DIRECTORY = Path(_CLOUD_DIRECTORY)
    _DIRIMAGE = _DIRECTORY / 'img'
In [ ]:
print(':: CHECKING HIDROKIT PACKAGE')
try:
    import hidrokit
except ModuleNotFoundError:
    print(':: INSTALLING HIDROKIT PACKAGE')
    !pip install hidrokit -q
    import hidrokit  # import after installation, mirroring the HydroErr cell
:: CHECKING HIDROKIT PACKAGE
In [ ]:
print(':: CHECKING HYDROERR PACKAGE')
try:
    import HydroErr
except ModuleNotFoundError:
    print(':: INSTALLING HYDROERR PACKAGE')
    !pip install HydroErr -q
    import HydroErr
:: CHECKING HYDROERR PACKAGE
In [ ]:
def _check_system(PACKAGE_LIST='numpy pandas matplotlib'):
    from pkg_resources import get_distribution
    from sys import version_info

    print(':: SYSTEM VERSION INFO')
    print(':: {:>12s} version: {:<10s}'.format(
            'python', 
            '{}.{}.{}'.format(*version_info[:3]))
        )
    for package in PACKAGE_LIST.split():
        print(':: {:>12s} version: {:<10s}'.format(
            package, 
            get_distribution(package).version)
        )

def _check_tf():
    import tensorflow as tf
    print(':: {:>12s} version: {:<10s}'.format(
            'tensorflow', 
            tf.__version__)
        )
    print(':: {:>12s} version: {:<10s}'.format(
            'keras', 
            tf.keras.__version__)
        )

_check_system('numpy pandas matplotlib hydroerr hidrokit')
_check_tf()
:: SYSTEM VERSION INFO
::       python version: 3.6.10    
::        numpy version: 1.18.1    
::       pandas version: 1.0.0     
::   matplotlib version: 3.1.3     
::     hydroerr version: 1.24      
::     hidrokit version: 0.3.5     
::   tensorflow version: 2.0.0     
::        keras version: 2.2.4-tf  
In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import HydroErr as he
import seaborn as sns
import re
import os
import contextlib

from hidrokit.contrib.taruma import (
    hk43, hk53, hk73, hk79, hk84,
    hk88, hk89, hk90, hk96, hk98,
    hk99, hk106, hk87
)
from hidrokit.prep import timeseries
from itertools import product

# TENSORFLOW
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping

pd.options.display.float_format = '{:.5f}'.format

print(':: IMPORT LIBRARY')
:: IMPORT LIBRARY
In [ ]:
from contextlib import contextmanager
from datetime import datetime, timezone, timedelta

#ref: https://stackoverflow.com/a/20924212/4886384

_RECORD_RUNNING = {}

@contextmanager
def timeit_context(process, record=_RECORD_RUNNING):
    # Time a named process and store [start, end, duration (min)] in `record`.
    starttime = datetime.now(timezone(timedelta(hours=7)))
    str_start = starttime.strftime("%Y%m%d %H:%M")
    print(f':: {process} START: {str_start}')
    yield
    endtime = datetime.now(timezone(timedelta(hours=7)))
    str_end = endtime.strftime("%Y%m%d %H:%M")
    print(f':: {process} END: {str_end}')
    elapsedtime = endtime - starttime
    print(f':: {process} DURATION: {elapsedtime.total_seconds()/60:.2f} min')
    record[process] = [str_start, str_end, elapsedtime.total_seconds()/60]

def clean_title(title):
    new = re.sub(r'\W+', ' ', title).lower()
    return new.replace(' ', '_')
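
Below is a minimal usage sketch of timeit_context; the process name 'demo_process' is hypothetical, and the timing record ends up in _RECORD_RUNNING under that name.

In [ ]:
# Minimal usage sketch of timeit_context ('demo_process' is a hypothetical
# process name); the record is stored in _RECORD_RUNNING.
with timeit_context('demo_process'):
    _ = sum(range(1_000_000))  # stand-in for a long-running step

print(_RECORD_RUNNING['demo_process'])  # [start, end, duration in minutes]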
In [ ]:
# plt.style.use('seaborn-whitegrid')
# plt.style.use('seaborn-colorblind')
plt.rcParams['axes.xmargin'] = 0
plt.rcParams['axes.ymargin'] = 0
plt.rcParams['legend.frameon'] = True
plt.rcParams['legend.loc'] = 'upper right'

DATASET

The dataset is the same one used in LI-#3, so both the code and the explanation for dataset acquisition and preprocessing are kept brief here.

In [ ]:
URL_DEBIT = 'https://taruma.github.io/assets/hidrokit_dataset/hidrokit_poc_demo_debit.xlsx'
URL_HUJAN = 'https://taruma.github.io/assets/hidrokit_dataset/hidrokit_poc_demo_hujan.xlsx'
URL_KLIMATOLOGI = 'https://taruma.github.io/assets/hidrokit_dataset/hidrokit_poc_demo_klimatologi.xlsx'

print(':: DATASET URLS')
print(f':: URL_HUJAN = {URL_HUJAN}')
print(f':: URL_DEBIT = {URL_DEBIT}')
print(f':: URL_KLIMATOLOGI = {URL_KLIMATOLOGI}')
:: DATASET URLS
:: URL_HUJAN = https://taruma.github.io/assets/hidrokit_dataset/hidrokit_poc_demo_hujan.xlsx
:: URL_DEBIT = https://taruma.github.io/assets/hidrokit_dataset/hidrokit_poc_demo_debit.xlsx
:: URL_KLIMATOLOGI = https://taruma.github.io/assets/hidrokit_dataset/hidrokit_poc_demo_klimatologi.xlsx
In [ ]:
def _parse_stations(stations):
    return stations.replace(' ', '').split(',')

print(':: READING RAINFALL AND DISCHARGE DATASET INFO')
info_hujan = hk79._get_info(URL_HUJAN)
info_debit = hk79._get_info(URL_DEBIT)
stations_hujan = _parse_stations(info_hujan['stations'])
stations_debit = _parse_stations(info_debit['stations'])
:: READING RAINFALL AND DISCHARGE DATASET INFO

STEP 1: DATASET ACQUISITION

See LI-#3 for more detail.

In [ ]:
print(':: READING RAINFALL DATA FROM [URL_HUJAN]')
dataset_hujan = hk88.read_workbook(URL_HUJAN, stations_hujan)
print(':: READING DISCHARGE DATA FROM [URL_DEBIT]')
dataset_debit = hk88.read_workbook(URL_DEBIT, stations_debit)
print(':: READING CLIMATOLOGY DATA FROM [URL_KLIMATOLOGI]')
dataset_klimatologi = hk73._read_bmkg(URL_KLIMATOLOGI)
:: READING RAINFALL DATA FROM [URL_HUJAN]
:: READING DISCHARGE DATA FROM [URL_DEBIT]
:: READING CLIMATOLOGY DATA FROM [URL_KLIMATOLOGI]

STEP 2: DATA PREPROCESSING

See LI-#3 for more detail.

In [ ]:
def find_invalid(df):
    # Collect entries flagged as invalid (non-numeric) for each column.
    results = {}
    for col in df.columns:
        results[col] = hk43._check_invalid(df.loc[:, col].values)
    return results
In [ ]:
print(':: PREPROCESSING RAINFALL DATA')
invalid_hujan = find_invalid(dataset_hujan)
dataset_hujan[dataset_hujan == '-'] = 0  # treat '-' entries as zero rainfall
dataset_hujan = dataset_hujan.infer_objects()
dataset_hujan.interpolate(method='linear', inplace=True)
dataset_hujan.bfill(inplace=True)
print(':: PREPROCESSING DISCHARGE DATA')
invalid_debit = find_invalid(dataset_debit)
dataset_debit[dataset_debit == '20.9.46'] = 209.46  # repair a mistyped value
dataset_debit[dataset_debit == 'tad'] = 0.  # 'tad' (tidak ada data / no data)
dataset_debit = dataset_debit.infer_objects()
dataset_debit.interpolate(method='linear', inplace=True)
print(':: PREPROCESSING CLIMATOLOGY DATA')
_klim_col = ['Tavg', 'ss', 'RH_avg', 'ff_avg']
dataset_klimatologi = dataset_klimatologi[_klim_col].copy()
dataset_klimatologi.interpolate(method='linear', inplace=True)
dataset_klimatologi['ss'] = dataset_klimatologi['ss'] / 8 * 100  # sunshine hours -> percent of an 8-hour day
dataset_klimatologi.columns = ['TEMP', 'SUN', 'HUMID', 'WIND']
:: PREPROCESSING RAINFALL DATA
:: PREPROCESSING DISCHARGE DATA
:: PREPROCESSING CLIMATOLOGY DATA

STEP 3: MODEL INPUT

The inputs for the conceptual and deep learning models differ significantly. For the conceptual models, the input data must go through areal rainfall averaging and evapotranspiration calculations, followed by resampling the data into monthly periods or periods of an arbitrary number of days.

The deep learning models, in contrast, are fed data that is as "raw" as possible: the dataset is only manipulated according to the number of timesteps used, as sketched below.
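
As an illustration of this timestep manipulation, the sketch below builds lagged copies of a single toy feature for a 3-timestep window; it is illustrative only, not hidrokit's exact implementation.

In [ ]:
# Illustrative sketch (not hidrokit's exact implementation): lagged copies
# of one toy feature 'x' for a 3-timestep input window.
_demo = pd.DataFrame({'x': [1.0, 2.0, 3.0, 4.0, 5.0]})
for _lag in range(1, 4):
    _demo[f'x_tmin{_lag}'] = _demo['x'].shift(_lag)
print(_demo.dropna())  # each remaining row carries x at t, t-1, t-2, t-3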

In [ ]:
print(':: PERIOD STARTS FROM 1 MARCH 1998')
_period = slice('19980301', None)

hujan = dataset_hujan[_period]
debit = dataset_debit[_period]
klimatologi = dataset_klimatologi[_period]
:: PERIOD STARTS FROM 1 MARCH 1998
In [ ]:
print(':: MERGING DATASETS')
raw_dataset = pd.concat([hujan, klimatologi, debit], axis=1)
raw_dataset.columns = (
    'ch_A ch_B ch_C ch_D ch_E ch_F ch_G'.split() + 
    'ch_H temp sun humid wind debit'.split()
)
:: MERGING DATASETS

CONCEPTUAL MODEL INPUT

For the NRECA and F.J. Mock models, two datasets are built: one aggregated monthly and one aggregated into two periods per month (16 and 15 days).

Thiessen

In [ ]:
AREA_BASIN = {
    'ch_A': 18.638, 'ch_B': 208.920, 'ch_C': 147.520,
    'ch_D': 499.413, 'ch_E': 205.003, 'ch_F': 76.695,
    'ch_G': 127.507, 'ch_H': 166.899
}

print(':: COMPUTING AREAL MEAN RAINFALL (THIESSEN)')
hujan_thiessen = hk99.apply_thiessen(
    raw_dataset[list(AREA_BASIN.keys())], AREA_BASIN
)
:: COMPUTING AREAL MEAN RAINFALL (THIESSEN)
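
For reference, the Thiessen method is an area-weighted mean, $\bar{P} = \sum_i A_i P_i / \sum_i A_i$. The sketch below computes it directly with pandas; it is assumed to be equivalent in spirit to hk99.apply_thiessen, not a drop-in replacement.

In [ ]:
# Equivalent-in-spirit sketch of the Thiessen weighted mean (assumption:
# hk99.apply_thiessen computes an area-weighted average per timestep).
_weights = pd.Series(AREA_BASIN)
_thiessen_manual = (
    raw_dataset[list(AREA_BASIN.keys())]
    .mul(_weights, axis=1)          # A_i * P_i per station
    .sum(axis=1) / _weights.sum()   # divide by total basin area
)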

Evapotranspiration

In [ ]:
print(':: SETTING SITE LATITUDE')
LATITUDE = '6.25 LS'  # 'LS' = Lintang Selatan (south latitude); format expected by hk106

print(':: COMPUTING EVAPOTRANSPIRATION (PENMAN FORMULA)')
eto_penman = hk106.ETo_Penman(
    raw_dataset,
    'temp', 'humid', 'wind', 'sun',
    lat=LATITUDE
)
:: SETTING SITE LATITUDE
:: COMPUTING EVAPOTRANSPIRATION (PENMAN FORMULA)
In [ ]:
print(':: MERGING DATASETS FOR THE CONCEPTUAL MODELS')
dataset_concept = pd.concat([hujan_thiessen, eto_penman, debit], axis=1)
dataset_concept.columns = 'ch eto debit'.split()
:: MERGING DATASETS FOR THE CONCEPTUAL MODELS

AGGREGATION

In [ ]:
print(':: FUNCTION OBJECTS FOR RAINFALL AGGREGATION')
def n_rain(x):
    # Number of rainy days (precipitation > 0) in the period.
    return (x > 0).sum()

hujan_func = [np.sum, n_rain, len]
hujan_func_col = ['precip', 'nrain', 'ndays']
:: FUNCTION OBJECTS FOR RAINFALL AGGREGATION

Monthly

In [ ]:
print(':: BUILDING MONTHLY SUMMARY')
hujan_bulanan = hk98.summary_station(
    dataset_concept, column='ch', ufunc=hujan_func, ufunc_col=hujan_func_col
    ).droplevel(0, axis=1)

eto_bulanan = hk98.summary_station(
    dataset_concept, column='eto', 
    ufunc=[np.sum, np.mean], ufunc_col=['eto', 'eto_mean']
    ).droplevel(0, axis=1)

debit_bulanan = hk98.summary_station(
    dataset_concept, column='debit', ufunc=np.mean, ufunc_col='debit'
    ).droplevel(0, axis=1)

data_bulanan = pd.concat([hujan_bulanan, eto_bulanan, debit_bulanan], axis=1)
print(':: SHOWING INFO [data_bulanan]')
data_bulanan.info()
:: BUILDING MONTHLY SUMMARY
:: SHOWING INFO [data_bulanan]
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 130 entries, 1998-03-01 to 2008-12-01
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   precip    130 non-null    float64
 1   nrain     130 non-null    float64
 2   ndays     130 non-null    float64
 3   eto       130 non-null    float64
 4   eto_mean  130 non-null    float64
 5   debit     130 non-null    float64
dtypes: float64(6)
memory usage: 7.1 KB
In [ ]:
print(':: RAINFALL AND DISCHARGE PLOTS [data_bulanan]')
if _SHOW_IMAGES:
    data_bulanan[['precip', 'debit', 'eto', 'eto_mean']].plot(
        subplots=True, sharex=True, 
        title=['PRECIP ($mm/month$)',
            'DEBIT ($m^3/s$)',
            'ETO ($mm/month$)',
            'ETO_MEAN ($mm/day$)'],
        figsize=(20, 10), legend=False);
:: RAINFALL AND DISCHARGE PLOTS [data_bulanan]

Two-Period

In [ ]:
_ndays = '16D'

print(':: BUILDING TWO-PERIOD SUMMARY')
hujan_2p = hk98.summary_station(
    dataset_concept, column='ch', ufunc=hujan_func, ufunc_col=hujan_func_col,
    n_days=_ndays
    ).droplevel(0, axis=1)

eto_2p = hk98.summary_station(
    dataset_concept, column='eto', 
    ufunc=[np.sum, np.mean], ufunc_col=['eto', 'eto_mean'],
    n_days=_ndays
    ).droplevel(0, axis=1)

debit_2p = hk98.summary_station(
    dataset_concept, column='debit', ufunc=np.mean, ufunc_col='debit',
    n_days=_ndays
    ).droplevel(0, axis=1)

data_2p = pd.concat([hujan_2p, eto_2p, debit_2p], axis=1)
print(':: SHOWING INFO [data_2p]')
data_2p.info()
:: BUILDING TWO-PERIOD SUMMARY
:: SHOWING INFO [data_2p]
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 260 entries, 1998-03-01 to 2008-12-17
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   precip    260 non-null    float64
 1   nrain     260 non-null    float64
 2   ndays     260 non-null    float64
 3   eto       260 non-null    float64
 4   eto_mean  260 non-null    float64
 5   debit     260 non-null    float64
dtypes: float64(6)
memory usage: 14.2 KB
In [ ]:
print(':: RAINFALL AND DISCHARGE PLOTS [data_2p]')
if _SHOW_IMAGES:
    data_2p[['precip', 'debit', 'eto', 'eto_mean']].plot(
        subplots=True, sharex=True, 
        title=['PRECIP ($mm/period$)',
            'DEBIT ($m^3/s$)',
            'ETO ($mm/period$)',
            'ETO_MEAN ($mm/day$)'],
        figsize=(20, 10), legend=False);
:: RAINFALL AND DISCHARGE PLOTS [data_2p]

DEEP LEARNING MODEL INPUT

The train set ends on 31 December 2006 and the test set starts on 1 January 2007. The ANN takes a 2-D tensor as input, while the RNN takes a 3-D tensor; the RNN variants (LSTM and GRU) use the same input as the RNN.
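
The sketch below illustrates the shape difference with toy numbers (hypothetical sizes, not the real dataset): the MLP consumes (samples, timesteps × features), while the RNN variants consume (samples, timesteps, features).

In [ ]:
# Toy shape sketch (hypothetical sizes): 2-D input for the ANN/MLP versus
# 3-D input for RNN/LSTM/GRU.
_samples, _timesteps, _features = 8, 5, 12
_X_rnn = np.zeros((_samples, _timesteps, _features))       # (8, 5, 12)
_X_ann = _X_rnn.reshape(_samples, _timesteps * _features)  # (8, 60)
print(_X_rnn.shape, _X_ann.shape)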

Dataset

In [ ]:
def plot_corr_mat(df, savefig=_SAVE_IMAGE):
    corr = df.corr()
    mask = np.triu(np.ones_like(corr, dtype=bool))
    fig, ax = plt.subplots(figsize=(12, 10))
    fig.tight_layout()
    sns.heatmap(
            corr, mask=mask, cmap='RdGy', 
            center=0, square=True, robust=True, 
            linewidth=.5, cbar_kws={'shrink':.7}, annot=True, ax=ax,
            fmt='.2f',
            annot_kws={'fontsize':'large'})
    ax.set_title('Dataset Correlation Matrix', fontweight='bold', fontsize='xx-large')

    if savefig:
        plt.savefig(
            _DIRIMAGE / 'grafik_korelasi_matriks.png',
            dpi=150)  

    return fig, ax

def plot_pairplot(df, savefig=_SAVE_IMAGE):
    grid = sns.pairplot(df, markers='+')
    fig = grid.fig
    fig.set_size_inches(15, 15)
    fig.tight_layout()
    fig.subplots_adjust(top=0.95)
    fig.suptitle('Dataset Pair Plot', fontweight='bold', fontsize='xx-large')

    if savefig:
        plt.savefig(
            _DIRIMAGE / 'grafik_pairplot_dataset.png',
            dpi=150)  

    return grid
In [ ]:
plot_corr_mat(raw_dataset);
In [ ]:
plot_pairplot(raw_dataset);

Dataset preprocessing

In [ ]:
_SLICE_TRAIN = slice(None, '20061231')
_SLICE_TEST = slice('20070101', None)

print(':: SPLITTING DATA INTO TRAIN AND TEST SETS')
dataset_nn = raw_dataset.copy()
nn_train = raw_dataset[_SLICE_TRAIN]
nn_test = raw_dataset[_SLICE_TEST]
:: SPLITTING DATA INTO TRAIN AND TEST SETS
In [ ]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

print(':: NORMALIZING DATASET')
nn_train_scale = nn_train.copy()
nn_train_scale[:] = sc.fit_transform(nn_train[:])
nn_test_scale = nn_test.copy()
nn_test_scale[:] = sc.transform(nn_test[:])
dataset_nn_normalized = pd.concat([nn_train_scale, nn_test_scale], axis=0)
:: NORMALIZING DATASET
In [ ]:
sc_y = StandardScaler()
# Build a target-only scaler from the last column (debit) of the fitted
# scaler `sc`, so scaled predictions can be inverse-transformed to m^3/s.
sc_y.scale_, sc_y.mean_, sc_y.var_, sc_y.n_samples_seen_ = (
    sc.scale_[-1], sc.mean_[-1], sc.var_[-1], sc.n_samples_seen_
)
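
A hedged usage sketch: with the fitted attributes copied over, sc_y.inverse_transform maps scaled predictions back to discharge units (the prediction values below are hypothetical).

In [ ]:
# Hypothetical scaled predictions, inverted back to m^3/s via sc_y.
_demo_scaled = np.array([[0.0], [1.0], [-0.5]])
print(sc_y.inverse_transform(_demo_scaled))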

ANN

In [ ]:
def train_test_split_ann(data, timesteps, date_start, target_column=None):
    # Use every column except the last as features unless a target column
    # is named explicitly.
    feature_column = (
        data.columns[:-1] if target_column is None else
        data.columns.drop(target_column)
    )

    table_ts = timeseries.timestep_table(
        data, columns=feature_column, keep_first=False,
        timesteps=timesteps
    )

    # The row at date_start belongs to the test set, so drop it from train.
    train = table_ts.loc[slice(None, date_start)][:-1]
    test = table_ts.loc[slice(date_start, None)]

    return train, test
In [ ]:
_test_date_start = '20070101'
ann_train_ts5, ann_test_ts5 = train_test_split_ann(
    dataset_nn_normalized, 5, _test_date_start
)

ann_train_ts10, ann_test_ts10 = train_test_split_ann(
    dataset_nn_normalized, 10, _test_date_start
)

ann_train_ts365, ann_test_ts365 = train_test_split_ann(
    dataset_nn_normalized, 365, _test_date_start
)

print(f'ann_train_ts5.shape = {ann_train_ts5.shape}')
print(f'ann_test_ts5.shape = {ann_test_ts5.shape}')
print(f'ann_train_ts10.shape = {ann_train_ts10.shape}')
print(f'ann_test_ts10.shape = {ann_test_ts10.shape}')
print(f'ann_train_ts365.shape = {ann_train_ts365.shape}')
print(f'ann_test_ts365.shape = {ann_test_ts365.shape}')
ann_train_ts5.shape = (3223, 61)
ann_test_ts5.shape = (731, 61)
ann_train_ts10.shape = (3218, 121)
ann_test_ts10.shape = (731, 121)
ann_train_ts365.shape = (2863, 4381)
ann_test_ts365.shape = (731, 4381)

RNN/LSTM/GRU

In [ ]:
def train_test_split_rnn(data, timesteps, date_start,
                         feature_columns=None, target_column=None):

    # Default: all but the last column as features, last column as target.
    feature_columns = (
        data.columns.to_list()[:-1] if feature_columns is None else
        feature_columns
    )

    target_column = (
        data.columns.to_list()[-1:] if target_column is None else
        target_column
    )

    rnn_X, rnn_y = hk53.tensor_array(
        data, X_columns=feature_columns,
        timesteps=timesteps,
        y_out=True, y_columns=target_column
    )

    # tensor_array drops the first `timesteps` rows, so shift the
    # positional split index accordingly.
    ix_split = data.index.get_loc(date_start) - timesteps

    X_train = rnn_X[:ix_split, :, :]
    y_train = rnn_y[:ix_split]

    X_test = rnn_X[ix_split:, :, :]
    y_test = rnn_y[ix_split:]

    return (X_train, y_train), (X_test, y_test)
In [ ]:
((rnn_X_train_ts5, rnn_y_train_ts5),
 (rnn_X_test_ts5, rnn_y_test_ts5)) = (
     train_test_split_rnn(
         dataset_nn_normalized, 5, _test_date_start
     )
 )

((rnn_X_train_ts10, rnn_y_train_ts10),
 (rnn_X_test_ts10, rnn_y_test_ts10)) = (
     train_test_split_rnn(
         dataset_nn_normalized, 10, _test_date_start
     )
 )

((rnn_X_train_ts365, rnn_y_train_ts365),
 (rnn_X_test_ts365, rnn_y_test_ts365)) = (
     train_test_split_rnn(
         dataset_nn_normalized, 365, _test_date_start
     )
 )
In [ ]:
print(f'rnn_X_train_ts5.shape = {rnn_X_train_ts5.shape}')
print(f'rnn_y_train_ts5.shape = {rnn_y_train_ts5.shape}')
print(f'rnn_X_test_ts5.shape = {rnn_X_test_ts5.shape}')
print(f'rnn_y_test_ts5.shape = {rnn_y_test_ts5.shape}')
print('='*10)
print(f'rnn_X_train_ts10.shape = {rnn_X_train_ts10.shape}')
print(f'rnn_y_train_ts10.shape = {rnn_y_train_ts10.shape}')
print(f'rnn_X_test_ts10.shape = {rnn_X_test_ts10.shape}')
print(f'rnn_y_test_ts10.shape = {rnn_y_test_ts10.shape}')
print('='*10)
print(f'rnn_X_train_ts365.shape = {rnn_X_train_ts365.shape}')
print(f'rnn_y_train_ts365.shape = {rnn_y_train_ts365.shape}')
print(f'rnn_X_test_ts365.shape = {rnn_X_test_ts365.shape}')
print(f'rnn_y_test_ts365.shape = {rnn_y_test_ts365.shape}')
rnn_X_train_ts5.shape = (3223, 5, 12)
rnn_y_train_ts5.shape = (3223,)
rnn_X_test_ts5.shape = (731, 5, 12)
rnn_y_test_ts5.shape = (731,)
==========
rnn_X_train_ts10.shape = (3218, 10, 12)
rnn_y_train_ts10.shape = (3218,)
rnn_X_test_ts10.shape = (731, 10, 12)
rnn_y_test_ts10.shape = (731,)
==========
rnn_X_train_ts365.shape = (2863, 365, 12)
rnn_y_train_ts365.shape = (2863,)
rnn_X_test_ts365.shape = (731, 365, 12)
rnn_y_test_ts365.shape = (731,)

STEP 4: CONCEPTUAL MODELS

The implementation of the NRECA and F.J. Mock models was covered previously in LI-#3.

Metrics

In [ ]:
def _slice_in_index(df, date_start, date_end):
    # Convert a date range into a positional slice over the dataframe rows.
    dfc = df.copy()
    dfc['index'] = np.arange(0, dfc.shape[0])
    sub = dfc[slice(date_start, date_end)]
    return slice(sub['index'].iloc[0], sub['index'].iloc[-1] + 1)

def nse_bulanan_0608(simulasi, observasi):
    # NSE restricted to the 2006-2008 window (monthly data).
    sim = simulasi[slice_bulanan0608]
    obs = observasi[slice_bulanan0608]
    return he.nse(sim, obs)

def nse_2p_0608(simulasi, observasi):
    # NSE restricted to the 2006-2008 window (two-period data).
    sim = simulasi[slice_2p0608]
    obs = observasi[slice_2p0608]
    return he.nse(sim, obs)

slice_bulanan0608 = _slice_in_index(data_bulanan, '2006', '2008')
slice_2p0608 = _slice_in_index(data_2p, '2006', '2008')

metrics_bulanan = [he.rmse, he.r_squared, he.nse, nse_bulanan_0608]
metrics_2p = [he.rmse, he.r_squared, he.nse, nse_2p_0608]

metrics_parameter = {
    'met_names': ['RMSE', 'R2', 'NSE', 'NSE0608'],
    'met_sort': 'NSE',
    'met_min': False
}
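
The ranking criterion is the Nash-Sutcliffe efficiency, $NSE = 1 - \frac{\sum_t (Q_{sim,t} - Q_{obs,t})^2}{\sum_t (Q_{obs,t} - \bar{Q}_{obs})^2}$, where values closer to 1 indicate a better fit (hence met_min=False). A quick sanity-check sketch, assuming HydroErr's argument order is (simulated, observed):

In [ ]:
# Sanity-check sketch for he.nse (assumed signature: simulated, observed):
# a perfect simulation gives NSE = 1.0.
_obs = np.array([1.0, 2.0, 3.0, 4.0])
print(he.nse(_obs, _obs))  # -> 1.0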

NRECA

Code & Parameters

In [ ]:
print(':: CUSTOMIZING THE .model_NRECA FUNCTION')
def model_NRECA_custom(df, precip_col, pet_col, ndays_col,
                MSTOR, GSTOR, PSUB, GWF, CF, C, AREA,
                as_df=True):

    nreca = hk89.model_NRECA(
        df=df, precip_col=precip_col, pet_col=pet_col,
        MSTOR=MSTOR, GSTOR=GSTOR, PSUB=PSUB, GWF=GWF, CF=CF, C=C, AREA=AREA,
                as_df=True, report='flow'
    )

    # Convert FLOW (mm over the basin per period) to discharge (m^3/s):
    # (mm / 1000 -> m) * AREA (m^2) / (ndays * 86400 s).
    nreca['DISCHARGE'] = (
        (nreca['FLOW']/1000) * AREA / (df[ndays_col] * 24 * 60 * 60))
    nreca.drop(['FLOW'], axis=1, inplace=True)
    if as_df is True:
        return nreca
    else:
        return nreca['DISCHARGE'].values

def plot_hidrograf(
    df, sim_col, obs_col,
    title='NRECA', add_title='Monthly', 
    slice_model=None, obs_df=None,
    params_kws=None,
    savefig=_SAVE_IMAGE):
    fig, ax = plt.subplots(figsize=(20, 5))

    if slice_model is None:
        slice_model = slice(None, None)

    df.loc[slice_model, sim_col].plot(ax=ax, style='-', label='SIMULATED')

    if obs_df is None:
        df.loc[slice_model, obs_col].plot(
            ax=ax, style='--', label='OBSERVED')
    else:
        obs_df.loc[slice_model, obs_col].plot(
            ax=ax, style='--', label='OBSERVED')

    ax.set_title(
        f'COMPARISON OF SIMULATED {title} AND OBSERVED DISCHARGE ({add_title})',
        fontsize='xx-large', fontweight='bold')
    ax.set_ylabel('Discharge ($m^3/s$)')
    ax.set_xlabel('Date')
    ax.legend()
    ax.grid(True, axis='both')

    if params_kws is not None:
        _val = [f'${{{key}}}$:${{{value:.2f}}}$' for key, value in params_kws.items()]
        ax.text(0.01, 0.95, '$PARAMETER$\n'+'\n'.join(_val), transform=ax.transAxes,
                ha='left', va='top',
                bbox=dict(boxstyle='square', fc='w', alpha=0.6, ec='k'))

    fig.tight_layout()

    if savefig:
        plt.savefig(
            _DIRIMAGE / 'grafik_hidrograf_{}_{}.png'.format(
                clean_title(title), clean_title(add_title), 
            ), 
            dpi=150)  

    return fig, ax
:: CUSTOMIZING THE .model_NRECA FUNCTION
In [ ]:
def obs_val(obs_df):
    return obs_df.loc[:, 'debit'].values

calib_nreca_params = {
    'MSTOR': [1000],
    'GSTOR': [100],
    'PSUB': np.arange(0.1, 0.9, 0.005),
    'GWF': np.arange(0.1, 0.9, 0.005), 
}

func_nreca_params = {
    'CF': 0.6,
    'C': 0.25,
    'AREA': 1450.6e6,

    'ndays_col': 'ndays',
    'precip_col': 'precip',
    'pet_col':'eto',
    'as_df': False,
}

Monthly

In [ ]:
func_nreca_params['df'] = data_bulanan

if _USE_CALIBRATED_MODEL:
    print(':: USING CALIBRATED MODEL')
    nreca_res_bulanan = pd.read_csv(
        _DIRECTORY / _CALD_NRECA_BULANAN, index_col=0
    )
else:
    with timeit_context('cal_nreca_bulanan'):
        nreca_res_bulanan = hk90.calibration(
            observed=data_bulanan, observed_func=obs_val,
            func=model_NRECA_custom, calibration_parameter=calib_nreca_params,
            func_parameter=func_nreca_params,
            metrics=metrics_bulanan,
            **metrics_parameter
        )
    nreca_res_bulanan.to_csv(_DIRECTORY / _CALD_NRECA_BULANAN)
:: USING CALIBRATED MODEL
In [ ]:
nreca_bulanan_best = hk90._best_parameter(nreca_res_bulanan, calib_nreca_params)
func_nreca_params['df'] = data_bulanan
func_nreca_params['as_df'] = True
nreca_bulanan = model_NRECA_custom(**func_nreca_params, **nreca_bulanan_best)
if _SHOW_IMAGES:
    plot_hidrograf(nreca_bulanan, 'DISCHARGE', 'debit', obs_df=data_bulanan,
               params_kws=nreca_bulanan_best)

Two-Period

In [ ]:
func_nreca_params['df'] = data_2p
func_nreca_params['as_df'] = False

if _USE_CALIBRATED_MODEL:
    print(':: USING CALIBRATED MODEL')
    nreca_res_2p = pd.read_csv(
        _DIRECTORY / _CALD_NRECA_2p, index_col=0
    )
else:
    with timeit_context('cal_nreca_2p'):
        nreca_res_2p = hk90.calibration(
            observed=data_2p, observed_func=obs_val,
            func=model_NRECA_custom, calibration_parameter=calib_nreca_params,
            func_parameter=func_nreca_params,
            metrics=metrics_2p,
            **metrics_parameter
        )
    nreca_res_2p.to_csv(_DIRECTORY / _CALD_NRECA_2p)