Berdasarkan isu #53: request: buat tabel/tensor untuk pemodelan deep learning LSTM
Deskripsi permasalahan:
# Google Drive access: mount the drive at /content/gdrive (Colab only).
from google.colab import drive
drive.mount('/content/gdrive')
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
# Google Drive paths used by this notebook.
DRIVE_DROP_PATH = '/content/gdrive/My Drive/Colab Notebooks/_dropbox'
# NOTE(review): DRIVE_DATASET_PATH is not referenced in the visible cells.
DRIVE_DATASET_PATH = '/content/gdrive/My Drive/Colab Notebooks/_dataset/uma_pamarayan'
DATASET = DRIVE_DROP_PATH + '/dataset_hidrologi_pamarayan_1998_2008.csv'
import pandas as pd
import numpy as np
# The first CSV column becomes a parsed date index; rows before 1998-03-01
# are dropped because the first 2 months have no discharge (debit) data.
dataset = pd.read_csv(DATASET, index_col=0, parse_dates=True)['19980301':] # the first 2 months have no discharge data
dataset.head()
| | hujan_bojong_manik | hujan_gunung_tunggal | hujan_pasir_ona | hujan_sampang_peundeuy | hujan_cimarga | hujan_bd_pamarayan | hujan_ciminyak_cilaki | hujan_gardu_tanjak | debit_bd_pamarayan |
---|---|---|---|---|---|---|---|---|---|
1998-03-01 | 0.0 | 0.0 | 3.0 | 7.0 | 0.0 | 12.0 | 0.0 | 0.0 | 90.12 |
1998-03-02 | 0.0 | 4.0 | 36.0 | 9.0 | 26.0 | 0.0 | 5.0 | 32.0 | 97.90 |
1998-03-03 | 4.5 | 0.0 | 0.0 | 11.0 | 10.0 | 2.0 | 3.0 | 21.0 | 88.90 |
1998-03-04 | 0.0 | 0.0 | 46.0 | 5.0 | 24.0 | 6.0 | 11.0 | 13.0 | 90.30 |
1998-03-05 | 32.0 | 0.0 | 0.0 | 22.0 | 8.0 | 14.0 | 0.0 | 21.0 | 210.06 |
# Print the index range, column dtypes and non-null counts of the dataset.
dataset.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3959 entries, 1998-03-01 to 2008-12-31
Data columns (total 9 columns):
hujan_bojong_manik        3959 non-null float64
hujan_gunung_tunggal      3959 non-null float64
hujan_pasir_ona           3959 non-null float64
hujan_sampang_peundeuy    3959 non-null float64
hujan_cimarga             3959 non-null float64
hujan_bd_pamarayan        3959 non-null float64
hujan_ciminyak_cilaki     3959 non-null float64
hujan_gardu_tanjak        3959 non-null float64
debit_bd_pamarayan        3959 non-null float64
dtypes: float64(9)
memory usage: 309.3 KB
def _columns_index(dataframe, columns):
    """Translate column labels into their positions in ``dataframe``.

    Args:
        dataframe: Object whose ``columns`` attribute provides ``get_loc``
            (a pandas DataFrame in this notebook).
        columns: Iterable of column labels to look up.

    Returns:
        A list holding, for every label in ``columns`` and in the same
        order, the value returned by ``dataframe.columns.get_loc(label)``.
    """
    locate = dataframe.columns.get_loc
    return [locate(label) for label in columns]
def _get_y(array, timesteps, columns_index):
    """Extract the target values that follow the first ``timesteps`` rows.

    Args:
        array: 2-D array indexed as ``array[row, column]``.
        timesteps: Number of leading rows to skip.
        columns_index: Sequence of column positions to extract.

    Returns:
        For a single column, the 1-D slice ``array[timesteps:, col]``;
        otherwise the per-column slices stacked along axis 1.
    """
    targets = [array[timesteps:, position] for position in columns_index]
    if len(columns_index) != 1:
        return np.stack(targets, axis=1)
    return targets[0]
def _get_x_tensor(array, timesteps, columns_index):
    """Build the 3-D sliding-window input tensor.

    Sample ``i`` contains rows ``i`` to ``i + timesteps - 1`` of ``array``,
    i.e. the ``timesteps`` rows that precede row ``i + timesteps``,
    restricted to the requested columns.

    Args:
        array: 2-D array indexed as ``array[row, column]``.
        timesteps: Window length in rows; must be non-negative.
        columns_index: Iterable of column positions to include, in the
            order they should appear on the last axis.

    Returns:
        An array of shape
        ``(max(rows - timesteps, 0), timesteps, len(columns_index))``.
        When ``array`` has no more rows than ``timesteps`` the result is
        an empty tensor rather than an error.

    Raises:
        ValueError: If ``timesteps`` is negative.
    """
    if timesteps < 0:
        raise ValueError("timesteps must be non-negative")

    rows, _ = array.shape
    n_samples = max(rows - timesteps, 0)

    # window_rows[i, j] == i + j: the j-th row of the i-th window.
    window_rows = np.arange(n_samples)[:, np.newaxis] + np.arange(timesteps)

    # Select the columns first, then gather all windows in one indexing
    # operation: (rows, n_cols)[(n_samples, timesteps)] -> 3-D tensor.
    return array[:, list(columns_index)][window_rows]
def tensor_array(
    dataframe, timesteps, X_columns=None, y_out=False, y_columns=None
):
    """Convert a DataFrame into sliding-window arrays for sequence models.

    Args:
        dataframe: Source table; its ``values`` are windowed row by row.
        timesteps: Number of consecutive rows in each input window.
        X_columns: Column labels used as input features. ``None`` selects
            every column of ``dataframe``.
        y_out: When truthy, also return the target array.
        y_columns: Column labels used as targets. ``None`` selects the
            last column of ``dataframe``. Only used when ``y_out`` is
            truthy.

    Returns:
        ``X`` alone when ``y_out`` is falsy, otherwise the tuple
        ``(X, y)``. ``X`` has shape
        ``(rows - timesteps, timesteps, number of X columns)``. ``y`` holds
        the target rows from index ``timesteps`` onward: 1-D for a single
        target column, 2-D (one column per target) otherwise.
    """
    n_cols = dataframe.shape[1]
    array = dataframe.values

    # Input features.
    if X_columns is None:
        X_index = range(n_cols)
    else:
        X_index = _columns_index(dataframe, X_columns)
    X = _get_x_tensor(array, timesteps=timesteps, columns_index=X_index)

    # Truthiness (not ``is True``) so flags such as numpy.True_ or 1 are
    # honoured instead of silently dropping y.
    if not y_out:
        return X

    # Targets.
    if y_columns is None:
        y_index = [n_cols - 1]
    else:
        y_index = _columns_index(dataframe, y_columns)
    y = _get_y(array, timesteps=timesteps, columns_index=y_index)
    return X, y
Dataset memiliki $3959$ baris dengan $8$ variabel independen, dan $1$ variabel dependen.
- $8$ variabel bebas: `hujan_bojong_manik`, `hujan_gunung_tunggal`, `hujan_pasir_ona`, `hujan_sampang_peundeuy`, `hujan_cimarga`, `hujan_bd_pamarayan`, `hujan_ciminyak_cilaki`, `hujan_gardu_tanjak`
- $1$ variabel terikat: `debit_bd_pamarayan`
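Dengan menggunakan timesteps sebesar $5$ hari, dimensi tensor input 3D adalah $X = (3954, 5, 9)$, yaitu $3959 - 5 = 3954$ sampel, $5$ langkah waktu, dan $9$ fitur ($8$ variabel hujan ditambah debit pada hari-hari sebelumnya), dengan output $y = (3954,)$.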
$$y^{t} = f(X_i^{t_s}, y^{t_s})$$
TIMESTEPS = 5
# Window the whole dataset: every column is an input feature
# (X_columns=None) and the discharge column is the target.
X, y = tensor_array(
dataset, timesteps=TIMESTEPS,
X_columns=None, y_out=True, y_columns=['debit_bd_pamarayan']
)
X.shape
(3954, 5, 9)
# One target value per input window.
y.shape
(3954,)
# Inspect the windows: consecutive samples overlap, shifted by one row.
print(X)
[[[ 0. 0. 3. ... 0. 0. 90.12] [ 0. 4. 36. ... 5. 32. 97.9 ] [ 4.5 0. 0. ... 3. 21. 88.9 ] [ 0. 0. 46. ... 11. 13. 90.3 ] [ 32. 0. 0. ... 0. 21. 210.06]] [[ 0. 4. 36. ... 5. 32. 97.9 ] [ 4.5 0. 0. ... 3. 21. 88.9 ] [ 0. 0. 46. ... 11. 13. 90.3 ] [ 32. 0. 0. ... 0. 21. 210.06] [ 12. 0. 7. ... 16. 0. 82.9 ]] [[ 4.5 0. 0. ... 3. 21. 88.9 ] [ 0. 0. 46. ... 11. 13. 90.3 ] [ 32. 0. 0. ... 0. 21. 210.06] [ 12. 0. 7. ... 16. 0. 82.9 ] [ 14. 0. 11. ... 25. 22. 274.42]] ... [[ 0. 13. 0. ... 0. 3. 21.27] [ 17. 27. 0. ... 0. 2. 83.27] [ 14. 23.5 4. ... 1. 35. 209.27] [ 12. 15.7 7. ... 0. 14. 134.83] [ 10. 19. 0. ... 0. 0. 81.88]] [[ 17. 27. 0. ... 0. 2. 83.27] [ 14. 23.5 4. ... 1. 35. 209.27] [ 12. 15.7 7. ... 0. 14. 134.83] [ 10. 19. 0. ... 0. 0. 81.88] [ 7. 21.7 11. ... 0. 12. 20.14]] [[ 14. 23.5 4. ... 1. 35. 209.27] [ 12. 15.7 7. ... 0. 14. 134.83] [ 10. 19. 0. ... 0. 0. 81.88] [ 7. 21.7 11. ... 0. 12. 20.14] [ 6.05 17.5 21. ... 0. 10. 208.54]]]
# Inspect the target values.
print(y)
[ 82.9 274.42 216.36 ... 20.14 208.54 208.14]
- 20190926 - 1.0.0 - Initial
Source code in this notebook is licensed under the MIT License. Data in this notebook is licensed under the Creative Commons Attribution 4.0 International license.