#### NOTEBOOK DESCRIPTION
from datetime import datetime

NOTEBOOK_TITLE = 'taruma_udemy_som_megacasestudy'
NOTEBOOK_VERSION = '1.0.0'
NOTEBOOK_DATE = 1  # Set to 1 to append a date/time stamp to the project name

NOTEBOOK_NAME = "{}_{}".format(
    NOTEBOOK_TITLE,
    NOTEBOOK_VERSION.replace('.', '_')
)

PROJECT_NAME = "{}_{}{}".format(
    NOTEBOOK_TITLE,
    NOTEBOOK_VERSION.replace('.', '_'),
    "_" + datetime.utcnow().strftime("%Y%m%d_%H%M") if NOTEBOOK_DATE else ""
)

print(f"Notebook name: {NOTEBOOK_NAME}")
print(f"Project name: {PROJECT_NAME}")

#### System Version
import sys
import keras

print("Python version: {}".format(sys.version))
print("Keras version: {}".format(keras.__version__))

#### Load Notebook Extensions
%load_ext google.colab.data_table

#### Download dataset
!wget -O SOM_megacase.zip "https://sds-platform-private.s3-us-east-2.amazonaws.com/uploads/P16-Mega-Case-Study.zip"
!unzip SOM_megacase.zip

#### Set dataset path
DATASET_DIRECTORY = 'Mega_Case_Study/'

# Mega Case Study - Make a Hybrid Deep Learning Model

# Part 1 - Identifying the Frauds with the Self-Organizing Map

# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing the dataset
# Dataset: http://archive.ics.uci.edu/ml/datasets/statlog+(australian+credit+approval)
dataset = pd.read_csv(DATASET_DIRECTORY + 'Credit_Card_Applications.csv')
dataset

X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
print(f"X Dimension = {X.shape}")
print(f"y Dimension = {y.shape}")

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# Alternatively to a local copy of minisom.py, install a recent MiniSom from PyPI
!pip install minisom

# Training the SOM
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
from pylab import rcParams
rcParams['figure.figsize'] = 15, 10

bone()
pcolor(som.distance_map().T)  # mean inter-neuron distance (MID) per node
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y[i]],
         markeredgecolor=colors[y[i]],
         markerfacecolor='None',
         markersize=10,
         markeredgewidth=2)
show()

# Finding the frauds
# The coordinates below are the high-MID (white) nodes read off the map above;
# they change from run to run, so adjust them for your own map.
mappings = som.win_map(X)
frauds = np.concatenate((mappings[(3, 1)], mappings[(5, 3)]), axis=0)
frauds = sc.inverse_transform(frauds)
pd.DataFrame(frauds)

# Part 2 - Going from Unsupervised to Supervised Deep Learning

# Creating the matrix of features (all columns except CustomerID)
customers = dataset.iloc[:, 1:].values
print(f"customers.shape = {customers.shape}")

# Creating the dependent variable: 1 if the SOM flagged the customer
is_fraud = np.zeros(len(dataset))
is_fraud.shape

for i in range(len(dataset)):
    # Compare against the CustomerID column of the fraud list only (rounded to
    # undo any floating-point error introduced by the scaler round trip)
    if dataset.iloc[i, 0] in np.round(frauds[:, 0]):
        is_fraud[i] = 1

# Artificial Neural Network

# Feature Scaling (note: this rebinds `sc`, replacing the MinMaxScaler above)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
customers = sc.fit_transform(customers)

# Importing Keras
from keras.models import Sequential
from keras.layers import Dense

classifier = Sequential()
classifier.add(Dense(
    units=2, kernel_initializer='uniform',
    activation='relu', input_dim=15)
)
classifier.add(Dense(
    units=1, kernel_initializer='uniform',
    activation='sigmoid')
)
classifier.compile(
    optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']
)
classifier.fit(customers, is_fraud, batch_size=1, epochs=2)

# Predicting the probability of fraud for each customer, paired with the
# CustomerID and sorted ascending by probability
y_pred = classifier.predict(customers)
y_pred = np.concatenate((dataset.iloc[:, 0:1].values, y_pred), axis=1)
y_pred = y_pred[y_pred[:, 1].argsort()]
pd.DataFrame(y_pred)
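
#### Optional: turning the ranking into a shortlist
# The sorted y_pred above puts the most suspicious applications last. A minimal
# sketch of turning that ranking into a review shortlist; the 0.5 cutoff is an
# assumption to tune (with only two epochs and SOM-derived labels, the
# probabilities are a rough ranking, not calibrated risks).
CUTOFF = 0.5

ranking = pd.DataFrame(y_pred, columns=['CustomerID', 'FraudProbability'])
shortlist = (ranking[ranking['FraudProbability'] > CUTOFF]
             .sort_values('FraudProbability', ascending=False))
print(f"{len(shortlist)} of {len(ranking)} applications flagged for review")
shortlist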
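
#### Optional: reproducible SOM runs
# The winning-node coordinates used in "Finding the frauds" move between runs
# because MiniSom initializes its weights randomly. A sketch of a repeatable
# training cell using the constructor's random_seed argument; the seed value
# 42 is arbitrary.
from minisom import MiniSom

som = MiniSom(x=10, y=10, input_len=15, sigma=1.0,
              learning_rate=0.5, random_seed=42)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)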
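
#### Optional: picking fraud nodes programmatically
# Instead of reading coordinates such as (3, 1) and (5, 3) off the map by
# hand, the high-MID nodes can be selected by thresholding the distance map.
# A sketch meant to run in Part 1 (while `sc` is still the MinMaxScaler); the
# 0.9 cutoff is an assumption to tune against the colorbar of your own map.
MID_THRESHOLD = 0.9

distance_map = som.distance_map()  # shape (10, 10), values normalized to [0, 1]
mappings = som.win_map(X)

fraud_nodes = [(i, j)
               for i in range(distance_map.shape[0])
               for j in range(distance_map.shape[1])
               if distance_map[i, j] > MID_THRESHOLD and mappings.get((i, j))]
print(f"High-MID nodes: {fraud_nodes}")

if fraud_nodes:  # np.concatenate fails on an empty list
    frauds_auto = np.concatenate([mappings[node] for node in fraud_nodes], axis=0)
    frauds_auto = sc.inverse_transform(frauds_auto)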