#### NOTEBOOK DESCRIPTION
from datetime import datetime

NOTEBOOK_TITLE = 'taruma_udemy_som_megacasestudy'
NOTEBOOK_VERSION = '1.0.0'
NOTEBOOK_DATE = 1  # Set to 1 to append a date/time stamp to the project name

NOTEBOOK_NAME = "{}_{}".format(
    NOTEBOOK_TITLE,
    NOTEBOOK_VERSION.replace('.', '_')
)

PROJECT_NAME = "{}_{}{}".format(
    NOTEBOOK_TITLE,
    NOTEBOOK_VERSION.replace('.', '_'),
    "_" + datetime.utcnow().strftime("%Y%m%d_%H%M") if NOTEBOOK_DATE else ""
)

print(f"Notebook name: {NOTEBOOK_NAME}")
print(f"Project name: {PROJECT_NAME}")

#### System Version
import sys
import keras

print("Python version: {}".format(sys.version))
print("Keras version: {}".format(keras.__version__))

#### Load Notebook Extensions
%load_ext google.colab.data_table

#### Download dataset
!wget -O SOM_megacase.zip "https://sds-platform-private.s3-us-east-2.amazonaws.com/uploads/P16-Mega-Case-Study.zip"
!unzip SOM_megacase.zip

#### Set dataset path
DATASET_DIRECTORY = 'Mega_Case_Study/'

# Mega Case Study - Make a Hybrid Deep Learning Model

# Part 1 - Identifying the Frauds with the Self-Organizing Map

# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing the dataset
# Dataset: http://archive.ics.uci.edu/ml/datasets/statlog+(australian+credit+approval)
dataset = pd.read_csv(DATASET_DIRECTORY + 'Credit_Card_Applications.csv')
dataset

X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
print(f"X Dimension = {X.shape}")
print(f"y Dimension = {y.shape}")

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# Alternatively to a local copy of minisom.py, install a recent MiniSom from PyPI
!pip install minisom

# Training the SOM
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
from pylab import rcParams
rcParams['figure.figsize'] = 15, 10

bone()
pcolor(som.distance_map().T)  # mean inter-neuron distance (MID) per node
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y[i]],
         markeredgecolor=colors[y[i]],
         markerfacecolor='None',
         markersize=10,
         markeredgewidth=2)
show()

# Finding the frauds
# The coordinates below are the high-MID (white) nodes read off the map above;
# they change from run to run, so adjust them for your own map.
mappings = som.win_map(X)
frauds = np.concatenate((mappings[(3, 1)], mappings[(5, 3)]), axis=0)
frauds = sc.inverse_transform(frauds)
pd.DataFrame(frauds)

# Part 2 - Going from Unsupervised to Supervised Deep Learning

# Creating the matrix of features (all columns except CustomerID)
customers = dataset.iloc[:, 1:].values
print(f"customers.shape = {customers.shape}")

# Creating the dependent variable: 1 if the SOM flagged the customer
is_fraud = np.zeros(len(dataset))
is_fraud.shape

for i in range(len(dataset)):
    # Compare against the CustomerID column of the fraud list only (rounded to
    # undo any floating-point error introduced by the scaler round trip)
    if dataset.iloc[i, 0] in np.round(frauds[:, 0]):
        is_fraud[i] = 1

# Artificial Neural Network

# Feature Scaling (note: this rebinds `sc`, replacing the MinMaxScaler above)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
customers = sc.fit_transform(customers)

# Importing Keras
from keras.models import Sequential
from keras.layers import Dense

classifier = Sequential()
classifier.add(Dense(
    units=2, kernel_initializer='uniform',
    activation='relu', input_dim=15)
)
classifier.add(Dense(
    units=1, kernel_initializer='uniform',
    activation='sigmoid')
)
classifier.compile(
    optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']
)
classifier.fit(customers, is_fraud, batch_size=1, epochs=2)

# Predicting the probability of fraud for each customer, paired with the
# CustomerID and sorted ascending by probability
y_pred = classifier.predict(customers)
y_pred = np.concatenate((dataset.iloc[:, 0:1].values, y_pred), axis=1)
y_pred = y_pred[y_pred[:, 1].argsort()]
pd.DataFrame(y_pred)
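
#### Optional: turning the ranking into a shortlist
# The sorted y_pred above puts the most suspicious applications last. A minimal
# sketch of turning that ranking into a review shortlist; the 0.5 cutoff is an
# assumption to tune (with only two epochs and SOM-derived labels, the
# probabilities are a rough ranking, not calibrated risks).
CUTOFF = 0.5

ranking = pd.DataFrame(y_pred, columns=['CustomerID', 'FraudProbability'])
shortlist = (ranking[ranking['FraudProbability'] > CUTOFF]
             .sort_values('FraudProbability', ascending=False))
print(f"{len(shortlist)} of {len(ranking)} applications flagged for review")
shortlist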
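
#### Optional: reproducible SOM runs
# The winning-node coordinates used in "Finding the frauds" move between runs
# because MiniSom initializes its weights randomly. A sketch of a repeatable
# training cell using the constructor's random_seed argument; the seed value
# 42 is arbitrary.
from minisom import MiniSom

som = MiniSom(x=10, y=10, input_len=15, sigma=1.0,
              learning_rate=0.5, random_seed=42)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)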
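
#### Optional: picking fraud nodes programmatically
# Instead of reading coordinates such as (3, 1) and (5, 3) off the map by
# hand, the high-MID nodes can be selected by thresholding the distance map.
# A sketch meant to run in Part 1 (while `sc` is still the MinMaxScaler); the
# 0.9 cutoff is an assumption to tune against the colorbar of your own map.
MID_THRESHOLD = 0.9

distance_map = som.distance_map()  # shape (10, 10), values normalized to [0, 1]
mappings = som.win_map(X)

fraud_nodes = [(i, j)
               for i in range(distance_map.shape[0])
               for j in range(distance_map.shape[1])
               if distance_map[i, j] > MID_THRESHOLD and mappings.get((i, j))]
print(f"High-MID nodes: {fraud_nodes}")

if fraud_nodes:  # np.concatenate fails on an empty list
    frauds_auto = np.concatenate([mappings[node] for node in fraud_nodes], axis=0)
    frauds_auto = sc.inverse_transform(frauds_auto)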