In this lesson we work through a classification example for detecting credit card fraud. The example is adapted from fchollet. The data is available at Kaggle Credit Card Fraud Detection.
The purpose of the example is to illustrate the case of highly imbalanced classes, a very common problem in practice. We will use several metrics to evaluate the model.
The technique can be extended to the detection of anomalous records in large datasets.
from __future__ import absolute_import, division, print_function, unicode_literals
#
#import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
#
from tensorflow.keras.models import Model
#
from tensorflow.keras.layers import Dense, Input, Activation, Dropout
#
from tensorflow.keras.utils import plot_model
#
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
#
from tensorflow.keras.metrics import FalseNegatives, FalsePositives, TrueNegatives
from tensorflow.keras.metrics import TruePositives, Precision, Recall
#
from tensorflow.keras.optimizers import Adam
#
from tensorflow.keras import callbacks
#
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import roc_auc_score
print(tf.__version__)
2.12.0
Except for the transaction time and the amount, we do not know what the other columns represent (for privacy reasons). The only thing we know is that those unknown columns have already been scaled. There are 284,807 records and 31 variables.
The transaction amounts are relatively small: the mean over all transactions is approximately USD 88.
There are no missing values.
The vast majority of transactions are non-fraudulent (99.82% of the rows), while fraudulent transactions account for only 0.18% of the data frame.
PCA transformation: the data description states that all features went through a PCA transformation (a dimensionality-reduction technique), except for time and amount.
Scaling: keep in mind that applying a PCA transformation requires scaling the variables (features) beforehand. (In this case, all the V-group variables have been scaled, or at least that is what we assume the people who built the dataset did.)
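As a reminder of why scaling comes before PCA: the principal components are directions of maximum variance, so unscaled features with large ranges would dominate them. The following is a purely illustrative sketch of such a scale-then-PCA pipeline; the names pca_demo and raw_features are hypothetical and this is not how the dataset was actually produced.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
# Illustrative only: standardize, then project onto 28 principal components
pca_demo = make_pipeline(StandardScaler(), PCA(n_components=28))
# pca_demo.fit_transform(raw_features)  # raw_features is hypothetical; the result would be V1..V28-style components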
fname = '../Datos/fraude/creditcard.csv'
training = pd.read_csv(fname)
training
 | Time | V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | ... | V21 | V22 | V23 | V24 | V25 | V26 | V27 | V28 | Amount | Class
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 0.0 | -1.359807 | -0.072781 | 2.536347 | 1.378155 | -0.338321 | 0.462388 | 0.239599 | 0.098698 | 0.363787 | ... | -0.018307 | 0.277838 | -0.110474 | 0.066928 | 0.128539 | -0.189115 | 0.133558 | -0.021053 | 149.62 | 0 |
1 | 0.0 | 1.191857 | 0.266151 | 0.166480 | 0.448154 | 0.060018 | -0.082361 | -0.078803 | 0.085102 | -0.255425 | ... | -0.225775 | -0.638672 | 0.101288 | -0.339846 | 0.167170 | 0.125895 | -0.008983 | 0.014724 | 2.69 | 0 |
2 | 1.0 | -1.358354 | -1.340163 | 1.773209 | 0.379780 | -0.503198 | 1.800499 | 0.791461 | 0.247676 | -1.514654 | ... | 0.247998 | 0.771679 | 0.909412 | -0.689281 | -0.327642 | -0.139097 | -0.055353 | -0.059752 | 378.66 | 0 |
3 | 1.0 | -0.966272 | -0.185226 | 1.792993 | -0.863291 | -0.010309 | 1.247203 | 0.237609 | 0.377436 | -1.387024 | ... | -0.108300 | 0.005274 | -0.190321 | -1.175575 | 0.647376 | -0.221929 | 0.062723 | 0.061458 | 123.50 | 0 |
4 | 2.0 | -1.158233 | 0.877737 | 1.548718 | 0.403034 | -0.407193 | 0.095921 | 0.592941 | -0.270533 | 0.817739 | ... | -0.009431 | 0.798278 | -0.137458 | 0.141267 | -0.206010 | 0.502292 | 0.219422 | 0.215153 | 69.99 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
284802 | 172786.0 | -11.881118 | 10.071785 | -9.834783 | -2.066656 | -5.364473 | -2.606837 | -4.918215 | 7.305334 | 1.914428 | ... | 0.213454 | 0.111864 | 1.014480 | -0.509348 | 1.436807 | 0.250034 | 0.943651 | 0.823731 | 0.77 | 0 |
284803 | 172787.0 | -0.732789 | -0.055080 | 2.035030 | -0.738589 | 0.868229 | 1.058415 | 0.024330 | 0.294869 | 0.584800 | ... | 0.214205 | 0.924384 | 0.012463 | -1.016226 | -0.606624 | -0.395255 | 0.068472 | -0.053527 | 24.79 | 0 |
284804 | 172788.0 | 1.919565 | -0.301254 | -3.249640 | -0.557828 | 2.630515 | 3.031260 | -0.296827 | 0.708417 | 0.432454 | ... | 0.232045 | 0.578229 | -0.037501 | 0.640134 | 0.265745 | -0.087371 | 0.004455 | -0.026561 | 67.88 | 0 |
284805 | 172788.0 | -0.240440 | 0.530483 | 0.702510 | 0.689799 | -0.377961 | 0.623708 | -0.686180 | 0.679145 | 0.392087 | ... | 0.265245 | 0.800049 | -0.163298 | 0.123205 | -0.569159 | 0.546668 | 0.108821 | 0.104533 | 10.00 | 0 |
284806 | 172792.0 | -0.533413 | -0.189733 | 0.703337 | -0.506271 | -0.012546 | -0.649617 | 1.577006 | -0.414650 | 0.486180 | ... | 0.261057 | 0.643078 | 0.376777 | 0.008797 | -0.473649 | -0.818267 | -0.002415 | 0.013649 | 217.00 | 0 |
284807 rows × 31 columns
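The claims above (no missing values, mean amount around USD 88, strong class imbalance) can be checked directly on the loaded DataFrame; a quick sketch:
# Sanity checks on the loaded data
print("missing values:", training.isna().sum().sum())            # 0
print("mean amount (USD):", training['Amount'].mean())           # ~88
print(training['Class'].value_counts(normalize=True) * 100)      # ~99.8% non-fraud vs <0.2% fraud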
# Separate features and targets
targets = training.pop('Class')
targets = np.array(targets, dtype='uint8')
#
features = np.array(training, dtype='float32')
#
print("target shape: ", targets.shape)
print("feature shape: ", features.shape)
target shape:  (284807,) feature shape:  (284807, 30)
#
num_val_samples = int(len(features) * 0.2)
train_features = features[:-num_val_samples]
train_targets = targets[:-num_val_samples]
val_features = features[-num_val_samples:]
val_targets = targets[-num_val_samples:]
print("Number of training samples:", len(train_features))
print("Number of validation samples:", len(val_features))
Number of training samples: 227846 Number of validation samples: 56961
# create the StandardScaler object
scaler = StandardScaler()
# Fit the scaler parameters on the training data
scaler.fit(train_features)
# scale the training and validation sets
train_features = scaler.transform(train_features)
val_features = scaler.transform(val_features)
# Count the class frequencies in the training targets
counts = np.bincount(train_targets)
print(
"Number of positive samples in training data: {} ({:.2f}% of total)".format(
counts[1], 100 * float(counts[1]) / len(train_targets)
)
)
# Create the class weights for training: more weight on the less frequent class (1)
weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]
Number of positive samples in training data: 417 (0.18% of total)
print(weight_for_1)
print(weight_for_0)
0.002398081534772182 4.396976638863118e-06
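The weights are simply the inverse class frequencies, so each class contributes roughly the same total weight to the loss. A small, purely illustrative check using the variables defined above:
# Each class contributes a total weight of ~1.0 to the loss
print(weight_for_0 * counts[0])   # 1.0
print(weight_for_1 * counts[1])   # 1.0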
inputs = Input(shape=(train_features.shape[1],),name='capa_entrada')
# build the network layer by layer
# (note: this Activation applies a ReLU directly to the standardized inputs)
x = Activation('relu')(inputs)
x = Dense(256, activation='relu',name='primera_capa_oculta')(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu',name='segunda_capa_oculta')(x)
x = Dropout(0.3)(x)
outputs = Dense(1, activation='sigmoid', name='capa_salida')(x)
# Now create the model
model = Model(inputs=inputs, outputs=outputs)
We use the Dropout regularizer (Dropout paper). At each training step, this regularizer randomly leaves a fraction of the layer's units out of the computation; the units are chosen anew at every step. For example, Dropout(0.1) drops 10% of the units on each pass. The paper shows that in many cases this regularizer works better than the classic L1 and L2 penalties.
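A small sketch of what Dropout does at training time (the demo variables are illustrative only): with rate 0.1, roughly 10% of the activations are set to zero on each forward pass, and the surviving ones are rescaled by 1/(1 - rate).
# Illustration: Dropout(0.1) zeroes ~10% of the units in training mode
drop_demo = Dropout(0.1)
x_demo = tf.ones((1, 1000))
y_demo = drop_demo(x_demo, training=True)
print(float(tf.reduce_mean(tf.cast(y_demo == 0.0, tf.float32))))   # ~0.10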
model.summary()
plot_model(model, to_file='../Imagenes/fraude_model.png',
show_shapes=True)
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= capa_entrada (InputLayer) [(None, 30)] 0 _________________________________________________________________ activation (Activation) (None, 30) 0 _________________________________________________________________ primera_capa_oculta (Dense) (None, 256) 7936 _________________________________________________________________ dropout (Dropout) (None, 256) 0 _________________________________________________________________ segunda_capa_oculta (Dense) (None, 256) 65792 _________________________________________________________________ dropout_1 (Dropout) (None, 256) 0 _________________________________________________________________ capa_salida (Dense) (None, 1) 257 ================================================================= Total params: 73,985 Trainable params: 73,985 Non-trainable params: 0 _________________________________________________________________
# metrics
metrics = [
FalseNegatives(name="fn"),
FalsePositives(name="fp"),
TrueNegatives(name="tn"),
TruePositives(name="tp"),
Precision(name="precision"),
Recall(name="recall"),
]
model.compile(optimizer=Adam(1e-2), loss="binary_crossentropy", metrics=metrics)
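For reference, precision = TP / (TP + FP) and recall = TP / (TP + FN). A tiny sketch with made-up labels (the demo variables are illustrative only):
# Made-up labels to illustrate the metric definitions
y_true_demo = [0, 0, 1, 1, 1, 0]
y_pred_demo = [0, 1, 1, 1, 0, 0]
p_demo, r_demo = Precision(), Recall()
p_demo.update_state(y_true_demo, y_pred_demo)
r_demo.update_state(y_true_demo, y_pred_demo)
print(p_demo.result().numpy(), r_demo.result().numpy())   # 2/3 and 2/3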
Training takes the class weights into account.
callbacks_list = [callbacks.ModelCheckpoint("../Datos/fraude/fraud_model_at_epoch_{epoch}.h5")]
class_weight = {0: weight_for_0, 1: weight_for_1}
history = model.fit(
train_features,
train_targets,
batch_size=2048,
epochs=30,
verbose=2,
callbacks=callbacks_list,
validation_data=(val_features, val_targets),
class_weight=class_weight,
)
Epoch 1/30
112/112 - 18s - loss: 2.5566e-06 - fn: 47.0000 - fp: 29882.0000 - tn: 197547.0000 - tp: 370.0000 - precision: 0.0122 - recall: 0.8873 - val_loss: 0.0420 - val_fn: 16.0000 - val_fp: 265.0000 - val_tn: 56621.0000 - val_tp: 59.0000 - val_precision: 0.1821 - val_recall: 0.7867
Epoch 2/30
112/112 - 8s - loss: 1.6997e-06 - fn: 41.0000 - fp: 8006.0000 - tn: 219423.0000 - tp: 376.0000 - precision: 0.0449 - recall: 0.9017 - val_loss: 0.1765 - val_fn: 7.0000 - val_fp: 2572.0000 - val_tn: 54314.0000 - val_tp: 68.0000 - val_precision: 0.0258 - val_recall: 0.9067
Epoch 3/30
112/112 - 8s - loss: 1.6099e-06 - fn: 41.0000 - fp: 8878.0000 - tn: 218551.0000 - tp: 376.0000 - precision: 0.0406 - recall: 0.9017 - val_loss: 0.0939 - val_fn: 10.0000 - val_fp: 1488.0000 - val_tn: 55398.0000 - val_tp: 65.0000 - val_precision: 0.0419 - val_recall: 0.8667
Epoch 4/30
112/112 - 8s - loss: 1.4978e-06 - fn: 43.0000 - fp: 7235.0000 - tn: 220194.0000 - tp: 374.0000 - precision: 0.0492 - recall: 0.8969 - val_loss: 0.0420 - val_fn: 13.0000 - val_fp: 279.0000 - val_tn: 56607.0000 - val_tp: 62.0000 - val_precision: 0.1818 - val_recall: 0.8267
Epoch 5/30
112/112 - 10s - loss: 1.5247e-06 - fn: 37.0000 - fp: 7839.0000 - tn: 219590.0000 - tp: 380.0000 - precision: 0.0462 - recall: 0.9113 - val_loss: 0.1506 - val_fn: 9.0000 - val_fp: 2079.0000 - val_tn: 54807.0000 - val_tp: 66.0000 - val_precision: 0.0308 - val_recall: 0.8800
Epoch 6/30
112/112 - 8s - loss: 1.3110e-06 - fn: 35.0000 - fp: 5885.0000 - tn: 221544.0000 - tp: 382.0000 - precision: 0.0610 - recall: 0.9161 - val_loss: 0.0408 - val_fn: 12.0000 - val_fp: 512.0000 - val_tn: 56374.0000 - val_tp: 63.0000 - val_precision: 0.1096 - val_recall: 0.8400
Epoch 7/30
112/112 - 8s - loss: 1.4277e-06 - fn: 27.0000 - fp: 8756.0000 - tn: 218673.0000 - tp: 390.0000 - precision: 0.0426 - recall: 0.9353 - val_loss: 0.0642 - val_fn: 12.0000 - val_fp: 801.0000 - val_tn: 56085.0000 - val_tp: 63.0000 - val_precision: 0.0729 - val_recall: 0.8400
Epoch 8/30
112/112 - 8s - loss: 1.2510e-06 - fn: 33.0000 - fp: 6961.0000 - tn: 220468.0000 - tp: 384.0000 - precision: 0.0523 - recall: 0.9209 - val_loss: 0.0816 - val_fn: 9.0000 - val_fp: 1471.0000 - val_tn: 55415.0000 - val_tp: 66.0000 - val_precision: 0.0429 - val_recall: 0.8800
Epoch 9/30
112/112 - 8s - loss: 1.0318e-06 - fn: 23.0000 - fp: 6368.0000 - tn: 221061.0000 - tp: 394.0000 - precision: 0.0583 - recall: 0.9448 - val_loss: 0.0197 - val_fn: 13.0000 - val_fp: 270.0000 - val_tn: 56616.0000 - val_tp: 62.0000 - val_precision: 0.1867 - val_recall: 0.8267
Epoch 10/30
112/112 - 9s - loss: 1.0231e-06 - fn: 28.0000 - fp: 6636.0000 - tn: 220793.0000 - tp: 389.0000 - precision: 0.0554 - recall: 0.9329 - val_loss: 0.0340 - val_fn: 11.0000 - val_fp: 590.0000 - val_tn: 56296.0000 - val_tp: 64.0000 - val_precision: 0.0979 - val_recall: 0.8533
Epoch 11/30
112/112 - 9s - loss: 8.7733e-07 - fn: 19.0000 - fp: 5936.0000 - tn: 221493.0000 - tp: 398.0000 - precision: 0.0628 - recall: 0.9544 - val_loss: 0.0548 - val_fn: 11.0000 - val_fp: 1061.0000 - val_tn: 55825.0000 - val_tp: 64.0000 - val_precision: 0.0569 - val_recall: 0.8533
Epoch 12/30
112/112 - 8s - loss: 1.1749e-06 - fn: 27.0000 - fp: 9181.0000 - tn: 218248.0000 - tp: 390.0000 - precision: 0.0407 - recall: 0.9353 - val_loss: 0.1353 - val_fn: 8.0000 - val_fp: 2772.0000 - val_tn: 54114.0000 - val_tp: 67.0000 - val_precision: 0.0236 - val_recall: 0.8933
Epoch 13/30
112/112 - 8s - loss: 9.0946e-07 - fn: 23.0000 - fp: 5973.0000 - tn: 221456.0000 - tp: 394.0000 - precision: 0.0619 - recall: 0.9448 - val_loss: 0.0173 - val_fn: 12.0000 - val_fp: 281.0000 - val_tn: 56605.0000 - val_tp: 63.0000 - val_precision: 0.1831 - val_recall: 0.8400
Epoch 14/30
112/112 - 8s - loss: 8.7442e-07 - fn: 17.0000 - fp: 5818.0000 - tn: 221611.0000 - tp: 400.0000 - precision: 0.0643 - recall: 0.9592 - val_loss: 0.1959 - val_fn: 7.0000 - val_fp: 3019.0000 - val_tn: 53867.0000 - val_tp: 68.0000 - val_precision: 0.0220 - val_recall: 0.9067
Epoch 15/30
112/112 - 8s - loss: 9.7855e-07 - fn: 24.0000 - fp: 8253.0000 - tn: 219176.0000 - tp: 393.0000 - precision: 0.0455 - recall: 0.9424 - val_loss: 0.0145 - val_fn: 12.0000 - val_fp: 217.0000 - val_tn: 56669.0000 - val_tp: 63.0000 - val_precision: 0.2250 - val_recall: 0.8400
Epoch 16/30
112/112 - 8s - loss: 8.3831e-07 - fn: 18.0000 - fp: 6456.0000 - tn: 220973.0000 - tp: 399.0000 - precision: 0.0582 - recall: 0.9568 - val_loss: 0.0388 - val_fn: 11.0000 - val_fp: 763.0000 - val_tn: 56123.0000 - val_tp: 64.0000 - val_precision: 0.0774 - val_recall: 0.8533
Epoch 17/30
112/112 - 8s - loss: 6.8649e-07 - fn: 16.0000 - fp: 5881.0000 - tn: 221548.0000 - tp: 401.0000 - precision: 0.0638 - recall: 0.9616 - val_loss: 0.0558 - val_fn: 10.0000 - val_fp: 1355.0000 - val_tn: 55531.0000 - val_tp: 65.0000 - val_precision: 0.0458 - val_recall: 0.8667
Epoch 18/30
112/112 - 8s - loss: 6.7273e-07 - fn: 14.0000 - fp: 6044.0000 - tn: 221385.0000 - tp: 403.0000 - precision: 0.0625 - recall: 0.9664 - val_loss: 0.0501 - val_fn: 10.0000 - val_fp: 1023.0000 - val_tn: 55863.0000 - val_tp: 65.0000 - val_precision: 0.0597 - val_recall: 0.8667
Epoch 19/30
112/112 - 9s - loss: 6.2794e-07 - fn: 14.0000 - fp: 6144.0000 - tn: 221285.0000 - tp: 403.0000 - precision: 0.0616 - recall: 0.9664 - val_loss: 0.0510 - val_fn: 11.0000 - val_fp: 1209.0000 - val_tn: 55677.0000 - val_tp: 64.0000 - val_precision: 0.0503 - val_recall: 0.8533
Epoch 20/30
112/112 - 9s - loss: 5.8497e-07 - fn: 11.0000 - fp: 6114.0000 - tn: 221315.0000 - tp: 406.0000 - precision: 0.0623 - recall: 0.9736 - val_loss: 0.0133 - val_fn: 13.0000 - val_fp: 251.0000 - val_tn: 56635.0000 - val_tp: 62.0000 - val_precision: 0.1981 - val_recall: 0.8267
Epoch 21/30
112/112 - 9s - loss: 6.1607e-07 - fn: 13.0000 - fp: 6033.0000 - tn: 221396.0000 - tp: 404.0000 - precision: 0.0628 - recall: 0.9688 - val_loss: 0.0564 - val_fn: 10.0000 - val_fp: 1372.0000 - val_tn: 55514.0000 - val_tp: 65.0000 - val_precision: 0.0452 - val_recall: 0.8667
Epoch 22/30
112/112 - 9s - loss: 6.0616e-07 - fn: 8.0000 - fp: 5761.0000 - tn: 221668.0000 - tp: 409.0000 - precision: 0.0663 - recall: 0.9808 - val_loss: 0.1833 - val_fn: 11.0000 - val_fp: 4029.0000 - val_tn: 52857.0000 - val_tp: 64.0000 - val_precision: 0.0156 - val_recall: 0.8533
Epoch 23/30
112/112 - 9s - loss: 6.2629e-07 - fn: 14.0000 - fp: 6845.0000 - tn: 220584.0000 - tp: 403.0000 - precision: 0.0556 - recall: 0.9664 - val_loss: 0.0181 - val_fn: 12.0000 - val_fp: 340.0000 - val_tn: 56546.0000 - val_tp: 63.0000 - val_precision: 0.1563 - val_recall: 0.8400
Epoch 24/30
112/112 - 8s - loss: 6.7303e-07 - fn: 12.0000 - fp: 7362.0000 - tn: 220067.0000 - tp: 405.0000 - precision: 0.0521 - recall: 0.9712 - val_loss: 0.0110 - val_fn: 12.0000 - val_fp: 197.0000 - val_tn: 56689.0000 - val_tp: 63.0000 - val_precision: 0.2423 - val_recall: 0.8400
Epoch 25/30
112/112 - 8s - loss: 5.3338e-07 - fn: 11.0000 - fp: 5530.0000 - tn: 221899.0000 - tp: 406.0000 - precision: 0.0684 - recall: 0.9736 - val_loss: 0.0619 - val_fn: 10.0000 - val_fp: 1571.0000 - val_tn: 55315.0000 - val_tp: 65.0000 - val_precision: 0.0397 - val_recall: 0.8667
Epoch 26/30
112/112 - 8s - loss: 4.9341e-07 - fn: 9.0000 - fp: 5615.0000 - tn: 221814.0000 - tp: 408.0000 - precision: 0.0677 - recall: 0.9784 - val_loss: 0.0215 - val_fn: 12.0000 - val_fp: 474.0000 - val_tn: 56412.0000 - val_tp: 63.0000 - val_precision: 0.1173 - val_recall: 0.8400
Epoch 27/30
112/112 - 9s - loss: 4.9612e-07 - fn: 9.0000 - fp: 5366.0000 - tn: 222063.0000 - tp: 408.0000 - precision: 0.0707 - recall: 0.9784 - val_loss: 0.0494 - val_fn: 10.0000 - val_fp: 1224.0000 - val_tn: 55662.0000 - val_tp: 65.0000 - val_precision: 0.0504 - val_recall: 0.8667
Epoch 28/30
112/112 - 9s - loss: 4.9080e-07 - fn: 7.0000 - fp: 5557.0000 - tn: 221872.0000 - tp: 410.0000 - precision: 0.0687 - recall: 0.9832 - val_loss: 0.1207 - val_fn: 9.0000 - val_fp: 2618.0000 - val_tn: 54268.0000 - val_tp: 66.0000 - val_precision: 0.0246 - val_recall: 0.8800
Epoch 29/30
112/112 - 9s - loss: 5.3789e-07 - fn: 10.0000 - fp: 5416.0000 - tn: 222013.0000 - tp: 407.0000 - precision: 0.0699 - recall: 0.9760 - val_loss: 0.0202 - val_fn: 12.0000 - val_fp: 455.0000 - val_tn: 56431.0000 - val_tp: 63.0000 - val_precision: 0.1216 - val_recall: 0.8400
Epoch 30/30
112/112 - 9s - loss: 4.6956e-07 - fn: 6.0000 - fp: 5282.0000 - tn: 222147.0000 - tp: 411.0000 - precision: 0.0722 - recall: 0.9856 - val_loss: 0.0276 - val_fn: 11.0000 - val_fp: 609.0000 - val_tn: 56277.0000 - val_tp: 64.0000 - val_precision: 0.0951 - val_recall: 0.8533
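Conceptually, class_weight simply assigns each training sample the weight of its class when the loss is averaged. A purely illustrative sketch of the equivalent per-sample weights (sample_weight_demo is a demo variable):
# Equivalent per-sample weights (illustrative only; class_weight already does this internally)
sample_weight_demo = np.where(train_targets == 1, weight_for_1, weight_for_0)
print(sample_weight_demo[:5])
# model.fit(..., sample_weight=sample_weight_demo) would weight the loss in the same way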
# predict the training set
y_pred_train = model.predict(train_features)
y_pred_train[y_pred_train > 0.5] = 1
y_pred_train[y_pred_train <= 0.5] = 0
y_pred_train.reshape((y_pred_train.shape[0]))
# Predicting the validation set
y_pred_val = model.predict(val_features)
y_pred_val[y_pred_val > 0.5] = 1
y_pred_val[y_pred_val <= 0.5] = 0
y_pred_val.reshape((y_pred_val.shape[0]))
array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)
Compute the metrics
#
# remove extra dimensions
val_targets = np.squeeze(val_targets)
y_pred_val = np.squeeze(y_pred_val)
train_targets = np.squeeze(train_targets)
y_pred_train = np.squeeze(y_pred_train)
#
# false negatives, validation
fn_val = FalseNegatives()
fn_val.update_state(val_targets, y_pred_val)
fn_val = fn_val.result().numpy()
#
# false negatives, training
fn_train = FalseNegatives()
fn_train.update_state(train_targets, y_pred_train)
fn_train = fn_train.result().numpy()
#
# false positives, validation
fp_val = FalsePositives()
fp_val.update_state(val_targets, y_pred_val)
fp_val = fp_val.result().numpy()
#
# false positives, training
fp_train = FalsePositives()
fp_train.update_state(train_targets, y_pred_train)
fp_train = fp_train.result().numpy()
#
# precision, validation
pre_val = Precision()
pre_val.update_state(val_targets, y_pred_val)
pre_val = pre_val.result().numpy()
#
# precision, training
pre_train = Precision()
pre_train.update_state(train_targets, y_pred_train)
pre_train = pre_train.result().numpy()
#
# recall, validation
re_val = Recall()
re_val.update_state(val_targets, y_pred_val)
re_val = re_val.result().numpy()
#
# recall, training
re_train = Recall()
re_train.update_state(train_targets, y_pred_train)
re_train = re_train.result().numpy()
#
# dictionary of metrics
metricas = {'Falsos_positivos_train':fp_train, 'Falsos_positivos_val':fp_val,
'%Falsos_positivos': np.round((fp_train+fp_val)/ len(training)*100,4),
'Falsos_negativos_train':fn_train, 'Falsos_negativos_val':fn_val,
'%Falsos_negativos': np.round((fn_train+fn_val)/ len(training)*100,4),
'Precision_train': pre_train, 'Precision_val': pre_val,
'Recall_train': re_train, 'Recall_val':re_val }
metricas
{'Falsos_positivos_train': 3466.0, 'Falsos_positivos_val': 609.0, '%Falsos_positivos': 1.4308, 'Falsos_negativos_train': 2.0, 'Falsos_negativos_val': 11.0, '%Falsos_negativos': 0.0046, 'Precision_train': 0.1069312, 'Precision_val': 0.09509658, 'Recall_train': 0.99520385, 'Recall_val': 0.85333335}
def plot_metric(history, metric):
    train_metrics = history.history[metric]
    val_metrics = history.history['val_' + metric]
    epochs = range(1, len(train_metrics) + 1)
    plt.plot(epochs, train_metrics, 'bo--')
    plt.plot(epochs, val_metrics, 'ro-')
    plt.title('Training and validation ' + metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(["train_" + metric, 'val_' + metric])
    plt.show()
plot_metric(history, 'loss')
plot_metric(history, 'recall')
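The ROC utilities imported at the top (roc_curve, auc, roc_auc_score) were not used above; a brief sketch of how they could summarize the validation performance using the predicted probabilities:
# ROC AUC on the validation set (sketch)
probs_val = model.predict(val_features).squeeze()
fpr, tpr, _ = roc_curve(val_targets, probs_val)
print("AUC:", auc(fpr, tpr))
print("roc_auc_score:", roc_auc_score(val_targets, probs_val))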
At the end of training, out of 56,961 validation transactions, we correctly identified 64 fraudulent transactions, missed 11 of them, and paid the price of incorrectly flagging 609 legitimate transactions.
In the real world, one would put an even higher weight on class 1, to reflect that false negatives are more costly than false positives.
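Besides increasing the class weight, another simple way to act on that asymmetry is to lower the decision threshold from 0.5; a sketch (thresholds chosen arbitrarily for illustration):
# Trading false positives for fewer false negatives by lowering the threshold
probs_val = model.predict(val_features).squeeze()
for thr in (0.5, 0.3, 0.1):
    preds = (probs_val > thr).astype('uint8')
    fn = int(((val_targets == 1) & (preds == 0)).sum())
    fp = int(((val_targets == 0) & (preds == 1)).sum())
    print(f"threshold={thr}: FN={fn}, FP={fp}")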
The next time your credit card is declined in an online purchase, will this be the reason?