Tabular Playground Series - Nov 2021

TabularPGSnov2021.jpeg

1. Exploratory data analysis

Load the required libraries and open the datasets downloaded from this link.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

from IPython.display import display
from IPython.display import HTML

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings

# Raise pandas' display limits so wide and long frames are rendered without
# truncation: up to 110 columns and up to 400 rows.
pd.set_option("display.max_columns", 110)
pd.set_option("display.max_rows", 400)


from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import StandardScaler


from tensorflow import keras
from keras.regularizers import l1
from keras.regularizers import l2
from keras.backend import sigmoid

from tensorflow.keras import activations
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import initializers
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.layers import LayerNormalization
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.callbacks import EarlyStopping

# Silence every warning category from this point on. The call comes after the
# imports above, so warnings raised while importing those libraries are not
# affected by it.
warnings.filterwarnings('ignore')
2022-03-08 12:58:11.081785: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0

Open the dataset and get general statistics. To calculate the mutual information values for the classifier, I used my own function; so as not to clutter the notebook with unnecessary code, I load the summary of the mutual information values and general statistics from a CSV file.

In [2]:
%%time
# Read the three competition CSV files, in order, into DataFrames:
# the training set, the test set and the sample submission file.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/train.csv",
        "data/test.csv",
        "data/sample_submission.csv",
    )
)
train.set_index