import os, pickle
import pandas as pd
import numpy as np
#
import matplotlib.pyplot as plt
from IPython.display import display
#
from sklearn import metrics
from sklearn.model_selection import train_test_split
#
import xgboost as xgb
from xgboost import plot_importance
# Pandas display settings: make wide / long frames fully visible in notebook output.
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.precision', 4)
# None means "do not truncate cell contents"; the former -1 sentinel is
# deprecated and rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)
# Load the engineered feature table (bz2-compressed CSV).
# Alternative pickle source, kept for reference:
# pdf_features_label = pd.read_pickle(os.path.join("features", "pdf_features_label.pkl.bz2"), compression="bz2")
pdf_features_label = pd.read_csv(
    os.path.join("../04_feature_engineering/features", "pdf_features_label.csv.bz2"),
    compression="bz2",
)
# The CSV carries its saved row index, which read_csv returns as an
# "Unnamed: 0" column. It is only a row counter, so drop it; otherwise it is
# picked up below as a model feature and leaks the row order into training.
unnamed_cols = [cname for cname in pdf_features_label.columns if str(cname).startswith("Unnamed:")]
pdf_features_label = pdf_features_label.drop(columns=unnamed_cols)
# Identifier, label and split-marker columns are metadata, not model inputs.
meta_cols = ["SK_ID_CURR", "TARGET", "tvt_code"]
ls_features = [cname for cname in pdf_features_label.columns if cname not in meta_cols]
#
print("Number of features: {}".format(len(ls_features)))
print(pdf_features_label.shape)
# Transposed so each feature is one row of the preview.
display(pdf_features_label.head().T)
Number of features: 1042 (356255, 1045)
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
Unnamed: 0 | 0 | 1 | 2 | 3 | 4 |
SK_ID_CURR | 100002 | 100003 | 100004 | 100006 | 100007 |
TARGET | 1 | 0 | 0 | 0 | 0 |
tvt_code | train | train | train | train | train |
NAME_INCOME_TYPE_Working | 1 | 0 | 1 | 1 | 1 |
is_REGION_RATING_CLIENT_W_CITY | 2 | 1 | 2 | 2 | 2 |
is_REGION_RATING_CLIENT | 2 | 1 | 2 | 2 | 2 |
is_CODE_GENDER | 1 | 0 | 1 | 0 | 1 |
NAME_EDUCATION_TYPE_Higher_education | 0 | 1 | 0 | 0 | 0 |
NAME_EDUCATION_TYPE_Secondary___secondary_special | 1 | 0 | 1 | 1 | 1 |
is_REG_CITY_NOT_WORK_CITY | 0 | 0 | 0 | 0 | 1 |
is_FLAG_DOCUMENT_3 | 1 | 1 | 0 | 1 | 0 |
HOUSETYPE_MODE_block_of_flats | 1 | 1 | 0 | 0 | 0 |
NAME_INCOME_TYPE_Pensioner | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_XNA | 0 | 0 | 0 | 0 | 0 |
is_FLAG_EMP_PHONE | 1 | 1 | 1 | 1 | 1 |
OCCUPATION_TYPE_Laborers | 1 | 0 | 1 | 1 | 0 |
WALLSMATERIAL_MODE_Panel | 0 | 0 | 0 | 0 | 0 |
is_LIVE_CITY_NOT_WORK_CITY | 0 | 0 | 0 | 0 | 1 |
NAME_FAMILY_STATUS_Married | 0 | 1 | 0 | 0 | 0 |
is_FLAG_WORK_PHONE | 0 | 0 | 1 | 0 | 0 |
is_FLAG_PHONE | 1 | 1 | 1 | 0 | 0 |
is_FLAG_OWN_CAR | 0 | 0 | 1 | 0 | 0 |
ORGANIZATION_TYPE_Self_employed | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Business_Entity_Type_3 | 1 | 0 | 0 | 1 | 0 |
NAME_FAMILY_STATUS_Single___not_married | 1 | 0 | 1 | 0 | 1 |
FONDKAPREMONT_MODE_reg_oper_account | 1 | 1 | 0 | 0 | 0 |
is_NAME_CONTRACT_TYPE | 1 | 1 | 0 | 1 | 1 |
NAME_HOUSING_TYPE_House___apartment | 1 | 1 | 1 | 1 | 1 |
is_FLAG_DOCUMENT_6 | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Drivers | 0 | 0 | 0 | 0 | 0 |
NAME_FAMILY_STATUS_Civil_marriage | 0 | 0 | 0 | 1 | 0 |
NAME_HOUSING_TYPE_With_parents | 0 | 0 | 0 | 0 | 0 |
NAME_INCOME_TYPE_State_servant | 0 | 1 | 0 | 0 | 0 |
OCCUPATION_TYPE_Core_staff | 0 | 1 | 0 | 0 | 1 |
OCCUPATION_TYPE_Sales_staff | 0 | 0 | 0 | 0 | 0 |
NAME_INCOME_TYPE_Commercial_associate | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Stone,_brick | 1 | 0 | 0 | 0 | 0 |
NAME_FAMILY_STATUS_Widow | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Managers | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Accountants | 0 | 0 | 0 | 0 | 0 |
is_FLAG_OWN_REALTY | 1 | 0 | 1 | 1 | 1 |
ORGANIZATION_TYPE_Construction | 0 | 0 | 0 | 0 | 0 |
NAME_TYPE_SUITE_Unaccompanied | 1 | 0 | 1 | 1 | 1 |
is_FLAG_DOCUMENT_8 | 0 | 0 | 0 | 0 | 1 |
NAME_TYPE_SUITE_Family | 0 | 1 | 0 | 0 | 0 |
OCCUPATION_TYPE_High_skill_tech_staff | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_School | 0 | 1 | 0 | 0 | 0 |
NAME_HOUSING_TYPE_Rented_apartment | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Low_skill_Laborers | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Security_staff | 0 | 0 | 0 | 0 | 0 |
FONDKAPREMONT_MODE_reg_oper_spec_account | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Medicine | 0 | 0 | 0 | 0 | 0 |
FONDKAPREMONT_MODE_org_spec_account | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Block | 0 | 1 | 0 | 0 | 0 |
OCCUPATION_TYPE_Cooking_staff | 0 | 0 | 0 | 0 | 0 |
is_REG_REGION_NOT_WORK_REGION | 0 | 0 | 0 | 0 | 0 |
NAME_EDUCATION_TYPE_Lower_secondary | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Government | 0 | 0 | 1 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_7 | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Medicine_staff | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Military | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_3 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Bank | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Transport__type_3 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Police | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Restaurant | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Kindergarten | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Security | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Agriculture | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Cleaning_staff | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Wooden | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Security_Ministries | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_3 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Business_Entity_Type_2 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Other | 0 | 0 | 0 | 0 | 0 |
is_REG_REGION_NOT_LIVE_REGION | 0 | 0 | 0 | 0 | 0 |
NAME_EDUCATION_TYPE_Incomplete_higher | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Monolithic | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Transport__type_4 | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Waiters_barmen_staff | 0 | 0 | 0 | 0 | 0 |
baseline_extend_AMT_INCOME_TOTAL | 2.025e+05 | 2.7e+05 | 6.75e+04 | 1.35e+05 | 1.215e+05 |
baseline_extend_AMT_CREDIT | 4.066e+05 | 1.294e+06 | 1.35e+05 | 3.127e+05 | 5.13e+05 |
baseline_extend_AMT_ANNUITY | 2.47e+04 | 3.57e+04 | 6750 | 2.969e+04 | 2.187e+04 |
baseline_extend_AMT_GOODS_PRICE | 3.51e+05 | 1.13e+06 | 1.35e+05 | 2.97e+05 | 5.13e+05 |
baseline_extend_REGION_POPULATION_RELATIVE | 0.0188 | 0.003541 | 0.01003 | 0.008019 | 0.02866 |
baseline_extend_DAYS_REGISTRATION | -3648 | -1186 | -4260 | -9833 | -4311 |
baseline_extend_OWN_CAR_AGE | 0 | 0 | 26 | 0 | 0 |
baseline_extend_CNT_FAM_MEMBERS | 1 | 2 | 1 | 2 | 1 |
baseline_extend_EXT_SOURCE_1 | 0.08304 | 0.3113 | NaN | NaN | NaN |
baseline_extend_EXT_SOURCE_2 | 0.2629 | 0.6222 | 0.5559 | 0.6504 | 0.3227 |
baseline_extend_EXT_SOURCE_3 | 0.1394 | NaN | 0.7296 | NaN | NaN |
baseline_extend_APARTMENTS_AVG | 0.0247 | 0.0959 | NaN | NaN | NaN |
baseline_extend_BASEMENTAREA_AVG | 0.0369 | 0.0529 | NaN | NaN | NaN |
baseline_extend_YEARS_BEGINEXPLUATATION_AVG | 0.9722 | 0.9851 | NaN | NaN | NaN |
baseline_extend_YEARS_BUILD_AVG | 0.6192 | 0.796 | NaN | NaN | NaN |
baseline_extend_COMMONAREA_AVG | 0.0143 | 0.0605 | NaN | NaN | NaN |
baseline_extend_ELEVATORS_AVG | 0 | 0.08 | NaN | NaN | NaN |
baseline_extend_ENTRANCES_AVG | 0.069 | 0.0345 | NaN | NaN | NaN |
baseline_extend_FLOORSMAX_AVG | 0.0833 | 0.2917 | NaN | NaN | NaN |
baseline_extend_FLOORSMIN_AVG | 0.125 | 0.3333 | NaN | NaN | NaN |
baseline_extend_LANDAREA_AVG | 0.0369 | 0.013 | NaN | NaN | NaN |
baseline_extend_LIVINGAPARTMENTS_AVG | 0.0202 | 0.0773 | NaN | NaN | NaN |
baseline_extend_LIVINGAREA_AVG | 0.019 | 0.0549 | NaN | NaN | NaN |
baseline_extend_NONLIVINGAPARTMENTS_AVG | 0 | 0.0039 | NaN | NaN | NaN |
baseline_extend_NONLIVINGAREA_AVG | 0 | 0.0098 | NaN | NaN | NaN |
baseline_extend_APARTMENTS_MODE | 0.0252 | 0.0924 | NaN | NaN | NaN |
baseline_extend_BASEMENTAREA_MODE | 0.0383 | 0.0538 | NaN | NaN | NaN |
baseline_extend_YEARS_BEGINEXPLUATATION_MODE | 0.9722 | 0.9851 | NaN | NaN | NaN |
baseline_extend_YEARS_BUILD_MODE | 0.6341 | 0.804 | NaN | NaN | NaN |
baseline_extend_COMMONAREA_MODE | 0.0144 | 0.0497 | NaN | NaN | NaN |
baseline_extend_ELEVATORS_MODE | 0 | 0.0806 | NaN | NaN | NaN |
baseline_extend_ENTRANCES_MODE | 0.069 | 0.0345 | NaN | NaN | NaN |
baseline_extend_FLOORSMAX_MODE | 0.0833 | 0.2917 | NaN | NaN | NaN |
baseline_extend_FLOORSMIN_MODE | 0.125 | 0.3333 | NaN | NaN | NaN |
baseline_extend_LANDAREA_MODE | 0.0377 | 0.0128 | NaN | NaN | NaN |
baseline_extend_LIVINGAPARTMENTS_MODE | 0.022 | 0.079 | NaN | NaN | NaN |
baseline_extend_LIVINGAREA_MODE | 0.0198 | 0.0554 | NaN | NaN | NaN |
baseline_extend_NONLIVINGAPARTMENTS_MODE | 0 | 0 | NaN | NaN | NaN |
baseline_extend_NONLIVINGAREA_MODE | 0 | 0 | NaN | NaN | NaN |
baseline_extend_APARTMENTS_MEDI | 0.025 | 0.0968 | NaN | NaN | NaN |
baseline_extend_BASEMENTAREA_MEDI | 0.0369 | 0.0529 | NaN | NaN | NaN |
baseline_extend_YEARS_BEGINEXPLUATATION_MEDI | 0.9722 | 0.9851 | NaN | NaN | NaN |
baseline_extend_YEARS_BUILD_MEDI | 0.6243 | 0.7987 | NaN | NaN | NaN |
baseline_extend_COMMONAREA_MEDI | 0.0144 | 0.0608 | NaN | NaN | NaN |
baseline_extend_ELEVATORS_MEDI | 0 | 0.08 | NaN | NaN | NaN |
baseline_extend_ENTRANCES_MEDI | 0.069 | 0.0345 | NaN | NaN | NaN |
baseline_extend_FLOORSMAX_MEDI | 0.0833 | 0.2917 | NaN | NaN | NaN |
baseline_extend_FLOORSMIN_MEDI | 0.125 | 0.3333 | NaN | NaN | NaN |
baseline_extend_LANDAREA_MEDI | 0.0375 | 0.0132 | NaN | NaN | NaN |
baseline_extend_LIVINGAPARTMENTS_MEDI | 0.0205 | 0.0787 | NaN | NaN | NaN |
baseline_extend_LIVINGAREA_MEDI | 0.0193 | 0.0558 | NaN | NaN | NaN |
baseline_extend_NONLIVINGAPARTMENTS_MEDI | 0 | 0.0039 | NaN | NaN | NaN |
baseline_extend_NONLIVINGAREA_MEDI | 0 | 0.01 | NaN | NaN | NaN |
baseline_extend_TOTALAREA_MODE | 0.0149 | 0.0714 | NaN | NaN | NaN |
baseline_extend_OBS_30_CNT_SOCIAL_CIRCLE | 2 | 1 | 0 | 2 | 0 |
baseline_extend_DEF_30_CNT_SOCIAL_CIRCLE | 2 | 0 | 0 | 0 | 0 |
baseline_extend_OBS_60_CNT_SOCIAL_CIRCLE | 2 | 1 | 0 | 2 | 0 |
baseline_extend_DEF_60_CNT_SOCIAL_CIRCLE | 2 | 0 | 0 | 0 | 0 |
baseline_extend_DAYS_LAST_PHONE_CHANGE | -1134 | -828 | -815 | -617 | -1106 |
baseline_extend_AMT_REQ_CREDIT_BUREAU_HOUR | 0 | 0 | 0 | NaN | 0 |
baseline_extend_AMT_REQ_CREDIT_BUREAU_DAY | 0 | 0 | 0 | NaN | 0 |
baseline_extend_AMT_REQ_CREDIT_BUREAU_WEEK | 0 | 0 | 0 | NaN | 0 |
baseline_extend_AMT_REQ_CREDIT_BUREAU_MON | 0 | 0 | 0 | NaN | 0 |
baseline_extend_AMT_REQ_CREDIT_BUREAU_QRT | 0 | 0 | 0 | NaN | 0 |
baseline_extend_AMT_REQ_CREDIT_BUREAU_YEAR | 1 | 0 | 0 | NaN | 0 |
baseline_extend_CREDIT_INCOME_PERCENT | 2.008 | 4.791 | 2 | 2.316 | 4.222 |
baseline_extend_ANNUITY_INCOME_PERCENT | 0.122 | 0.1322 | 0.1 | 0.2199 | 0.18 |
baseline_extend_CREDIT_TERM | 0.06075 | 0.0276 | 0.05 | 0.09494 | 0.04262 |
baseline_extend_YEARS_BIRTH | 25.92 | 45.93 | 52.18 | 52.07 | 54.61 |
baseline_extend_REGISTRATION_YEAR | 9.995 | 3.249 | 11.67 | 26.94 | 11.81 |
baseline_extend_ID_PUBLISH_YEAR | 5.808 | 0.7973 | 6.934 | 6.677 | 9.474 |
baseline_extend_LAST_PHONE_CHANGE_YEAR | 3.107 | 2.268 | 2.233 | 1.69 | 3.03 |
baseline_extend_DAYS_EMPLOYED_ANOM | False | False | False | False | False |
baseline_extend_DAYS_EMPLOYED | -637 | -1188 | -225 | -3039 | -3038 |
baseline_extend_YEARS_EMPLOYED | 1.745 | 3.255 | 0.6164 | 8.326 | 8.323 |
baseline_extend_YEARS_EMPLOYED_PERCENT | 0.06733 | 0.07086 | 0.01181 | 0.1599 | 0.1524 |
bureau_balance_1year_STATUS_C_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_C_sum_max | 2 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_C_sum_mean | 1.5 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_C_sum_std | 0.8367 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_C_sum_median | 2 | NaN | NaN | NaN | NaN |
bureau_balance_1year_bureau_num_trans_min | 3 | NaN | NaN | NaN | NaN |
bureau_balance_1year_bureau_num_trans_max | 9 | NaN | NaN | NaN | NaN |
bureau_balance_1year_bureau_num_trans_mean | 5 | NaN | NaN | NaN | NaN |
bureau_balance_1year_bureau_num_trans_std | 2.098 | NaN | NaN | NaN | NaN |
bureau_balance_1year_bureau_num_trans_median | 4.5 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_0_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_0_sum_max | 6 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_0_sum_mean | 2.167 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_0_sum_std | 2.137 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_0_sum_median | 1.5 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_DPD_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_DPD_sum_max | 2 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_DPD_sum_mean | 1.333 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_DPD_sum_std | 0.8165 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_DPD_sum_median | 1.5 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_Sold_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_Sold_sum_max | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_Sold_sum_mean | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_Sold_sum_std | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_Sold_sum_median | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_X_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_X_sum_max | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_X_sum_mean | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_X_sum_std | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_X_sum_median | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_0_max_max | 1 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_Sold_max_max | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_X_max_max | 0 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_DPD_max_max | 1 | NaN | NaN | NaN | NaN |
bureau_balance_1year_STATUS_C_max_max | 1 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_C_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_C_sum_max | 2 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_C_sum_mean | 0.4 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_C_sum_std | 0.8944 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_C_sum_median | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_bureau_num_trans_min | 4 | NaN | NaN | NaN | NaN |
bureau_balance_2year_bureau_num_trans_max | 12 | NaN | NaN | NaN | NaN |
bureau_balance_2year_bureau_num_trans_mean | 10 | NaN | NaN | NaN | NaN |
bureau_balance_2year_bureau_num_trans_std | 3.391 | NaN | NaN | NaN | NaN |
bureau_balance_2year_bureau_num_trans_median | 11 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_0_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_0_sum_max | 12 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_0_sum_mean | 5 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_0_sum_std | 4.359 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_0_sum_median | 4 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_DPD_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_DPD_sum_max | 5 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_DPD_sum_mean | 3 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_DPD_sum_std | 2 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_DPD_sum_median | 4 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_Sold_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_Sold_sum_max | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_Sold_sum_mean | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_Sold_sum_std | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_Sold_sum_median | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_X_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_X_sum_max | 3 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_X_sum_mean | 1.6 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_X_sum_std | 1.517 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_X_sum_median | 2 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_0_max_max | 1 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_Sold_max_max | 0 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_X_max_max | 1 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_DPD_max_max | 1 | NaN | NaN | NaN | NaN |
bureau_balance_2year_STATUS_C_max_max | 1 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_C_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_C_sum_max | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_C_sum_mean | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_C_sum_std | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_C_sum_median | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_bureau_num_trans_min | 1 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_bureau_num_trans_max | 12 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_bureau_num_trans_mean | 4.667 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_bureau_num_trans_std | 6.351 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_bureau_num_trans_median | 1 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_0_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_0_sum_max | 5 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_0_sum_mean | 1.667 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_0_sum_std | 2.887 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_0_sum_median | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_DPD_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_DPD_sum_max | 4 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_DPD_sum_mean | 1.333 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_DPD_sum_std | 2.309 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_DPD_sum_median | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_Sold_sum_min | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_Sold_sum_max | 0 | NaN | NaN | NaN | NaN |
bureau_balance_gt3year_STATUS_Sold_sum_mean | 0 | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... |
pos_cash_CNT_INSTALMENT_std | 0 | 2.807 | 0.5 | 9.279 | 4.884 |
pos_cash_CNT_INSTALMENT_FUTURE_max | 24 | 12 | 4 | 48 | 24 |
pos_cash_CNT_INSTALMENT_FUTURE_min | 6 | 0 | 0 | 0 | 0 |
pos_cash_CNT_INSTALMENT_FUTURE_sum | 285 | 162 | 9 | 173 | 592 |
pos_cash_CNT_INSTALMENT_FUTURE_mean | 15 | 5.786 | 2.25 | 8.65 | 8.97 |
pos_cash_CNT_INSTALMENT_FUTURE_std | 5.627 | 3.843 | 1.708 | 10.16 | 6.312 |
prev_app_NAME_PRODUCT_TYPE_walk_in_sum | 0 | 0 | 0 | 0 | 1 |
prev_app_NAME_PRODUCT_TYPE_walk_in_mean | 0 | 0 | 0 | 0 | 0.1667 |
prev_app_NAME_PRODUCT_TYPE_walk_in_max | 0 | 0 | 0 | 0 | 1 |
prev_app_NAME_CONTRACT_TYPE_Revolving_loans_sum | 0 | 0 | 0 | 2 | 0 |
prev_app_NAME_CONTRACT_TYPE_Revolving_loans_mean | 0 | 0 | 0 | 0.2222 | 0 |
prev_app_PRODUCT_COMBINATION_Card_Street_sum | 0 | 0 | 0 | 1 | 0 |
prev_app_PRODUCT_COMBINATION_Card_Street_mean | 0 | 0 | 0 | 0.1111 | 0 |
prev_app_NAME_CONTRACT_TYPE_Consumer_loans_mean | 1 | 0.6667 | 1 | 0.2222 | 0.3333 |
prev_app_NAME_CONTRACT_TYPE_Revolving_loans_max | 0 | 0 | 0 | 1 | 0 |
prev_app_PRODUCT_COMBINATION_Card_Street_max | 0 | 0 | 0 | 1 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__low_mean | 0 | 0.3333 | 0 | 0.2222 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__low_max | 0 | 1 | 0 | 1 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__low_sum | 0 | 1 | 0 | 2 | 0 |
prev_app_PRODUCT_COMBINATION_POS_industry_with_interest_mean | 0 | 0.3333 | 0 | 0.1111 | 0 |
prev_app_NAME_CONTRACT_TYPE_Consumer_loans_sum | 1 | 2 | 1 | 2 | 2 |
prev_app_NAME_TYPE_SUITE_Unaccompanied_mean | 1 | 0.3333 | 1 | 0.8889 | 0.6667 |
prev_app_PRODUCT_COMBINATION_POS_industry_with_interest_sum | 0 | 1 | 0 | 1 | 0 |
prev_app_PRODUCT_COMBINATION_POS_industry_with_interest_max | 0 | 1 | 0 | 1 | 0 |
prev_app_NAME_TYPE_SUITE_Family_mean | 0 | 0.6667 | 0 | 0.1111 | 0.3333 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__high_sum | 0 | 0 | 0 | 1 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__high_mean | 0 | 0 | 0 | 0.1111 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__high_max | 0 | 0 | 0 | 1 | 0 |
prev_app_NAME_PRODUCT_TYPE_XNA_mean | 1 | 0.6667 | 1 | 0.5556 | 0.3333 |
prev_app_PRODUCT_COMBINATION_Cash_Street__high_sum | 0 | 0 | 0 | 0 | 1 |
prev_app_NAME_TYPE_SUITE_Family_max | 0 | 1 | 0 | 1 | 1 |
prev_app_PRODUCT_COMBINATION_Cash_Street__high_mean | 0 | 0 | 0 | 0 | 0.1667 |
prev_app_NAME_TYPE_SUITE_Family_sum | 0 | 2 | 0 | 1 | 2 |
prev_app_PRODUCT_COMBINATION_Cash_Street__high_max | 0 | 0 | 0 | 0 | 1 |
prev_app_NAME_TYPE_SUITE_Unaccompanied_sum | 1 | 1 | 1 | 8 | 4 |
prev_app_PRODUCT_COMBINATION_POS_household_without_interest_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_household_without_interest_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_household_without_interest_max | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_sum | 0 | 0 | 0 | 2 | 0 |
prev_app_NAME_CONTRACT_TYPE_Cash_loans_sum | 0 | 1 | 0 | 5 | 4 |
prev_app_PRODUCT_COMBINATION_POS_mobile_with_interest_mean | 0 | 0 | 0 | 0 | 0.1667 |
prev_app_PRODUCT_COMBINATION_Cash_mean | 0 | 0 | 0 | 0.2222 | 0 |
prev_app_NAME_PRODUCT_TYPE_x_sell_mean | 0 | 0.3333 | 0 | 0.4444 | 0.5 |
prev_app_PRODUCT_COMBINATION_Cash_max | 0 | 0 | 0 | 1 | 0 |
prev_app_PRODUCT_COMBINATION_POS_mobile_with_interest_max | 0 | 0 | 0 | 0 | 1 |
prev_app_PRODUCT_COMBINATION_POS_mobile_with_interest_sum | 0 | 0 | 0 | 0 | 1 |
prev_app_NAME_CONTRACT_TYPE_Cash_loans_mean | 0 | 0.3333 | 0 | 0.5556 | 0.6667 |
prev_app_PRODUCT_COMBINATION_Cash_Street__middle_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_Street__middle_max | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_Street__middle_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__middle_mean | 0 | 0 | 0 | 0 | 0.5 |
prev_app_PRODUCT_COMBINATION_POS_household_with_interest_mean | 0 | 0.3333 | 0 | 0.1111 | 0.1667 |
prev_app_PRODUCT_COMBINATION_POS_household_with_interest_sum | 0 | 1 | 0 | 1 | 1 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__middle_max | 0 | 0 | 0 | 0 | 1 |
prev_app_PRODUCT_COMBINATION_Cash_X_Sell__middle_sum | 0 | 0 | 0 | 0 | 3 |
prev_app_PRODUCT_COMBINATION_Card_X_Sell_sum | 0 | 0 | 0 | 1 | 0 |
prev_app_FLAG_LAST_APPL_PER_CONTRACT_Y_sum | 1 | 3 | 1 | 9 | 6 |
prev_app_NAME_TYPE_SUITE_Children_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_NAME_TYPE_SUITE_Children_max | 0 | 0 | 0 | 0 | 0 |
prev_app_NAME_TYPE_SUITE_Children_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_NAME_CONTRACT_TYPE_Cash_loans_max | 0 | 1 | 0 | 1 | 1 |
prev_app_PRODUCT_COMBINATION_Card_X_Sell_max | 0 | 0 | 0 | 1 | 0 |
prev_app_PRODUCT_COMBINATION_Card_X_Sell_mean | 0 | 0 | 0 | 0.1111 | 0 |
prev_app_WEEKDAY_APPR_PROCESS_START_SATURDAY_mean | 1 | 0.3333 | 0 | 0.1111 | 0.1667 |
prev_app_WEEKDAY_APPR_PROCESS_START_MONDAY_sum | 0 | 0 | 0 | 0 | 1 |
prev_app_PRODUCT_COMBINATION_POS_household_with_interest_max | 0 | 1 | 0 | 1 | 1 |
prev_app_NAME_PRODUCT_TYPE_x_sell_max | 0 | 1 | 0 | 1 | 1 |
prev_app_PRODUCT_COMBINATION_POS_industry_without_interest_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_industry_without_interest_max | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_industry_without_interest_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_NAME_PRODUCT_TYPE_x_sell_sum | 0 | 1 | 0 | 4 | 3 |
prev_app_WEEKDAY_APPR_PROCESS_START_SUNDAY_mean | 0 | 0.3333 | 0 | 0.1111 | 0.3333 |
prev_app_NAME_CONTRACT_TYPE_Consumer_loans_max | 1 | 1 | 1 | 1 | 1 |
prev_app_WEEKDAY_APPR_PROCESS_START_SATURDAY_max | 1 | 1 | 0 | 1 | 1 |
prev_app_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_WEEKDAY_APPR_PROCESS_START_FRIDAY_sum | 0 | 1 | 1 | 0 | 1 |
prev_app_WEEKDAY_APPR_PROCESS_START_SUNDAY_max | 0 | 1 | 0 | 1 | 1 |
prev_app_WEEKDAY_APPR_PROCESS_START_MONDAY_max | 0 | 0 | 0 | 0 | 1 |
prev_app_WEEKDAY_APPR_PROCESS_START_SUNDAY_sum | 0 | 1 | 0 | 1 | 2 |
prev_app_WEEKDAY_APPR_PROCESS_START_THURSDAY_sum | 0 | 0 | 0 | 6 | 1 |
prev_app_WEEKDAY_APPR_PROCESS_START_MONDAY_mean | 0 | 0 | 0 | 0 | 0.1667 |
prev_app_PRODUCT_COMBINATION_Cash_Street__low_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_Street__low_max | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_Cash_Street__low_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_WEEKDAY_APPR_PROCESS_START_TUESDAY_sum | 0 | 0 | 0 | 1 | 0 |
prev_app_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_max | 0 | 0 | 0 | 0 | 0 |
prev_app_WEEKDAY_APPR_PROCESS_START_SATURDAY_sum | 1 | 1 | 0 | 1 | 1 |
prev_app_NAME_TYPE_SUITE_Unaccompanied_max | 1 | 1 | 1 | 1 | 1 |
prev_app_WEEKDAY_APPR_PROCESS_START_FRIDAY_max | 0 | 1 | 1 | 0 | 1 |
prev_app_WEEKDAY_APPR_PROCESS_START_FRIDAY_mean | 0 | 0.3333 | 1 | 0 | 0.1667 |
prev_app_NAME_TYPE_SUITE_Spouse,_partner_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_WEEKDAY_APPR_PROCESS_START_THURSDAY_max | 0 | 0 | 0 | 1 | 1 |
prev_app_NAME_PRODUCT_TYPE_XNA_max | 1 | 1 | 1 | 1 | 1 |
prev_app_NAME_PRODUCT_TYPE_XNA_sum | 1 | 2 | 1 | 5 | 2 |
prev_app_NAME_TYPE_SUITE_Spouse,_partner_max | 0 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_mobile_without_interest_mean | 0 | 0 | 1 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_mobile_without_interest_max | 0 | 0 | 1 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_mobile_without_interest_sum | 0 | 0 | 1 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_other_with_interest_mean | 1 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_other_with_interest_max | 1 | 0 | 0 | 0 | 0 |
prev_app_PRODUCT_COMBINATION_POS_other_with_interest_sum | 1 | 0 | 0 | 0 | 0 |
prev_app_NAME_TYPE_SUITE_Spouse,_partner_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_FLAG_LAST_APPL_PER_CONTRACT_N_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_FLAG_LAST_APPL_PER_CONTRACT_Y_mean | 1 | 1 | 1 | 1 | 1 |
prev_app_FLAG_LAST_APPL_PER_CONTRACT_N_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_FLAG_LAST_APPL_PER_CONTRACT_N_max | 0 | 0 | 0 | 0 | 0 |
prev_app_WEEKDAY_APPR_PROCESS_START_THURSDAY_mean | 0 | 0 | 0 | 0.6667 | 0.1667 |
prev_app_NAME_TYPE_SUITE_Other_A_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_NAME_TYPE_SUITE_Other_A_mean | 0 | 0 | 0 | 0 | 0 |
prev_app_NAME_TYPE_SUITE_Other_A_max | 0 | 0 | 0 | 0 | 0 |
prev_app_NAME_CONTRACT_STATUS_ordinal_std | NaN | 0 | NaN | 0.7265 | 0 |
prev_app_CODE_REJECT_REASON_ordinal_std | NaN | 0 | NaN | 0.6667 | 0 |
prev_app_NAME_CONTRACT_STATUS_ordinal_mean | 0 | 0 | 0 | 0.5556 | 0 |
prev_app_PREPAID_USER_mean | 0 | 0 | 0 | 0.5556 | 0.1667 |
prev_app_CODE_REJECT_REASON_ordinal_mean | 0 | 0 | 0 | 0.2222 | 0 |
prev_app_CODE_REJECT_REASON_ordinal_sum | 0 | 0 | 0 | 2 | 0 |
prev_app_NAME_YIELD_GROUP_ordinal_mean | 3 | 1.667 | 1 | 1.222 | 1.5 |
prev_app_NAME_CONTRACT_STATUS_ordinal_sum | 0 | 0 | 0 | 5 | 0 |
prev_app_CODE_REJECT_REASON_ordinal_max | 0 | 0 | 0 | 2 | 0 |
prev_app_NAME_GOODS_CATEGORY_ordinal_mean | 13 | 2.333 | 1 | 1.222 | 1.333 |
prev_app_NAME_YIELD_GROUP_ordinal_max | 3 | 3 | 1 | 3 | 2 |
prev_app_PREPAID_USER_sum | 0 | 0 | 0 | 5 | 1 |
prev_app_NAME_CONTRACT_STATUS_ordinal_max | 0 | 0 | 0 | 2 | 0 |
prev_app_NAME_PORTFOLIO_ordinal_mean | 0 | 0.3333 | 0 | 1.333 | 0.6667 |
prev_app_NAME_GOODS_CATEGORY_ordinal_max | 13 | 5 | 1 | 7 | 4 |
prev_app_NAME_GOODS_CATEGORY_ordinal_sum | 13 | 7 | 1 | 11 | 8 |
prev_app_PREPAID_USER_std | NaN | 0 | NaN | 0.527 | 0.4082 |
prev_app_SELLERPLACE_AREA_mean | 500 | 533 | 30 | 894.2 | 409.2 |
prev_app_HOUR_APPR_PROCESS_START_mean | 9 | 14.67 | 5 | 14.67 | 12.33 |
prev_app_NAME_CASH_LOAN_PURPOSE_ordinal_std | NaN | 0.5774 | NaN | 0.527 | 0.5164 |
prev_app_SELLERPLACE_AREA_std | NaN | 757.5 | NaN | 2674 | 612.9 |
prev_app_NAME_GOODS_CATEGORY_ordinal_std | NaN | 2.517 | NaN | 2.539 | 2.066 |
prev_app_SELLERPLACE_AREA_max | 500 | 1400 | 30 | 8025 | 1200 |
prev_app_NAME_PAYMENT_TYPE_ordinal_mean | 1 | 0.3333 | 0 | 0.5556 | 0.1667 |
prev_app_NAME_PORTFOLIO_ordinal_sum | 0 | 1 | 0 | 12 | 4 |
prev_app_NAME_PAYMENT_TYPE_ordinal_sum | 1 | 1 | 0 | 5 | 1 |
prev_app_HOUR_APPR_PROCESS_START_min | 9 | 12 | 5 | 12 | 8 |
prev_app_HOUR_APPR_PROCESS_START_max | 9 | 17 | 5 | 15 | 15 |
prev_app_SELLERPLACE_AREA_sum | 500 | 1599 | 30 | 8048 | 2455 |
prev_app_NAME_PORTFOLIO_ordinal_max | 0 | 1 | 0 | 3 | 1 |
prev_app_NAME_PORTFOLIO_ordinal_std | NaN | 0.5774 | NaN | 1 | 0.5164 |
prev_app_PREPAID_USER_max | 0 | 0 | 0 | 1 | 1 |
prev_app_NAME_SELLER_INDUSTRY_ordinal_mean | 7 | 1.333 | 2 | 0.5556 | 0.8333 |
prev_app_NAME_CASH_LOAN_PURPOSE_ordinal_max | 0 | 1 | 0 | 1 | 1 |
prev_app_NAME_YIELD_GROUP_ordinal_sum | 3 | 5 | 1 | 11 | 9 |
prev_app_NAME_YIELD_GROUP_ordinal_std | NaN | 1.155 | NaN | 1.302 | 0.5477 |
prev_app_NAME_SELLER_INDUSTRY_ordinal_max | 7 | 3 | 2 | 4 | 2 |
prev_app_NAME_CASH_LOAN_PURPOSE_ordinal_sum | 0 | 1 | 0 | 5 | 4 |
prev_app_NAME_CASH_LOAN_PURPOSE_ordinal_mean | 0 | 0.3333 | 0 | 0.5556 | 0.6667 |
prev_app_NAME_SELLER_INDUSTRY_ordinal_sum | 7 | 4 | 2 | 5 | 5 |
prev_app_NAME_CLIENT_TYPE_ordinal_std | NaN | 1.155 | NaN | 0.3333 | 0.4082 |
prev_app_NAME_YIELD_GROUP_ordinal_min | 3 | 1 | 1 | 0 | 1 |
prev_app_NAME_SELLER_INDUSTRY_ordinal_std | NaN | 1.528 | NaN | 1.333 | 0.7528 |
prev_app_NAME_PAYMENT_TYPE_ordinal_std | NaN | 0.5774 | NaN | 0.527 | 0.4082 |
prev_app_NAME_PAYMENT_TYPE_ordinal_max | 1 | 1 | 0 | 1 | 1 |
prev_app_NAME_GOODS_CATEGORY_ordinal_min | 13 | 0 | 1 | 0 | 0 |
prev_app_NAME_CLIENT_TYPE_ordinal_max | 1 | 2 | 1 | 1 | 1 |
prev_app_CHANNEL_TYPE_ordinal_std | NaN | 1 | NaN | 0.7071 | 1.835 |
prev_app_NFLAG_LAST_APPL_IN_DAY_sum | 1 | 3 | 1 | 9 | 6 |
prev_app_HOUR_APPR_PROCESS_START_std | NaN | 2.517 | NaN | 1 | 2.503 |
prev_app_NAME_SELLER_INDUSTRY_ordinal_min | 7 | 0 | 2 | 0 | 0 |
prev_app_SELLERPLACE_AREA_min | 500 | -1 | 30 | -1 | -1 |
prev_app_CHANNEL_TYPE_ordinal_max | 2 | 2 | 3 | 2 | 5 |
prev_app_NAME_PORTFOLIO_ordinal_min | 0 | 0 | 0 | 0 | 0 |
prev_app_NAME_PAYMENT_TYPE_ordinal_min | 1 | 0 | 0 | 0 | 0 |
prev_app_NAME_CLIENT_TYPE_ordinal_mean | 1 | 1.333 | 1 | 0.1111 | 0.1667 |
prev_app_CHANNEL_TYPE_ordinal_min | 2 | 0 | 3 | 0 | 0 |
prev_app_NAME_CLIENT_TYPE_ordinal_min | 1 | 0 | 1 | 0 | 0 |
prev_app_CHANNEL_TYPE_ordinal_mean | 2 | 1 | 3 | 0.3333 | 1.833 |
prev_app_CHANNEL_TYPE_ordinal_sum | 2 | 3 | 3 | 3 | 11 |
prev_app_NAME_CLIENT_TYPE_ordinal_sum | 1 | 4 | 1 | 1 | 1 |
prev_app_NFLAG_LAST_APPL_IN_DAY_std | NaN | 0 | NaN | 0 | 0 |
prev_app_NFLAG_LAST_APPL_IN_DAY_min | 1 | 1 | 1 | 1 | 1 |
prev_app_NFLAG_LAST_APPL_IN_DAY_mean | 1 | 1 | 1 | 1 | 1 |
prev_app_DAYS_FIRST_DRAWING_TO_YEARS_std | NaN | NaN | NaN | NaN | NaN |
prev_app_DAYS_FIRST_DRAWING_TO_YEARS_mean | NaN | NaN | NaN | NaN | NaN |
prev_app_DAYS_FIRST_DRAWING_TO_YEARS_max | NaN | NaN | NaN | NaN | NaN |
prev_app_DAYS_FIRST_DRAWING_TO_YEARS_min | NaN | NaN | NaN | NaN | NaN |
prev_app_DAYS_DECISION_TO_YEARS_std | NaN | 2.461 | NaN | 0.4308 | 1.965 |
prev_app_DAYS_LAST_DUE_1ST_VERSION_TO_YEARS_sum | -0.3425 | 8.255 | 1.901 | -2.995 | 11.47 |
prev_app_AMT_DOWN_PAYMENT_std | NaN | 3975 | NaN | 2.223e+04 | 1760 |
prev_app_DAYS_FIRST_DUE_TO_YEARS_std | NaN | 2.46 | NaN | 0.55 | 2.13 |
prev_app_DAYS_DECISION_TO_YEARS_mean | 1.66 | 3.575 | 2.233 | 0.7464 | 3.35 |
prev_app_DAYS_FIRST_DUE_TO_YEARS_max | 1.548 | 6.329 | 2.148 | 1.493 | 6.373 |
prev_app_DAYS_DECISION_TO_YEARS_max | 1.66 | 6.414 | 2.233 | 1.69 | 6.458 |
prev_app_DAYS_LAST_DUE_1ST_VERSION_TO_YEARS_max | -0.3425 | 5.425 | 1.901 | 0.589 | 5.633 |
prev_app_DAYS_LAST_DUE_TO_YEARS_sum | 0.06849 | 8.666 | 1.984 | 1.578 | 12.5 |
prev_app_DAYS_TERMINATION_TO_YEARS_sum | 0.04658 | 8.608 | 1.956 | 1.532 | 12.39 |
prev_app_AMT_DOWN_PAYMENT_sum | 0 | 6885 | 4860 | 6.968e+04 | 6782 |
prev_app_DAYS_FIRST_DUE_TO_YEARS_sum | 1.548 | 10.47 | 2.148 | 2.677 | 17.3 |
prev_app_AMT_DOWN_PAYMENT_max | 0 | 6885 | 4860 | 6.699e+04 | 3676 |
prev_app_AMT_DOWN_PAYMENT_mean | 0 | 2295 | 4860 | 7742 | 1130 |
prev_app_DAYS_LAST_DUE_TO_YEARS_max | 0.06849 | 5.425 | 1.984 | 1.164 | 5.633 |
prev_app_RATE_DOWN_PAYMENT_std | NaN | 0.05004 | NaN | 0.05631 | 0.0672 |
prev_app_DAYS_TERMINATION_TO_YEARS_max | 0.04658 | 5.414 | 1.956 | 1.14 | 5.592 |
prev_app_DAYS_LAST_DUE_1ST_VERSION_TO_YEARS_mean | -0.3425 | 2.752 | 1.901 | -0.9982 | 2.294 |
prev_app_DAYS_FIRST_DUE_TO_YEARS_mean | 1.548 | 3.491 | 2.148 | 0.8922 | 3.461 |
prev_app_AMT_ANNUITY_mean | 9252 | 5.655e+04 | 5357 | 1.577e+04 | 1.228e+04 |
prev_app_DAYS_LAST_DUE_TO_YEARS_std | NaN | 2.202 | NaN | 0.5308 | 2.052 |
prev_app_CNT_PAYMENT_std | NaN | 3.464 | NaN | 19.7 | 14.35 |
prev_app_RATE_DOWN_PAYMENT_max | 0 | 0.1001 | 0.212 | 0.2178 | 0.2189 |
prev_app_DAYS_TERMINATION_TO_YEARS_std | NaN | 2.209 | NaN | 0.5289 | 2.042 |
prev_app_DAYS_LAST_DUE_TO_YEARS_mean | 0.06849 | 2.889 | 1.984 | 0.789 | 3.125 |
prev_app_DAYS_TERMINATION_TO_YEARS_mean | 0.04658 | 2.869 | 1.956 | 0.7658 | 3.099 |
prev_app_RATE_DOWN_PAYMENT_mean | 0 | 0.05056 | 0.212 | 0.07645 | 0.08758 |
prev_app_AMT_ANNUITY_min | 9252 | 6737 | 5357 | 0 | 1834 |
prev_app_DAYS_DECISION_TO_YEARS_sum | 1.66 | 10.73 | 2.233 | 6.718 | 20.1 |
prev_app_AMT_APPLICATION_mean | 1.791e+05 | 4.354e+05 | 2.428e+04 | 2.722e+05 | 1.505e+05 |
prev_app_AMT_GOODS_PRICE_mean | 1.791e+05 | 4.354e+05 | 2.428e+04 | 2.722e+05 | 1.505e+05 |
prev_app_DAYS_FIRST_DRAWING_TO_YEARS_sum | 0 | 0 | 0 | 0 | 0 |
prev_app_AMT_ANNUITY_max | 9252 | 9.836e+04 | 5357 | 3.995e+04 | 2.268e+04 |
prev_app_AMT_CREDIT_min | 1.791e+05 | 6.805e+04 | 2.011e+04 | 0 | 1.462e+04 |
prev_app_DAYS_DECISION_TO_YEARS_min | 1.66 | 2.044 | 2.233 | 0.4959 | 1.025 |
prev_app_CNT_PAYMENT_max | 24 | 12 | 4 | 48 | 48 |
prev_app_AMT_GOODS_PRICE_min | 1.791e+05 | 6.881e+04 | 2.428e+04 | 0 | 1.718e+04 |
prev_app_AMT_APPLICATION_min | 1.791e+05 | 6.881e+04 | 2.428e+04 | 0 | 1.718e+04 |
prev_app_AMT_CREDIT_mean | 1.791e+05 | 4.842e+05 | 2.011e+04 | 2.917e+05 | 1.666e+05 |
prev_app_AMT_ANNUITY_sum | 9252 | 1.697e+05 | 5357 | 1.419e+05 | 7.367e+04 |
prev_app_CNT_PAYMENT_min | 24 | 6 | 4 | 0 | 10 |
prev_app_DAYS_LAST_DUE_1ST_VERSION_TO_YEARS_std | NaN | 2.342 | NaN | 2.153 | 2.556 |
prev_app_AMT_APPLICATION_max | 1.791e+05 | 9e+05 | 2.428e+04 | 6.885e+05 | 2.475e+05 |
prev_app_AMT_GOODS_PRICE_max | 1.791e+05 | 9e+05 | 2.428e+04 | 6.885e+05 | 2.475e+05 |
prev_app_AMT_ANNUITY_std | NaN | 4.633e+04 | NaN | 1.6e+04 | 8064 |
prev_app_RATE_DOWN_PAYMENT_sum | 0 | 0.1517 | 0.212 | 0.6881 | 0.5255 |
prev_app_NFLAG_INSURED_ON_APPROVAL_sum | 0 | 2 | 0 | 0 | 3 |
prev_app_DAYS_LAST_DUE_1ST_VERSION_TO_YEARS_min | -0.3425 | 1.058 | 1.901 | -3.449 | -0.9479 |
prev_app_RATE_INTEREST_PRIVILEGED_mean | 0.8351 | 0.8351 | 0.8351 | 0.8351 | 0.8351 |
prev_app_AMT_APPLICATION_sum | 1.791e+05 | 1.306e+06 | 2.428e+04 | 2.45e+06 | 9.032e+05 |
prev_app_AMT_GOODS_PRICE_sum | 1.791e+05 | 1.306e+06 | 2.428e+04 | 2.45e+06 | 9.032e+05 |
prev_app_AMT_CREDIT_max | 1.791e+05 | 1.036e+06 | 2.011e+04 | 9.066e+05 | 2.844e+05 |
prev_app_NFLAG_INSURED_ON_APPROVAL_max | 0 | 1 | 0 | 0 | 1 |
prev_app_RATE_INTEREST_PRIMARY_sum | 0.1891 | 0.5674 | 0.1891 | 1.702 | 1.135 |
prev_app_RATE_INTEREST_PRIVILEGED_sum | 0.8351 | 2.505 | 0.8351 | 7.516 | 5.011 |
prev_app_NFLAG_INSURED_ON_APPROVAL_std | NaN | 0.5774 | NaN | 0 | 0.5477 |
prev_app_CNT_PAYMENT_mean | 24 | 10 | 4 | 15.33 | 20.67 |
prev_app_CNT_PAYMENT_sum | 24 | 30 | 4 | 138 | 124 |
prev_app_AMT_APPLICATION_std | NaN | 4.242e+05 | NaN | 2.862e+05 | 1.006e+05 |
prev_app_AMT_GOODS_PRICE_std | NaN | 4.242e+05 | NaN | 2.862e+05 | 1.006e+05 |
prev_app_AMT_CREDIT_sum | 1.791e+05 | 1.453e+06 | 2.011e+04 | 2.625e+06 | 9.998e+05 |
prev_app_DAYS_FIRST_DUE_TO_YEARS_min | 1.548 | 1.962 | 2.148 | 0.4137 | 0.9425 |
prev_app_AMT_DOWN_PAYMENT_min | 0 | 0 | 4860 | 0 | 0 |
prev_app_NFLAG_INSURED_ON_APPROVAL_mean | 0 | 0.6667 | 0 | 0 | 0.6 |
prev_app_NFLAG_INSURED_ON_APPROVAL_min | 0 | 0 | 0 | 0 | 0 |
prev_app_DAYS_LAST_DUE_TO_YEARS_min | 0.06849 | 1.468 | 1.984 | 0.4137 | 0.9699 |
prev_app_DAYS_TERMINATION_TO_YEARS_min | 0.04658 | 1.444 | 1.956 | 0.3918 | 0.9507 |
prev_app_RATE_DOWN_PAYMENT_min | 0 | 0 | 0.212 | 0.05161 | 0.05161 |
prev_app_AMT_CREDIT_std | NaN | 4.979e+05 | NaN | 3.333e+05 | 1.18e+05 |
prev_app_RATE_INTEREST_PRIMARY_mean | 0.1891 | 0.1891 | 0.1891 | 0.1891 | 0.1891 |
prev_app_SK_ID_PREV | 1 | 3 | 1 | 9 | 6 |
1045 rows × 5 columns
# Number of rows per split label stored in the `tvt_code` column.
pdf_features_label["tvt_code"].value_counts()
train 216948 kaggle_test 48744 test 46127 val 44436 Name: tvt_code, dtype: int64
# Experiment version tag; it is interpolated into the saved model's file name.
version = "v07"
def get_Xy_from_pdf(pdf_input, ls_features, tvt_code):
    """Return the feature matrix and label vector of one data split.

    Parameters
    ----------
    pdf_input : pandas.DataFrame
        Frame containing a "tvt_code" column, a "TARGET" column and every
        column listed in ``ls_features``.
    ls_features : list of str
        Names of the feature columns to return, in the desired order.
    tvt_code : str
        Value of the "tvt_code" column that selects the rows
        (e.g. "train", "val", "test").

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is the DataFrame of selected rows restricted
        to ``ls_features`` and ``y`` is the matching "TARGET" Series. Both
        keep the original row index; ``pdf_input`` is not modified.
    """
    # Select rows and columns in one .loc call. The previous version copied
    # the whole filtered frame (all columns) and then copied again when
    # picking the feature columns, which needlessly doubled peak memory.
    in_split = pdf_input["tvt_code"] == tvt_code
    X = pdf_input.loc[in_split, ls_features]
    y = pdf_input.loc[in_split, "TARGET"]
    return (X, y)
# Build the (features, label) pair of each modelling split in one pass;
# the order of the split names matches the order of the unpacked targets.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    get_Xy_from_pdf(pdf_features_label, ls_features, split_name)
    for split_name in ("train", "val", "test")
)
%%time
param_init = {
"objective": "binary:logistic",
"booster": "gbtree",
"max_depth": 4, # default: 3 only for depthwise
"n_estimators": 1000, # default: 500
"learning_rate": 0.025, # default: 0.05
"subsample": 0.7,
"colsample_bytree": 0.6, # default: 1.0
"colsample_bylevel": 0.5, # default: 1.0
"random_state": 0,
#
"silent": True,
"n_jobs": 16,
#
"tree_method": "hist", # default: auto
"grow_policy": "lossguide", # default depthwise
}
param_fit = {
"eval_metric": "auc",
"early_stopping_rounds": 500, # default: 100
"verbose": 200,
"eval_set": [(X_train, y_train), (X_val, y_val), (X_test, y_test)]
}
xgb_model = xgb.XGBClassifier(**param_init)
xgb_model.fit(X_train, y_train, **param_fit)
evals_result = xgb_model.evals_result()
evals_result
[0] validation_0-auc:0.698894 validation_1-auc:0.691129 validation_2-auc:0.701216 Multiple eval metrics have been passed: 'validation_2-auc' will be used for early stopping. Will train until validation_2-auc hasn't improved in 500 rounds. [200] validation_0-auc:0.775736 validation_1-auc:0.758759 validation_2-auc:0.770227 [400] validation_0-auc:0.800803 validation_1-auc:0.774603 validation_2-auc:0.784941 [600] validation_0-auc:0.814218 validation_1-auc:0.779209 validation_2-auc:0.789628 [800] validation_0-auc:0.824751 validation_1-auc:0.781372 validation_2-auc:0.791424 [999] validation_0-auc:0.83417 validation_1-auc:0.782687 validation_2-auc:0.792593 CPU times: user 1h 9min 53s, sys: 48.3 s, total: 1h 10min 41s Wall time: 5min 32s
# save model to file
# The fitted classifier is stored together with the feature list it was
# trained on, so the same columns can be selected again at prediction time.
res_model = {
    "xgb_model": xgb_model,
    "features": ls_features
}
model_path = "models/xgb_model_baseline_{}.mod".format(version)
# use a context manager so the file handle is flushed and closed
# deterministically (the bare open() passed to pickle.dump was never closed)
with open(model_path, "wb") as output_file:
    pickle.dump(res_model, output_file)
# read model
# NOTE: pickle.load can execute arbitrary code; only load model files that
# were produced by this notebook or come from a trusted source.
with open(model_path, "rb") as input_file:
    res_model = pickle.load(input_file)
res_model.keys()
['xgb_model', 'features']
def visualize_auc(pdf, tvt_code, res_model):
    """Draw the ROC curve and the score histogram of one data split.

    pdf       : frame passed on to get_Xy_from_pdf to extract the split
    tvt_code  : split label to evaluate (also shown in the histogram title)
    res_model : dict with the fitted classifier under "xgb_model" and its
                feature column names under "features"

    Shows a two-panel figure and returns nothing.
    """
    # score the requested split; column 1 of predict_proba is kept
    X, y = get_Xy_from_pdf(pdf, res_model["features"], tvt_code)
    scores = res_model["xgb_model"].predict_proba(X)[:, 1]

    # ROC points and the area under the curve
    fpr, tpr, _thresholds = metrics.roc_curve(y, scores)
    auc_value = metrics.roc_auc_score(y, scores)

    line_width = 2
    figure, (roc_ax, hist_ax) = plt.subplots(1, 2, figsize=(10, 3))

    # left panel: ROC curve against the diagonal reference line
    roc_ax.plot(fpr, tpr, color="darkorange", lw=line_width, label="ROC")
    roc_ax.plot(
        [0, 1], [0, 1],
        color="navy", label="Random", lw=line_width, linestyle="--",
    )
    roc_ax.set_xlim([0.0, 1.0])
    roc_ax.set_ylim([0.0, 1.05])
    roc_ax.set_xlabel("False Positive Rate")
    roc_ax.set_ylabel("True Positive Rate")
    roc_ax.set_title("AUC = %0.5f" % (auc_value))
    roc_ax.legend(loc="lower right")

    # right panel: histogram of the predicted probabilities
    hist_ax.set_title("{} set (size: {})".format(tvt_code, y.shape[0]))
    hist_ax.hist(scores, bins=200)

    plt.show()
# ROC curve and score histogram of the reloaded model on the "test" split
visualize_auc(pdf=pdf_features_label, tvt_code="test", res_model=res_model)

# feature-importance chart; the figure height grows with the number of
# features (a quarter of a unit per feature)
fig_height = len(res_model["features"]) / 4
fig, ax = plt.subplots(figsize=(10, fig_height))
plot_importance(booster=res_model["xgb_model"], ax=ax)
plt.show()