import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import roc_curve,auc
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
# Load the UCI bank-marketing dataset; this CSV is semicolon-delimited.
df=pd.read_csv('bank-full.csv',sep=';')
# Preview the first 10 rows (notebook display; the return value is not stored).
df.head(10)
age | job | marital | education | default | balance | housing | loan | contact | day | month | duration | campaign | pdays | previous | poutcome | y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 58 | management | married | tertiary | no | 2143 | yes | no | unknown | 5 | may | 261 | 1 | -1 | 0 | unknown | no |
1 | 44 | technician | single | secondary | no | 29 | yes | no | unknown | 5 | may | 151 | 1 | -1 | 0 | unknown | no |
2 | 33 | entrepreneur | married | secondary | no | 2 | yes | yes | unknown | 5 | may | 76 | 1 | -1 | 0 | unknown | no |
3 | 47 | blue-collar | married | unknown | no | 1506 | yes | no | unknown | 5 | may | 92 | 1 | -1 | 0 | unknown | no |
4 | 33 | unknown | single | unknown | no | 1 | no | no | unknown | 5 | may | 198 | 1 | -1 | 0 | unknown | no |
5 | 35 | management | married | tertiary | no | 231 | yes | no | unknown | 5 | may | 139 | 1 | -1 | 0 | unknown | no |
6 | 28 | management | single | tertiary | no | 447 | yes | yes | unknown | 5 | may | 217 | 1 | -1 | 0 | unknown | no |
7 | 42 | entrepreneur | divorced | tertiary | yes | 2 | yes | no | unknown | 5 | may | 380 | 1 | -1 | 0 | unknown | no |
8 | 58 | retired | married | primary | no | 121 | yes | no | unknown | 5 | may | 50 | 1 | -1 | 0 | unknown | no |
9 | 43 | technician | single | secondary | no | 593 | yes | no | unknown | 5 | may | 55 | 1 | -1 | 0 | unknown | no |
# df.columns returns all the column names.
# Data cleaning: collect value_counts() for every column so each categorical
# level can later be mapped to an integer code by its frequency rank.
# value_counts() tallies the unique values of a column. A comprehension
# replaces the manual append loop; str(col) was redundant since column
# names are already strings.
s = [df[col].value_counts() for col in df.columns]
# Transform string (categorical) columns to integer codes.
# Each category is replaced by its position in the column's value_counts()
# index, i.e. the most frequent category becomes 0, the next 1, and so on.
# Columns 0, 5, 9, 11-14 are numeric already and are left untouched —
# this single loop replaces ten near-identical copy-pasted blocks.
categorical_idx = [1, 2, 3, 4, 6, 7, 8, 10, 15, 16]
for idx in categorical_idx:
    col = df.columns[idx]
    # enumerate yields (code, category) pairs in frequency order, replacing
    # the original's repeated list(...).index(...) lookups.
    for code, cat in enumerate(s[idx].index):
        # Sequential .loc replacement is safe: once a cell holds an int it
        # no longer compares equal to any remaining category string.
        df.loc[df[col] == cat, col] = code
# Drop NA: keep only rows with at least 17 non-null values (all 17 columns).
df=df.dropna(thresh=17)
# Fill NA: forward-fill, at most 3 consecutive NaNs per gap.
df=df.fillna(method='ffill',limit=3)
# NOTE(review): this divides by the variance, not the standard deviation, so it
# is not a true z-score — confirm intent. The result is also discarded
# (notebook display only); df itself is NOT normalized by this line.
df.apply(lambda x:((x-x.mean())/x.var()))
age | job | marital | education | default | balance | housing | loan | contact | day | month | duration | campaign | pdays | previous | poutcome | y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.151331 | -0.238980 | -1.068850 | 0.372229 | -1.018335 | 0.000084 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | 0.000043 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
1 | 0.027171 | -0.098488 | 1.013713 | -0.959482 | -1.018335 | -0.000144 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.001616 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
2 | -0.070383 | 0.603972 | -1.068850 | -0.959482 | -1.018335 | -0.000147 | -1.799045 | 6.241027 | 1.570163 | -0.156019 | -0.390517 | -0.002747 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
3 | 0.053777 | -0.379472 | -1.068850 | 3.035650 | -1.018335 | 0.000016 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.002505 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
4 | -0.070383 | 1.165940 | 1.013713 | 3.035650 | -1.018335 | -0.000147 | 2.251382 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.000907 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
5 | -0.052646 | -0.238980 | -1.068850 | 0.372229 | -1.018335 | -0.000122 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.001797 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
6 | -0.114725 | -0.238980 | 1.013713 | 0.372229 | -1.018335 | -0.000099 | -1.799045 | 6.241027 | 1.570163 | -0.156019 | -0.390517 | -0.000621 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
7 | 0.009434 | 0.603972 | 3.096276 | 0.372229 | 55.472393 | -0.000147 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | 0.001837 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
8 | 0.151331 | 0.322988 | -1.068850 | 1.703939 | -1.018335 | -0.000134 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.003139 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
9 | 0.018303 | -0.098488 | 1.013713 | -0.959482 | -1.018335 | -0.000083 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.003063 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
10 | 0.000566 | 0.042004 | 3.096276 | -0.959482 | -1.018335 | -0.000118 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.000545 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
11 | -0.105857 | 0.042004 | 1.013713 | -0.959482 | -1.018335 | -0.000105 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.001827 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
12 | 0.106988 | -0.098488 | -1.068850 | -0.959482 | -1.018335 | -0.000146 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | 0.003903 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
13 | 0.151331 | -0.098488 | -1.068850 | 3.035650 | -1.018335 | -0.000139 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.002822 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
14 | 0.142462 | 0.182496 | -1.068850 | -0.959482 | -1.018335 | -0.000129 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.001269 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
15 | 0.089251 | 0.322988 | -1.068850 | 1.703939 | -1.018335 | -0.000122 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | 0.001430 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
16 | 0.036040 | 0.042004 | 1.013713 | 3.035650 | -1.018335 | -0.000146 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.002415 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
17 | 0.142462 | -0.379472 | -1.068850 | 1.703939 | -1.018335 | -0.000141 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.003320 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
18 | 0.169068 | 0.322988 | -1.068850 | 1.703939 | -1.018335 | -0.000140 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.000591 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
19 | -0.070383 | 0.182496 | -1.068850 | -0.959482 | -1.018335 | -0.000147 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.003078 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
20 | -0.114725 | -0.379472 | -1.068850 | -0.959482 | -1.018335 | -0.000069 | -1.799045 | 6.241027 | 1.570163 | -0.156019 | -0.390517 | 0.000058 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
21 | 0.133594 | -0.238980 | -1.068850 | 0.372229 | -1.018335 | -0.000063 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.001420 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
22 | -0.079251 | -0.379472 | 1.013713 | 1.703939 | -1.018335 | -0.000144 | -1.799045 | 6.241027 | 1.570163 | -0.156019 | -0.390517 | -0.001480 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
23 | -0.141331 | 0.182496 | -1.068850 | -0.959482 | -1.018335 | -0.000142 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | 0.001264 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
24 | -0.008303 | 0.322988 | -1.068850 | 1.703939 | -1.018335 | -0.000147 | -1.799045 | 6.241027 | 1.570163 | -0.156019 | -0.390517 | -0.001163 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
25 | 0.027171 | 0.042004 | -1.068850 | -0.959482 | -1.018335 | -0.000187 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.001299 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
26 | -0.017171 | -0.238980 | 1.013713 | 0.372229 | -1.018335 | -0.000119 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | 0.000571 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
27 | 0.098120 | 0.603972 | -1.068850 | -0.959482 | -1.018335 | -0.000135 | -1.799045 | 6.241027 | 1.570163 | -0.156019 | -0.390517 | -0.001978 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
28 | 0.044908 | -0.238980 | 1.013713 | -0.959482 | -1.018335 | -0.000173 | -1.799045 | -1.190771 | 1.570163 | -0.156019 | -0.390517 | -0.000048 | -0.079586 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
29 | -0.043777 | -0.098488 | 1.013713 | -0.959482 | -1.018335 | -0.000118 | -1.799045 | 6.241027 | 1.570163 | -0.156019 | -0.390517 | 0.001355 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
45181 | 0.044908 | -0.379472 | -1.068850 | -0.959482 | -1.018335 | 0.000595 | 2.251382 | -1.190771 | -1.120941 | -0.011643 | 0.252056 | -0.002777 | -0.079586 | 0.007760 | 0.456040 | 1.455210 | -1.132458 |
45182 | -0.061514 | -0.098488 | -1.068850 | -0.959482 | -1.018335 | -0.000133 | 2.251382 | -1.190771 | -1.120941 | -0.011643 | 0.252056 | 0.002154 | -0.079586 | 0.014642 | 0.832983 | 5.554751 | 8.547930 |
45183 | 0.257753 | 0.322988 | -1.068850 | 1.703939 | -1.018335 | -0.000112 | 2.251382 | -1.190771 | -1.120941 | -0.011643 | 0.252056 | -0.002717 | -0.183777 | 0.005566 | 1.209926 | 5.554751 | -1.132458 |
45184 | 0.195674 | 0.322988 | -1.068850 | -0.959482 | -1.018335 | 0.000014 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | -0.001812 | -0.183777 | -0.001815 | 0.832983 | 5.554751 | -1.132458 |
45185 | 0.169068 | 0.182496 | -1.068850 | 0.372229 | -1.018335 | 0.000312 | -1.799045 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | -0.000877 | -0.183777 | 0.005167 | 0.644512 | 5.554751 | 8.547930 |
45186 | 0.160199 | 1.165940 | -1.068850 | 3.035650 | -1.018335 | 0.000015 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.000329 | -0.183777 | 0.006364 | 0.267569 | 1.455210 | -1.132458 |
45187 | -0.079251 | 0.182496 | 1.013713 | -0.959482 | -1.018335 | -0.000021 | -1.799045 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.002305 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45188 | -0.105857 | -0.238980 | 1.013713 | -0.959482 | -1.018335 | -0.000071 | -1.799045 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | -0.000334 | -0.183777 | 0.050849 | 0.267569 | 5.554751 | 8.547930 |
45189 | -0.141331 | 0.182496 | 1.013713 | -0.959482 | -1.018335 | -0.000125 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | -0.001284 | -0.183777 | 0.005167 | 0.832983 | 1.455210 | -1.132458 |
45190 | -0.079251 | -0.379472 | -1.068850 | -0.959482 | -1.018335 | -0.000132 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | -0.000787 | -0.183777 | 0.014742 | 0.456040 | 5.554751 | 8.547930 |
45191 | 0.302096 | 0.322988 | 3.096276 | 0.372229 | -1.018335 | 0.000264 | -1.799045 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.000058 | -0.183777 | 0.014244 | 0.079097 | 1.455210 | 8.547930 |
45192 | -0.105857 | -0.238980 | 1.013713 | 0.372229 | -1.018335 | -0.000064 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | -0.000304 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45193 | -0.114725 | 0.463480 | 1.013713 | 0.372229 | -1.018335 | -0.000130 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.002877 | -0.079586 | -0.000718 | 0.644512 | 5.554751 | 8.547930 |
45194 | 0.160199 | -0.238980 | -1.068850 | 0.372229 | -1.018335 | -0.000132 | -1.799045 | 6.241027 | -1.120941 | 0.002795 | 0.252056 | -0.001450 | -0.079586 | 0.014642 | 0.832983 | 1.455210 | -1.132458 |
45195 | 0.240016 | 0.322988 | -1.068850 | -0.959482 | -1.018335 | -0.000023 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | -0.000696 | -0.183777 | 0.014642 | 1.021455 | 5.554751 | 8.547930 |
45196 | -0.141331 | 1.025448 | 1.013713 | -0.959482 | -1.018335 | -0.000108 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.001083 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45197 | -0.043777 | -0.238980 | 1.013713 | -0.959482 | -1.018335 | 0.000016 | -1.799045 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.000178 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45198 | -0.034908 | -0.238980 | -1.068850 | 0.372229 | -1.018335 | 0.000007 | 2.251382 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.001128 | -0.079586 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
45199 | -0.061514 | -0.379472 | 1.013713 | -0.959482 | -1.018335 | 0.000012 | -1.799045 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.013689 | 0.024606 | 0.048854 | 2.152284 | 3.504981 | -1.132458 |
45200 | -0.026040 | -0.098488 | -1.068850 | -0.959482 | -1.018335 | -0.000087 | -1.799045 | -1.190771 | -1.120941 | 0.002795 | 0.252056 | 0.019569 | 0.128797 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45201 | 0.106988 | -0.238980 | -1.068850 | 0.372229 | -1.018335 | -0.000084 | 2.251382 | -1.190771 | -1.120941 | 0.017232 | 0.252056 | -0.000485 | -0.183777 | 0.014343 | 0.644512 | 5.554751 | 8.547930 |
45202 | -0.061514 | 0.042004 | 1.013713 | -0.959482 | -1.018335 | -0.000087 | 2.251382 | -1.190771 | -1.120941 | 0.017232 | 0.252056 | -0.000515 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45203 | -0.159068 | 1.025448 | 1.013713 | 0.372229 | -1.018335 | -0.000135 | 2.251382 | -1.190771 | -1.120941 | 0.017232 | 0.252056 | 0.000118 | -0.183777 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45204 | 0.284359 | 0.322988 | -1.068850 | -0.959482 | -1.018335 | 0.000160 | 2.251382 | -1.190771 | -1.120941 | 0.017232 | 0.252056 | 0.000631 | -0.183777 | -0.000020 | 1.398398 | 1.455210 | 8.547930 |
45205 | -0.141331 | -0.098488 | 1.013713 | -0.959482 | -1.018335 | -0.000092 | 2.251382 | 6.241027 | -1.120941 | 0.017232 | 0.252056 | 0.001928 | -0.079586 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45206 | 0.089251 | -0.098488 | -1.068850 | 0.372229 | -1.018335 | -0.000058 | 2.251382 | -1.190771 | -1.120941 | 0.017232 | 0.252056 | 0.010839 | 0.024606 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45207 | 0.266622 | 0.322988 | 3.096276 | 1.703939 | -1.018335 | 0.000040 | 2.251382 | -1.190771 | -1.120941 | 0.017232 | 0.252056 | 0.002983 | -0.079586 | -0.004109 | -0.109374 | -0.594561 | 8.547930 |
45208 | 0.275491 | 0.322988 | -1.068850 | -0.959482 | -1.018335 | 0.000470 | 2.251382 | -1.190771 | -1.120941 | 0.017232 | 0.252056 | 0.013101 | 0.232988 | 0.014343 | 0.456040 | 5.554751 | 8.547930 |
45209 | 0.142462 | -0.379472 | -1.068850 | -0.959482 | -1.018335 | -0.000075 | 2.251382 | -1.190771 | 4.261267 | 0.017232 | 0.252056 | 0.003767 | 0.128797 | -0.004109 | -0.109374 | -0.594561 | -1.132458 |
45210 | -0.034908 | 0.603972 | -1.068850 | -0.959482 | -1.018335 | 0.000174 | 2.251382 | -1.190771 | -1.120941 | 0.017232 | 0.252056 | 0.001551 | -0.079586 | 0.014742 | 1.963813 | 3.504981 | -1.132458 |
45211 rows × 17 columns
# Select X (features) and y (target) from the dataframe.
X=df.iloc[:,0:16]
# If y=df.iloc[:,16:17] we would get a DataFrame; iloc[:,16] yields a Series.
y=df.iloc[:,16]
# train_test_split holds out 25% as TEST data by default; pass 'test_size'
# to choose a different split fraction.
# NOTE(review): no random_state is set, so the split differs between runs.
X_train,X_test,y_train,y_test=train_test_split(X,y)
#################################################### logistic regression ####################################################
# Specify the solver explicitly: 'liblinear' was this sklearn version's
# implicit default, so results are unchanged while the FutureWarning
# ("Default solver will be changed to 'lbfgs'") is silenced.
log_reg=LogisticRegression(solver='liblinear')
log_reg.fit(X_train,y_train)
pred_log=log_reg.predict(X_test)
# predict_proba returns one probability column per class (two for binary
# classification); column 1 is the positive-class probability used for AUC.
pred_proba_log=log_reg.predict_proba(X_test)
print("Logistic classification results:")
# accuracy: fraction of ALL predictions (positive and negative) that are correct
print("accuracy_score:",accuracy_score(y_test,pred_log))
# precision: fraction of predicted positives that are truly positive
print("precision_score:",precision_score(y_test,pred_log))
# recall: fraction of true positives that were correctly predicted
print("recall_score",recall_score(y_test,pred_log))
print("auc:",roc_auc_score(y_test,pred_proba_log[:,1]))
print("f1_score(weighted):",f1_score(y_test,pred_log,average='weighted'))
print("f1_score(macro):",f1_score(y_test,pred_log,average='macro'))
print("f1_score(micro):",f1_score(y_test,pred_log,average='micro'))
# Default average='binary': f1 of the positive class only.
print("f1_score(None):",f1_score(y_test,pred_log))
C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning)
Logistic classification results: accuracy_score: 0.8971069627532513 precision_score: 0.6228748068006182 recall_score 0.3048411497730711 auc: 0.8887919573665001 f1_score(weighted): 0.8811531211378714 f1_score(macro): 0.6764948773636862 f1_score(micro): 0.8971069627532513 f1_score(None): 0.409344845099035
#################################################### svm ####################################################
# NOTE(review): these imports duplicate the ones at the top of the file.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
# Method without a pipeline:
# scaler=StandardScaler()
# scaler.fit(df)
# svm_clf=SVC(C=1,probability=True,verbose=1)
# Pipeline expects a *list* of (name, estimator) steps; passing a tuple of
# tuples happened to work on old sklearn but is not the documented API.
# StandardScaler standardizes each feature column before the linear SVM.
svm_clf=Pipeline([
    ('scaler',StandardScaler()),
    ('linear_svc',LinearSVC(C=1,loss='hinge'))
])
svm_clf.fit(X_train,y_train)
pred_svm=svm_clf.predict(X_test)
print("svm classification result")
print("accuracy_score:",accuracy_score(y_test,pred_svm))
print("precision_score:",precision_score(y_test,pred_svm))
print("recall_score",recall_score(y_test,pred_svm))
# LinearSVC has no predict_proba, so ROC-AUC cannot be computed here;
# use SVC(probability=True) or decision_function if AUC is needed.
#print("auc:",roc_auc_score(y_test,pred_proba_svm[:,1]))#auc
print("f1_score(weighted):",f1_score(y_test,pred_svm,average='weighted'))
print("f1_score(macro):",f1_score(y_test,pred_svm,average='macro'))
print("f1_score(micro):",f1_score(y_test,pred_svm,average='micro'))
print("f1_score(None):",f1_score(y_test,pred_svm))
C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\preprocessing\data.py:625: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. return self.partial_fit(X, y) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\base.py:465: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. return self.fit(X, y, **fit_params).transform(X)
svm classification result accuracy_score: 0.8945412722286119 precision_score: 0.6805555555555556 recall_score 0.18532526475037822 f1_score(weighted): 0.8668076170408332 f1_score(macro): 0.6171758911874263 f1_score(micro): 0.8945412722286119 f1_score(None): 0.291319857312723
C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\svm\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. "the number of iterations.", ConvergenceWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\pipeline.py:331: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. Xt = transform.transform(Xt)
#################################################### random forest ####################################################
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
print("random forest classification result:")
# 500 trees, each capped at 16 leaves; n_jobs=-1 trains on all CPU cores.
rnd_clf=RandomForestClassifier(n_estimators=500,max_leaf_nodes=16,n_jobs=-1)
rnd_clf.fit(X_train,y_train)
pred_rf=rnd_clf.predict(X_test)
pred_proba_rf=rnd_clf.predict_proba(X_test)
# Report the same metric suite as the other models; column 1 of
# predict_proba is the positive-class probability used for AUC.
for metric_label,metric_value in (
        ("accuracy_score:",accuracy_score(y_test,pred_rf)),
        ("precision_score:",precision_score(y_test,pred_rf)),
        ("recall_score",recall_score(y_test,pred_rf)),
        ("auc:",roc_auc_score(y_test,pred_proba_rf[:,1])),
        ("f1_score(weighted):",f1_score(y_test,pred_rf,average='weighted')),
        ("f1_score(macro):",f1_score(y_test,pred_rf,average='macro')),
        ("f1_score(micro):",f1_score(y_test,pred_rf,average='micro')),
        ("f1_score(None):",f1_score(y_test,pred_rf)),
):
    print(metric_label,metric_value)
random forest classification result: accuracy_score: 0.8969300185791382 precision_score: 0.755700325732899 recall_score 0.17549167927382753 auc: 0.9073402475293071 f1_score(weighted): 0.8673131007408489 f1_score(macro): 0.6146501533893725 f1_score(micro): 0.8969300185791382 f1_score(None): 0.2848373235113566
#################################################### stacking classifier ####################################################
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier
print("Stacking:\n")
# Base learners: 1-NN, random forest, and Gaussian naive Bayes; a logistic
# regression combines their predictions as the meta-classifier.
clf1=KNeighborsClassifier(n_neighbors=1)
clf2=RandomForestClassifier(random_state=1)  # renamed from 'clr2' (typo) for consistency
clf3=GaussianNB()
lr=LogisticRegression()  # meta-classifier (logistic regression)
sclf=StackingClassifier(classifiers=[clf1,clf2,clf3],meta_classifier=lr)
print('3-fold cross validation:\n')
# One loop over scoring names replaces the six copy-pasted
# cross_val_score calls of the original.
scorings=('accuracy','roc_auc','f1','f1_macro','f1_micro','f1_weighted')
for clf,label in zip([clf1,clf2,clf3,sclf],
                     ['KNN',
                      'Random Forest',
                      'Naive Bayes',
                      'StackingClassifier']):
    scores={sc:model_selection.cross_val_score(clf,X,y,cv=3,scoring=sc) for sc in scorings}
    print("Accuracy:%0.2f(+/- %0.2f) [%s]\nAuc:%0.2f(+/- %0.2f) [%s]\nf1:%0.2f(+/- %0.2f) [%s]\nf1_micro:%0.2f(+/- %0.2f) [%s]\nf1_macro:%0.2f(+/- %0.2f) [%s]\nf1_weighted:%0.2f(+/- %0.2f) [%s]\n"
          %(scores['accuracy'].mean(),scores['accuracy'].std(),label,
            scores['roc_auc'].mean(),scores['roc_auc'].std(),label,
            scores['f1'].mean(),scores['f1'].std(),label,
            scores['f1_micro'].mean(),scores['f1_micro'].std(),label,
            scores['f1_macro'].mean(),scores['f1_macro'].std(),label,
            scores['f1_weighted'].mean(),scores['f1_weighted'].std(),label
            ))
"""
print("Normal stacking:\n")
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import EnsembleVoteClassifier
#initalizing classifiers
clf1=LogisticRegression(random_state=0)
clf2=RandomForestClassifier(random_state=0)
clf3=SVC(random_state=0,probability=True)
eclf=EnsembleVoteClassifier(clfs=[clf1,clf2,clf3],weights=[2,1,1],voting='soft')
#loading some example data
for clf,lab in zip([clf1,clf2,clf3,eclf],
['Logistic Regression','Random Forest','Naive Bayes','Ensemble']):
scores_acc=model_selection.cross_val_score(clf,X,y,cv=3,scoring='accuracy')
scores_auc=model_selection.cross_val_score(clf,X,y,cv=3,scoring='roc_auc')
scores_f1=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1')
scores_f1_macro=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_macro')
scores_f1_micro=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_micro')
scores_f1_weighted=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_weighted')
print("Accuracy:%0.2f(+/- %0.2f) [%s]\nAuc:%0.2f(+/- %0.2f) [%s]\nf1:%0.2f(+/- %0.2f) [%s]\nf1_micro:%0.2f(+/- %0.2f) [%s]\nf1_macro:%0.2f(+/- %0.2f) [%s]\nf1_weighted:%0.2f(+/- %0.2f) [%s]\n"
%(scores_acc.mean(),scores_acc.std(),lab,
scores_auc.mean(),scores_auc.std(),lab,
scores_f1.mean(),scores_f1.std(),lab,
scores_f1_micro.mean(),scores_f1_micro.std(),lab,
scores_f1_macro.mean(),scores_f1_macro.std(),lab,
scores_f1_weighted.mean(),scores_f1_weighted.std(),lab
))"""
Stacking: 3-fold cross validation: Accuracy:0.81(+/- 0.07) [KNN] Auc:0.59(+/- 0.00) [KNN] f1:0.27(+/- 0.01) [KNN] f1_micro:0.81(+/- 0.07) [KNN] f1_macro:0.58(+/- 0.03) [KNN] f1_weighted:0.81(+/- 0.04) [KNN]
C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. 
"10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. 
"10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning)
Accuracy:0.65(+/- 0.26) [Random Forest] Auc:0.58(+/- 0.06) [Random Forest] f1:0.17(+/- 0.04) [Random Forest] f1_micro:0.65(+/- 0.26) [Random Forest] f1_macro:0.44(+/- 0.12) [Random Forest] f1_weighted:0.65(+/- 0.24) [Random Forest] Accuracy:0.79(+/- 0.16) [Naive Bayes] Auc:0.78(+/- 0.06) [Naive Bayes] f1:0.42(+/- 0.08) [Naive Bayes] f1_micro:0.79(+/- 0.16) [Naive Bayes] f1_macro:0.64(+/- 0.10) [Naive Bayes] f1_weighted:0.81(+/- 0.12) [Naive Bayes]
C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. 
"10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. 
"10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. 
"10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. 
"10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\ensemble\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning)
Accuracy:0.81(+/- 0.07) [StackingClassifier] Auc:0.68(+/- 0.02) [StackingClassifier] f1:0.27(+/- 0.01) [StackingClassifier] f1_micro:0.81(+/- 0.07) [StackingClassifier] f1_macro:0.58(+/- 0.03) [StackingClassifier] f1_weighted:0.81(+/- 0.04) [StackingClassifier]
#################################################### ROC Curve ####################################################
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import roc_curve,auc
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from matplotlib import pyplot as plt
# Plot a per-fold ROC curve for a scale -> PCA -> logistic-regression pipeline,
# cross-validated with stratified K-fold on the training split.
print('ROC curve')
# shuffle=True is required for random_state to have any effect
# (and sklearn >= 0.24 raises ValueError without it).
kfold=StratifiedKFold(n_splits=2,shuffle=True,random_state=1)
'''Pipeline takes a list (or tuple) of (name, estimator) tuples.'''
# solver given explicitly to silence the sklearn 0.20 FutureWarning.
pipe_lr=Pipeline([('scl',StandardScaler()),('pca',PCA(n_components=1)),('clf',LogisticRegression(solver='liblinear',random_state=1))])
for i, (train,test) in enumerate(kfold.split(X_train,y_train)):
    # Fit on this fold's train indices, score the held-out indices;
    # column 1 of predict_proba is P(positive class).
    prob=pipe_lr.fit(X_train.iloc[train],y_train.iloc[train]).predict_proba(X_train.iloc[test])
    fpr,tpr,thresholds=roc_curve(y_train.iloc[test],prob[:,1],pos_label=1)
    roc_auc=auc(fpr,tpr)
    plt.plot(fpr,tpr,label='ROC fold:{},auc:{:.3f}'.format(i,roc_auc))
# chance-level diagonal for visual reference
plt.plot([0,1],[0,1],linestyle='--',color='gray',label='random guess')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
ROC curve
C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\preprocessing\data.py:625: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. return self.partial_fit(X, y) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\base.py:465: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. return self.fit(X, y, **fit_params).transform(X) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\pipeline.py:381: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. Xt = transform.transform(Xt) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\preprocessing\data.py:625: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. return self.partial_fit(X, y) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\base.py:465: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. return self.fit(X, y, **fit_params).transform(X) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) C:\Users\dizhe\Anaconda3\envs\mcm\lib\site-packages\sklearn\pipeline.py:381: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler. Xt = transform.transform(Xt)