import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset.
data = load_breast_cancer()

# Explanatory variables come from the 'data' key; relabel the target so
# that 0 = 'benign' and 1 = 'malignant' (the original coding is flipped).
X, Y = data['data'], 1 - data['target']

# Split the dataset into train and test portions (60% / 40%).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=1234)
#공식 사용
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), elementwise.

    The argument is clipped to [-500, 500] before exponentiating so that
    very large negative inputs do not overflow np.exp (which would emit a
    RuntimeWarning and produce inf). Within the clipped range the result
    is identical to the naive formula to machine precision.
    """
    z = np.clip(x, -500.0, 500.0)
    return 1.0 / (1.0 + np.exp(-z))
def gradient(X, Y, beta):
    """Summed gradient of the logistic loss over all samples.

    X    : (n, d) design matrix (bias column already prepended by caller).
    Y    : (n, 1) labels coded as +1 / -1.
    beta : (1, d) current coefficient row vector.

    Returns a length-d array: sum_i  -y_i * (1 - sigmoid(y_i * x_i @ beta)) * x_i.
    """
    margin = np.dot(X, beta.T) * Y
    prob = 1.0 / (1.0 + np.exp(-margin))  # logistic sigmoid, inlined
    per_sample = -Y * (1.0 - prob) * X
    return per_sample.sum(axis=0)
class LogisticRegression:
    """Binary logistic-regression classifier trained by full-batch gradient descent."""

    def __init__(self, learn_rate):
        # Gradient-descent step size.
        self.rate = learn_rate
        # Number of coefficients (features + intercept); set by train().
        self.n_nodes = None
        # (1, n_nodes) coefficient row vector; set by train().
        self.beta = None

    def train(self, input_X, input_Y, n_epochs):
        """Fit the coefficients on (input_X, input_Y) and return them.

        input_X  : (n, d) feature matrix.
        input_Y  : length-n labels in {0, 1}.
        n_epochs : number of full-batch gradient-descent steps.
        """
        n_samples, n_features = input_X.shape
        self.n_nodes = n_features + 1
        # Random standard-normal initialization of the coefficients.
        self.beta = np.random.normal(0.0, 1.0, (1, self.n_nodes))
        # Prepend an all-ones column so beta[0] acts as the intercept.
        X = np.concatenate((np.ones((n_samples, 1)), input_X), axis=1)
        # Recode labels from {0, 1} to {-1, +1} as a column vector.
        Y = (2 * input_Y - 1).reshape(-1, 1)
        for _ in range(n_epochs):
            self.beta = self.beta - self.rate * gradient(X, Y, self.beta)
        return self.beta

    def query(self, input_X, prob=True, cutoff=0.5):
        """Predict probabilities (prob=True) or 0/1 labels via `cutoff` (prob=False)."""
        intercept = np.ones((input_X.shape[0], 1))
        design = np.concatenate((intercept, input_X), axis=1)
        scores = sigmoid(np.dot(design, self.beta.T))
        if prob:
            return scores
        return (scores > cutoff).astype('int')
# Hyperparameter for the learner.
learning_rate = 0.001

# Train and predict.
LR = LogisticRegression(learning_rate)
LR.train(X_train, Y_train, 2000)
Y_pred = LR.query(X_test, prob=False, cutoff=0.5)

# Display the accuracy: (Y_pred == Y_test) gives a boolean array whose
# mean is the fraction of correct predictions (True counts as 1, False
# as 0), i.e. the classification accuracy.
acc = (Y_pred == Y_test.reshape(-1, 1)).mean()
print('Accuracy : {}'.format(np.round(acc, 3)))

# Sample output (exact value varies with the random coefficient init):
# Accuracy : 0.912