Classification: Table of Contents

1 Libraries
2 Breast Cancer - Binary Classification
- 2.1 Sigmoid function
- 2.2 Logistic Regression
3 Iris - Multi-Classs

Libraries¶

In [35]:

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
np.set_printoptions(suppress=True)

Breast Cancer - Binary Classification¶

In [52]:

data = datasets.load_breast_cancer()

In [53]:

X = data['data']
y = data['target']
target_names = data['target_names']
feature_names = data['feature_names']

X.shape, y.shape

Out[53]:

((569, 30), (569,))

In [38]:

print('Number of features:',X.shape[1])
print('Number of examples:',X.shape[0])

Number of features: 30
Number of examples: 569

In [41]:

print('Features:\n',feature_names)
print('Target:\n',target_names)

Features:
 ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Target:
 ['malignant' 'benign']

In [8]:

print('input xi :\n',X[0])
print('\ntraget yi:',y[0])

input xi :
 [  17.99       10.38      122.8      1001.          0.1184      0.2776
    0.3001      0.1471      0.2419      0.07871     1.095       0.9053
    8.589     153.4         0.006399    0.04904     0.05373     0.01587
    0.03003     0.006193   25.38       17.33      184.6      2019.
    0.1622      0.6656      0.7119      0.2654      0.4601      0.1189  ]

traget yi: 0

Sigmoid function¶

In [43]:

def sigmoid(h):
    yp = 1/(1+np.exp(-h))
    return yp

In [61]:

h= np.arange(-7,7,0.01)
plt.plot(h,sigmoid(h))
plt.grid()
plt.xlim([-5,5])
plt.ylim([0,1])
plt.xlabel('input: h')
plt.ylabel('output: y"')
plt.axvline(x=0,color='k',lw=0.8,ls='--')
plt.axhline(y=0.5,color='k',lw=0.8,ls='--')
plt.title('sigmoid')
plt.show()

Logistic Regression¶

In [49]:

from sklearn.linear_model import LogisticRegression

In [50]:

model = LogisticRegression()

In [51]:

model

Out[51]:

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

Spliting data¶

In [54]:

Xt,Xs,yt,ys = train_test_split(X,y,test_size=0.3)

Training¶

In [55]:

model.fit(Xt,yt)

C:\Users\nbajaj\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)

Out[55]:

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

Predicting and testing¶

In [56]:

ytp = model.predict(Xt)
ysp = model.predict(Xs)

Prediction of first example¶

In [57]:

model.predict(Xt[:1])

Out[57]:

array([1])

Probability score¶

In [58]:

model.predict_proba(Xt[:1])

Out[58]:

array([[0.14251963, 0.85748037]])

In [59]:

ytp

Out[59]:

array([1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0,
       1, 1])

In [60]:

yt

Out[60]:

array([1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0,
       1, 1])

Accuracy¶

In [61]:

print('Training Accuracy : ',np.mean(ytp==yt))
print('Testing  Accuracy : ',np.mean(ysp==ys))

Training Accuracy :  0.9447236180904522
Testing  Accuracy :  0.9415204678362573

Iris - Multi-Classs¶

In [62]:

data = datasets.load_iris(return_X_y=False)

In [63]:

data.keys()

Out[63]:

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [64]:

X = data['data']
y = data['target']
target_names = data['target_names']
feature_names = data['feature_names']

X.shape, y.shape

Out[64]:

((150, 4), (150,))

In [6]:

target_names

Out[6]:

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [65]:

feature_names

Out[65]:

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [66]:

print('Number of features:',X.shape[1])
print('Number of examples:',X.shape[0])

Number of features: 4
Number of examples: 150

In [67]:

n=0
for i in range(X[n].shape[0]):
    print(feature_names[i],X[n][i],sep='\t')
print('--------')
print('target : ',y[n])

sepal length (cm)	5.1
sepal width (cm)	3.5
petal length (cm)	1.4
petal width (cm)	0.2
--------
target :  0

In [20]:

idx = list(range(4))+list(np.arange(3)+50)+list(np.arange(3)+100) 
print(' x1 \t  x2 \t x3 \t x4 \t| y')
print('_'*40)
for xi,yi in zip(X[idx], y[idx]):
    print(xi[0],'\t',xi[1],'\t',xi[2],'\t',xi[3],'\t|',yi)

 x1 	  x2 	 x3 	 x4 	| y
________________________________________
5.1 	 3.5 	 1.4 	 0.2 	| 0
4.9 	 3.0 	 1.4 	 0.2 	| 0
4.7 	 3.2 	 1.3 	 0.2 	| 0
4.6 	 3.1 	 1.5 	 0.2 	| 0
7.0 	 3.2 	 4.7 	 1.4 	| 1
6.4 	 3.2 	 4.5 	 1.5 	| 1
6.9 	 3.1 	 4.9 	 1.5 	| 1
6.3 	 3.3 	 6.0 	 2.5 	| 2
5.8 	 2.7 	 5.1 	 1.9 	| 2
7.1 	 3.0 	 5.9 	 2.1 	| 2

Splitting data¶

In [68]:

Xt,Xs,yt,ys = train_test_split(X,y,test_size=0.3)

In [69]:

model = LogisticRegression()

Training¶

In [70]:

model.fit(Xt,yt)

C:\Users\nbajaj\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)

Out[70]:

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

Prediction¶

In [71]:

ytp = model.predict(Xt)
ysp = model.predict(Xs)

Accuracy¶

In [72]:

print('Training Accuracy : ',np.mean(ytp==yt))
print('Testing  Accuracy : ',np.mean(ysp==ys))

Training Accuracy :  0.9714285714285714
Testing  Accuracy :  0.9111111111111111

In [ ]: