#!/usr/bin/env python # coding: utf-8 # # Import # In[1]: import pandas as pd import os from sklearn.cross_validation import train_test_split from sklearn.metrics import accuracy_score from sklearn.svm import SVC from sklearn.metrics import confusion_matrix print pd.__version__ # In[2]: # reading data into a dataframe DATA_DIR = 'data' df = pd.read_csv( os.path.abspath(os.path.join(DATA_DIR, 'day9/banknote_authentication.csv')) ) df.head(5) # In[3]: # (rows, columns) df.shape # In[4]: # look for NaN values df.isnull().sum() # In[5]: # let's figure out the distribution of target variable # there are many options to handle this imbalance; of which one is to add false data to class 1; secondly we can # delete records from 0 to make it equal to 1 # we will leave it for now df['target'].value_counts() # In[6]: X = df.iloc[:, :-1].values Y = df.iloc[:, -1].values # # Split # In[7]: # ideal practice is to use test as 20% - 30% of training data # defined by test_size in train_test_split() # random_state is required to avoid sequential biasness in the data distribution def data_split(X, Y): X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size=0.3, random_state = 10) return X_train, X_test, Y_train, Y_test X_train, X_test, Y_train, Y_test = data_split(X, Y) # In[8]: X_train.shape, X_test.shape # # Model Define # In[9]: class SVMModel: def __init__(self): self.classifier = SVC() def train(self, X_train, Y_train): model = self.classifier.fit(X_train, Y_train) return model def predict(self, model, X_test): return model.predict(X_test) def evaluate(self, Y_test, Y_pred, measure): if measure=='matrix': cm = confusion_matrix(Y_test, Y_pred, labels=[0, 1]) return cm elif measure=='accuracy': return accuracy_score(Y_test, Y_pred)*100 else: return None # # Training # In[10]: # train the model svm = SVMModel() model = svm.train(X_train, Y_train) predictions = svm.predict(model, X_test) # In[12]: # evaluating the model print svm.evaluate(Y_test, predictions, 'matrix') print print svm.evaluate(Y_test, predictions, 'accuracy')