#!/usr/bin/env python # coding: utf-8 # In[1]: import numpy as np from scipy.stats import mode from scipy.spatial.distance import cdist from sklearn.datasets import load_breast_cancer from sklearn.preprocessing import StandardScaler from sklearn.neighbors import KNeighborsClassifier as skKNeighborsClassifier # ### Implementation 1 # - based on brute force # - similar to scikit-learn algorithm='brute', weights='uniform' # In[2]: class KNeighborsClassifier(): def __init__(self, n_neighbors=5): self.n_neighbors = n_neighbors def fit(self, X, y): self._fit_X = X self.classes_, self._y = np.unique(y, return_inverse=True) return self def predict(self, X): dist_mat = cdist(X, self._fit_X) neigh_ind = np.argsort(dist_mat, axis=1)[:, :self.n_neighbors] return self.classes_[mode(self._y[neigh_ind], axis=1)[0].ravel()] def predict_proba(self, X): dist_mat = cdist(X, self._fit_X) neigh_ind = np.argsort(dist_mat, axis=1)[:, :self.n_neighbors] proba = np.zeros((X.shape[0], len(self.classes_))) pred_labels = self._y[neigh_ind] for idx in pred_labels.T: proba[np.arange(X.shape[0]), idx] += 1 proba /= np.sum(proba, axis=1)[:, np.newaxis] return proba # In[3]: X, y = load_breast_cancer(return_X_y=True) X = StandardScaler().fit_transform(X) clf1 = KNeighborsClassifier().fit(X, y) clf2 = skKNeighborsClassifier().fit(X, y) prob1 = clf1.predict_proba(X) prob2 = clf2.predict_proba(X) assert np.allclose(prob1, prob2) pred1 = clf1.predict(X) pred2 = clf2.predict(X) assert np.array_equal(pred1, pred2)