#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
from scipy.stats import mode
from scipy.spatial.distance import cdist
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as skKNeighborsClassifier


# ### Implementation 1
# - based on brute force
# - similar to scikit-learn algorithm='brute', weights='uniform'

# In[2]:


class KNeighborsClassifier():
    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        self._fit_X = X
        self.classes_, self._y = np.unique(y, return_inverse=True)
        return self

    def predict(self, X):
        dist_mat = cdist(X, self._fit_X)
        neigh_ind = np.argsort(dist_mat, axis=1)[:, :self.n_neighbors]
        return self.classes_[mode(self._y[neigh_ind], axis=1)[0].ravel()]

    def predict_proba(self, X):
        dist_mat = cdist(X, self._fit_X)
        neigh_ind = np.argsort(dist_mat, axis=1)[:, :self.n_neighbors]
        proba = np.zeros((X.shape[0], len(self.classes_)))
        pred_labels = self._y[neigh_ind]
        for idx in pred_labels.T:
            proba[np.arange(X.shape[0]), idx] += 1
        proba /= np.sum(proba, axis=1)[:, np.newaxis]
        return proba


# In[3]:


X, y = load_breast_cancer(return_X_y=True)
X = StandardScaler().fit_transform(X)
clf1 = KNeighborsClassifier().fit(X, y)
clf2 = skKNeighborsClassifier().fit(X, y)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)