#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
from scipy.spatial.distance import pdist, squareform
from sklearn.datasets import load_iris
from sklearn.cluster import DBSCAN as skDBSCAN


# ### Implementation 1
# - based on brute force
# - similar to scikit-learn algorithm='brute'

# In[2]:


class DBSCAN():
    def __init__(self, eps=0.5, min_samples=5):
        self.eps = eps
        self.min_samples = min_samples

    def fit(self, X):
        dist = squareform(pdist(X))
        neighbors = np.array([np.where(d <= self.eps)[0] for d in dist])
        n_neighbors = np.array([len(neighbor) for neighbor in neighbors])
        core_samples = n_neighbors >= self.min_samples
        labels = np.full(X.shape[0], -1)
        label_num = 0
        stack = []
        for i in range(len(labels)):
            if labels[i] != -1 or not core_samples[i]:
                continue
            stack.append(i)
            while len(stack):
                cur = stack.pop()
                if labels[cur] == -1:
                    labels[cur] = label_num
                    if core_samples[cur]:
                        for neighbor in neighbors[cur]:
                            if labels[neighbor] == -1 and neighbor not in stack:
                                stack.append(neighbor)
            label_num += 1
        self.core_sample_indices_ = np.where(core_samples)[0]
        self.labels_= labels
        return self

    def fit_predict(self, X):
        self.fit(X)
        return self.labels_


# In[3]:


X, _ = load_iris(return_X_y=True)
clust1 = DBSCAN().fit(X)
clust2 = skDBSCAN().fit(X)
assert np.allclose(clust1.core_sample_indices_, clust2.core_sample_indices_)
assert np.array_equal(clust1.labels_, clust2.labels_)