#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
from scipy.special import expit, xlogy
from scipy.optimize import minimize
from sklearn.base import clone
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV as skCalibratedClassifierCV


# In[2]:


class CalibratedClassifierCV():
    """Calibrate a classifier's probabilities with stratified cross-validation.

    For every split produced by ``StratifiedKFold()`` a clone of
    ``base_estimator`` is fitted on the training part and a
    ``_CalibratedClassifier`` is fitted on the held-out part. Predicted
    probabilities are the mean over all per-fold calibrated classifiers.

    Parameters
    ----------
    base_estimator : estimator, optional
        Classifier to calibrate. It is cloned for every fold, and each clone
        must provide ``fit`` and ``decision_function``. When omitted (or
        ``None``), a fresh ``LinearSVC(random_state=0)`` is created for this
        instance.

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    calibrated_classifiers_ : list
        One fitted ``_CalibratedClassifier`` per cross-validation fold.
    """

    def __init__(self, base_estimator=None):
        # Build the default here rather than in the signature: a call in the
        # signature is evaluated once, at definition time, and the resulting
        # estimator object would be shared by every instance.
        if base_estimator is None:
            base_estimator = LinearSVC(random_state=0)
        self.base_estimator = base_estimator

    def fit(self, X, y):
        """Fit one calibrated classifier per stratified fold.

        Parameters
        ----------
        X : ndarray
            Training samples; rows are selected with integer index arrays.
        y : array-like
            Target labels.

        Returns
        -------
        self
        """
        # Encode labels as 0..n_classes-1; classes_ maps the codes back.
        self.classes_, y_encoded = np.unique(y, return_inverse=True)
        self.calibrated_classifiers_ = []
        splitter = StratifiedKFold()
        for train, test in splitter.split(X, y):
            fold_estimator = clone(self.base_estimator)
            fold_estimator.fit(X[train], y_encoded[train])
            # Calibrate on data the fold estimator has not been trained on.
            calibrated = _CalibratedClassifier(fold_estimator)
            calibrated.fit(X[test], y_encoded[test])
            self.calibrated_classifiers_.append(calibrated)
        return self

    def predict_proba(self, X):
        """Return the mean calibrated probabilities over all folds.

        Returns
        -------
        ndarray of shape (X.shape[0], len(classes_))
        """
        proba = np.zeros((X.shape[0], len(self.classes_)))
        for calibrated in self.calibrated_classifiers_:
            proba += calibrated.predict_proba(X)
        proba /= len(self.calibrated_classifiers_)
        return proba

    def predict(self, X):
        """Return, for each sample, the label with the highest probability."""
        proba = self.predict_proba(X)
        return self.classes_[np.argmax(proba, axis=1)]


# In[3]:


class _CalibratedClassifier():
    """Sigmoid calibration on top of an already fitted classifier.

    ``base_estimator`` is never fitted here; only its ``decision_function``
    is called. One ``_SigmoidCalibration`` is fitted per decision-function
    column (a single one in the binary case).

    Parameters
    ----------
    base_estimator : estimator, optional
        Estimator whose ``decision_function`` scores are calibrated. When
        omitted (or ``None``), a fresh ``LinearSVC(random_state=0)`` is
        created for this instance.

    Attributes
    ----------
    n_classes : int
        Number of distinct labels seen in ``fit``.
    calibrators_ : list
        Fitted ``_SigmoidCalibration`` objects.
    """

    def __init__(self, base_estimator=None):
        # Resolve the default per instance; a call in the signature would be
        # evaluated once at definition time and shared by all instances.
        if base_estimator is None:
            base_estimator = LinearSVC(random_state=0)
        self.base_estimator = base_estimator

    def fit(self, X, y):
        """Fit the sigmoid calibrators on the scores of ``base_estimator``.

        Parameters
        ----------
        X : ndarray
            Calibration samples passed to ``decision_function``.
        y : ndarray
            Labels, compared against ``0..n_classes-1``; the class count is
            taken from the distinct values present in ``y``.

        Returns
        -------
        self
        """
        self.n_classes = len(np.unique(y))
        # One-hot targets: column i is 1 where y == i, 0 elsewhere.
        y_train = (y[:, np.newaxis] == np.arange(self.n_classes)).astype(float)
        y_prob = self.base_estimator.decision_function(X)
        if self.n_classes == 2:
            # Binary case: a single score, calibrated against class 1.
            y_train = y_train[:, 1].reshape(-1, 1)
            y_prob = y_prob.reshape(-1, 1)
        self.calibrators_ = []
        for i, cur_prob in enumerate(y_prob.T):
            calibrator = _SigmoidCalibration()
            calibrator.fit(cur_prob, y_train[:, i])
            self.calibrators_.append(calibrator)
        return self

    def predict_proba(self, X):
        """Return calibrated class probabilities.

        Returns
        -------
        ndarray of shape (X.shape[0], n_classes)
            Binary case: column 1 is the calibrated score and column 0 its
            complement. Otherwise each column is calibrated separately and
            rows are normalised to sum to one; a row whose calibrated values
            sum to zero gets the uniform distribution instead of NaN.
        """
        prob = np.zeros((X.shape[0], self.n_classes))
        y_prob = self.base_estimator.decision_function(X)
        if self.n_classes == 2:
            prob[:, 1] = self.calibrators_[0].predict(y_prob)
            prob[:, 0] = 1 - prob[:, 1]
        else:
            for i, cur_prob in enumerate(y_prob.T):
                prob[:, i] = self.calibrators_[i].predict(cur_prob)
            row_sums = np.sum(prob, axis=1)[:, np.newaxis]
            # Division is skipped where the row sum is zero, leaving the
            # pre-filled uniform value in place (avoids 0/0 -> NaN).
            uniform = np.full_like(prob, 1.0 / self.n_classes)
            prob = np.divide(prob, row_sums, out=uniform,
                             where=row_sums != 0)
        return prob


# In[4]:


class _SigmoidCalibration():
    """Two-parameter sigmoid mapping from a score to a probability.

    The fitted model is ``p = expit(-(a_ * score + b_))``; ``a_`` and ``b_``
    are found by minimising a cross-entropy loss with L-BFGS-B.
    """

    @staticmethod
    def _cost_grad(AB, X, y):
        """Return the cross-entropy cost and its gradient at ``AB``.

        Parameters
        ----------
        AB : sequence of two floats
            Slope and intercept of the sigmoid.
        X : ndarray
            Scores.
        y : ndarray
            Target probabilities, one per score.

        Returns
        -------
        (cost, grad) where ``grad`` is ``array([d/dA, d/dB])``.
        """
        slope, intercept = AB[0], AB[1]
        P = expit(-(slope * X + intercept))
        # xlogy evaluates 0 * log(0) as 0, so saturated P is harmless when
        # the matching target weight is exactly zero.
        pointwise_loss = -(xlogy(y, P) + xlogy(1 - y, 1 - P))
        residual = y - P
        gradient = np.array([np.dot(residual, X), np.sum(residual)])
        return np.sum(pointwise_loss), gradient

    def fit(self, X, y):
        """Fit ``a_`` and ``b_`` on scores ``X`` and 0/1 labels ``y``.

        Returns
        -------
        self
        """
        n_negative = np.sum(y == 0)
        n_positive = y.shape[0] - n_negative
        # Smoothed targets: strictly inside (0, 1) instead of hard 0/1.
        positive_target = (n_positive + 1) / (n_positive + 2)
        negative_target = 1 / (n_negative + 2)
        targets = np.where(y == 1, positive_target,
                           np.where(y == 0, negative_target, 0.0))
        # Start with zero slope and an intercept set from the class counts.
        start = np.array([0, np.log((n_negative + 1) / (n_positive + 1))])
        solution = minimize(fun=self._cost_grad, jac=True, x0=start,
                            args=(X, targets), method='L-BFGS-B')
        self.a_, self.b_ = solution.x
        return self

    def predict(self, X):
        """Map scores ``X`` to probabilities with the fitted sigmoid."""
        linear = self.a_ * X + self.b_
        return expit(-linear)


# In[5]:


# Check 1: iris with the rows labelled 2 removed, LinearSVC base estimator.
# The local implementation must give the same probabilities and the same
# predicted labels as scikit-learn's CalibratedClassifierCV.
X, y = load_iris(return_X_y=True)
binary_rows = y != 2
X, y = X[binary_rows], y[binary_rows]
clf1 = CalibratedClassifierCV(
    base_estimator=LinearSVC(max_iter=10000, random_state=0))
clf1.fit(X, y)
clf2 = skCalibratedClassifierCV(
    base_estimator=LinearSVC(max_iter=10000, random_state=0))
clf2.fit(X, y)
prob1, prob2 = clf1.predict_proba(X), clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1, pred2 = clf1.predict(X), clf2.predict(X)
assert np.allclose(pred1, pred2)


# In[6]:


# Check 2: iris with the rows labelled 2 removed, LogisticRegression base
# estimator. Probabilities are compared with an absolute tolerance of 1e-5;
# predicted labels must match.
X, y = load_iris(return_X_y=True)
binary_rows = y != 2
X, y = X[binary_rows], y[binary_rows]
clf1 = CalibratedClassifierCV(
    base_estimator=LogisticRegression(max_iter=10000))
clf1.fit(X, y)
clf2 = skCalibratedClassifierCV(
    base_estimator=LogisticRegression(max_iter=10000))
clf2.fit(X, y)
prob1, prob2 = clf1.predict_proba(X), clf2.predict_proba(X)
assert np.allclose(prob1, prob2, atol=1e-5)
pred1, pred2 = clf1.predict(X), clf2.predict(X)
assert np.allclose(pred1, pred2)


# In[7]:


# Check 3: full iris data set (all labels kept), LinearSVC base estimator.
# Probabilities and predicted labels must match scikit-learn's
# CalibratedClassifierCV.
X, y = load_iris(return_X_y=True)
clf1 = CalibratedClassifierCV(
    base_estimator=LinearSVC(max_iter=10000, random_state=0))
clf1.fit(X, y)
clf2 = skCalibratedClassifierCV(
    base_estimator=LinearSVC(max_iter=10000, random_state=0))
clf2.fit(X, y)
prob1, prob2 = clf1.predict_proba(X), clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1, pred2 = clf1.predict(X), clf2.predict(X)
assert np.allclose(pred1, pred2)


# In[8]:


# Check 4: full iris data set (all labels kept), LogisticRegression base
# estimator. Probabilities and predicted labels must match scikit-learn's
# CalibratedClassifierCV.
X, y = load_iris(return_X_y=True)
clf1 = CalibratedClassifierCV(
    base_estimator=LogisticRegression(max_iter=10000))
clf1.fit(X, y)
clf2 = skCalibratedClassifierCV(
    base_estimator=LogisticRegression(max_iter=10000))
clf2.fit(X, y)
prob1, prob2 = clf1.predict_proba(X), clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1, pred2 = clf1.predict(X), clf2.predict(X)
assert np.allclose(pred1, pred2)