from __future__ import print_function

try:
    from sklearn.datasets import make_classification
except ImportError:
    import pip
    pip.main(['install', '--user', 'scikit-learn'])
    from sklearn.datasets import make_classification

import numpy as np

X, Y = make_classification(n_samples=1000,
                           n_features=2,
                           n_informative=2,
                           n_redundant=0,
                           n_clusters_per_class=2)

# adding some static offset to the data
X = X + 1

from modshogun import RealFeatures, MultilabelSOLabels, MultilabelModel


def create_features(X, constant):
    # append a constant column to the feature matrix; with constant = 1 the
    # corresponding weight acts as a learned bias/threshold, with constant = 0
    # it contributes nothing
    features = RealFeatures(np.c_[X, constant * np.ones(X.shape[0])].T)
    return features


def create_labels(Y, n_classes):
    try:
        n_samples = Y.shape[0]
    except AttributeError:
        n_samples = len(Y)
    labels = MultilabelSOLabels(n_samples, n_classes)
    for i, sparse_label in enumerate(Y):
        try:
            sparse_label = sorted(sparse_label)
        except TypeError:
            # a single integer label becomes a one-element label set
            sparse_label = [sparse_label]
        labels.set_sparse_label(i, np.array(sparse_label, dtype=np.int32))
    return labels


def split_data(X, Y, ratio):
    num_samples = X.shape[0]
    train_samples = int(ratio * num_samples)
    return (X[:train_samples], Y[:train_samples],
            X[train_samples:], Y[train_samples:])

X_train, Y_train, X_test, Y_test = split_data(X, Y, 0.9)

feats_0 = create_features(X_train, 0)
feats_1 = create_features(X_train, 1)
labels = create_labels(Y_train, 2)

model = MultilabelModel(feats_0, labels)
model_with_bias = MultilabelModel(feats_1, labels)

from modshogun import StochasticSOSVM, DualLibQPBMSOSVM, StructuredAccuracy, LabelsFactory
from time import time

sgd = StochasticSOSVM(model, labels)
sgd_with_bias = StochasticSOSVM(model_with_bias, labels)

start = time()
sgd.train()
print(">>> Time taken for SGD *without* threshold tuning = %f" % (time() - start))

start = time()
sgd_with_bias.train()
print(">>> Time taken for SGD *with* threshold tuning = %f" % (time() - start))


def evaluate_machine(machine, X_test, Y_test, n_classes, bias):
    if bias:
        feats_test = create_features(X_test, 1)
    else:
        feats_test = create_features(X_test, 0)
    test_labels = create_labels(Y_test, n_classes)

    out_labels = LabelsFactory.to_structured(machine.apply(feats_test))
    evaluator = StructuredAccuracy()
    jaccard_similarity_score = evaluator.evaluate(out_labels, test_labels)

    return jaccard_similarity_score

print(">>> Accuracy of SGD *without* threshold tuning = %f" %
      evaluate_machine(sgd, X_test, Y_test, 2, False))
print(">>> Accuracy of SGD *with* threshold tuning = %f" %
      evaluate_machine(sgd_with_bias, X_test, Y_test, 2, True))

import matplotlib.pyplot as plt
%matplotlib inline


def get_parameters(weights):
    # rewrite w[0]*x + w[1]*y + w[2] = 0 as y = m*x + c
    return -weights[0] / weights[1], -weights[2] / weights[1]


def scatter_plot(X, y):
    zeros_class = np.where(y == 0)
    ones_class = np.where(y == 1)
    plt.scatter(X[zeros_class, 0], X[zeros_class, 1], c='b', label="Negative Class")
    plt.scatter(X[ones_class, 0], X[ones_class, 1], c='r', label="Positive Class")


def plot_hyperplane(machine_0, machine_1, label_0, label_1, title, X, y):
    scatter_plot(X, y)
    x_min, x_max = np.min(X[:, 0]) - 0.5, np.max(X[:, 0]) + 0.5
    y_min, y_max = np.min(X[:, 1]) - 0.5, np.max(X[:, 1]) + 0.5

    xx = np.linspace(x_min, x_max, 1000)
    m_0, c_0 = get_parameters(machine_0.get_w())
    m_1, c_1 = get_parameters(machine_1.get_w())
    yy_0 = m_0 * xx + c_0
    yy_1 = m_1 * xx + c_1

    plt.plot(xx, yy_0, "k--", label=label_0)
    plt.plot(xx, yy_1, "g-", label=label_1)
    plt.xlim((x_min, x_max))
    plt.ylim((y_min, y_max))
    plt.grid()
    plt.legend(loc="best")
    plt.title(title)
    plt.show()

fig = plt.figure(figsize=(10, 10))
plot_hyperplane(sgd, sgd_with_bias,
                "Boundary for machine *without* bias for class 0",
                "Boundary for machine *with* bias for class 0",
                "Binary Classification using SO-SVM with/without threshold tuning",
                X, Y)
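# A minimal NumPy-only sketch of why the constant column appended by
# create_features() behaves like a threshold: with constant = 0 the last
# weight component is multiplied away, while with constant = 1 it becomes a
# free offset of the decision score. The weight vector and point below are
# made up purely for illustration.
w_example = np.array([0.8, -1.2, 0.5])       # hypothetical [w_x, w_y, w_threshold]
point = np.array([1.0, 2.0])

for constant in (0, 1):
    augmented = np.append(point, constant)   # same augmentation as create_features()
    score = np.dot(w_example, augmented)     # threshold only contributes when constant == 1
    print("constant=%d -> decision score=%.2f" % (constant, score))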
from modshogun import SparseMultilabel_obtain_from_generic


def plot_decision_plane(machine, title, X, y, bias):
    plt.figure(figsize=(24, 8))
    plt.suptitle(title)

    plt.subplot(1, 2, 1)
    x_min, x_max = np.min(X[:, 0]) - 0.5, np.max(X[:, 0]) + 0.5
    y_min, y_max = np.min(X[:, 1]) - 0.5, np.max(X[:, 1]) + 0.5

    xx = np.linspace(x_min, x_max, 200)
    yy = np.linspace(y_min, y_max, 200)
    x_mesh, y_mesh = np.meshgrid(xx, yy)

    if bias:
        feats = create_features(np.c_[x_mesh.ravel(), y_mesh.ravel()], 1)
    else:
        feats = create_features(np.c_[x_mesh.ravel(), y_mesh.ravel()], 0)
    out_labels = machine.apply(feats)

    z = []
    for i in range(out_labels.get_num_labels()):
        label = SparseMultilabel_obtain_from_generic(out_labels.get_label(i)).get_data()
        if label.shape[0] == 1:
            # predicted a single class
            z.append(label[0])
        elif label.shape[0] == 2:
            # predicted both classes
            z.append(2)
        elif label.shape[0] == 0:
            # predicted neither class
            z.append(3)

    z = np.array(z)
    z = z.reshape(x_mesh.shape)

    c = plt.pcolor(x_mesh, y_mesh, z, cmap=plt.cm.gist_heat)
    scatter_plot(X, y)
    plt.xlim((x_min, x_max))
    plt.ylim((y_min, y_max))
    plt.colorbar(c)
    plt.title("Decision Surface")
    plt.legend(loc="best")

    plt.subplot(1, 2, 2)
    weights = machine.get_w()
    m_0, c_0 = get_parameters(weights[:3])
    m_1, c_1 = get_parameters(weights[3:])
    yy_0 = m_0 * xx + c_0
    yy_1 = m_1 * xx + c_1

    plt.plot(xx, yy_0, "r--", label="Boundary for class 0")
    plt.plot(xx, yy_1, "g-", label="Boundary for class 1")
    plt.title("Hyperplanes for different classes")
    plt.legend(loc="best")
    plt.xlim((x_min, x_max))
    plt.ylim((y_min, y_max))
    plt.show()

plot_decision_plane(sgd, "Model *without* Threshold Tuning", X, Y, False)
plot_decision_plane(sgd_with_bias, "Model *with* Threshold Tuning", X, Y, True)


def load_data(file_name):
    # parse a multilabel dataset in SVM-light-like format:
    # "label[,label...] index:value index:value ..."
    with open(file_name) as input_file:
        lines = input_file.readlines()

    n_samples = len(lines)
    n_features = len(lines[0].split()) - 1

    Y = []
    X = []
    for line in lines:
        data = line.split()
        Y.append(list(map(int, data[0].split(","))))
        feats = []
        for feat in data[1:]:
            feats.append(float(feat.split(":")[1]))
        X.append(feats)

    X = np.array(X)
    n_classes = max(max(label) for label in Y) + 1

    return X, Y, n_samples, n_features, n_classes


def test_multilabel_data(train_file, test_file):
    X_train, Y_train, n_samples, n_features, n_classes = load_data(train_file)
    X_test, Y_test, n_samples, n_features, n_classes = load_data(test_file)

    # create features and labels
    multilabel_feats_0 = create_features(X_train, 0)
    multilabel_feats_1 = create_features(X_train, 1)
    multilabel_labels = create_labels(Y_train, n_classes)

    # create multilabel models
    multilabel_model = MultilabelModel(multilabel_feats_0, multilabel_labels)
    multilabel_model_with_bias = MultilabelModel(multilabel_feats_1, multilabel_labels)

    # initialize machines for SO-learning
    multilabel_sgd = StochasticSOSVM(multilabel_model, multilabel_labels)
    multilabel_sgd_with_bias = StochasticSOSVM(multilabel_model_with_bias, multilabel_labels)

    start = time()
    multilabel_sgd.train()
    t1 = time() - start

    multilabel_sgd_with_bias.train()
    t2 = time() - start - t1

    return (evaluate_machine(multilabel_sgd, X_test, Y_test, n_classes, False),
            t1,
            evaluate_machine(multilabel_sgd_with_bias, X_test, Y_test, n_classes, True),
            t2)
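# A small self-contained check of the file format load_data() expects: each
# line starts with a comma-separated list of class indices, followed by
# index:value feature pairs. The two sample lines below are made up for
# illustration and are not rows from the yeast/scene datasets.
import tempfile

_sample = "0,2 1:0.5 2:1.25 3:-0.75\n1 1:0.1 2:0.0 3:2.5\n"
with tempfile.NamedTemporaryFile(mode="w", suffix=".svm", delete=False) as _tmp:
    _tmp.write(_sample)

_X, _Y, _n_samples, _n_features, _n_classes = load_data(_tmp.name)
print(_X.shape)       # (2, 3) feature matrix
print(_Y)             # [[0, 2], [1]] label sets
print(_n_classes)     # 3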
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.metrics import jaccard_similarity_score
from sklearn.preprocessing import LabelBinarizer


def sklearn_implementation(train_file, test_file):
    label_binarizer = LabelBinarizer()
    X_train, Y_train, n_samples, n_features, n_classes = load_data(train_file)
    X_test, Y_test, n_samples, n_features, n_classes = load_data(test_file)

    clf = OneVsRestClassifier(SVC(kernel='linear'))
    start = time()
    clf.fit(X_train, label_binarizer.fit_transform(Y_train))
    t1 = time() - start

    # transform (not fit_transform) the test labels so they use the class
    # ordering learned from the training labels
    return (jaccard_similarity_score(label_binarizer.transform(Y_test),
                                     clf.predict(X_test)),
            t1)


def print_table(train_file, test_file, caption):
    acc_0, t1, acc_1, t2 = test_multilabel_data(train_file, test_file)
    sk_acc, sk_t1 = sklearn_implementation(train_file, test_file)

    result = '''
    \t\t%s
    Machine\t\t\t\tAccuracy\tTrain-time\n
    SGD *without* threshold tuning \t%f \t%f
    SGD *with* threshold tuning \t%f \t%f
    scikit-learn's implementation \t%f \t%f
    ''' % (caption, acc_0, t1, acc_1, t2, sk_acc, sk_t1)
    print(result)

print_table("../../../data/multilabel/yeast_train.svm",
            "../../../data/multilabel/yeast_test.svm",
            "Yeast dataset")

print_table("../../../data/multilabel/scene_train",
            "../../../data/multilabel/scene_test",
            "Scene dataset")
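# The accuracy values above are stored in a variable named
# jaccard_similarity_score and compared against scikit-learn's metric of the
# same name, which suggests a per-sample Jaccard score averaged over samples:
# |predicted intersect true| / |predicted union true|. The helper below
# (jaccard_example, a made-up name) and its label sets are illustrative only.
def jaccard_example(true_sets, predicted_sets):
    scores = []
    for true, pred in zip(true_sets, predicted_sets):
        true, pred = set(true), set(pred)
        union = true | pred
        scores.append(len(true & pred) / float(len(union)) if union else 1.0)
    return sum(scores) / len(scores)

print(jaccard_example([[0, 2], [1]], [[0], [1, 2]]))  # (1/2 + 1/2) / 2 = 0.5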