import random
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler

def synthetic_data_svm(w, b, n):
    """Generate a linearly separable binary classification data set.

    Args:
        w: 1-D tensor holding the weights of the separating hyperplane; its
            length fixes the number of feature columns.
        b: Bias (offset) of the separating hyperplane.
        n: Number of samples to draw.

    Returns:
        A pair ``(X, y)``: ``X`` has shape ``(n, len(w))`` with entries drawn
        from a standard normal distribution, and ``y`` holds
        ``sign(X @ w + b)`` for each row (``torch.sign`` yields 0 for a score
        of exactly zero).
    """
    num_features = len(w)
    X = torch.normal(0, 1, (n, num_features))
    scores = X @ w + b
    return X, scores.sign()

# Ground-truth hyperplane parameters and the size of the synthetic data set
# that is generated from them.
w_str = torch.tensor([2.0, -3.4])
b_str = 4.2
n_samples = 1000
features, labels = synthetic_data_svm(w=w_str, b=b_str, n=n_samples)


def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    The sample order is reshuffled with ``random.shuffle`` each time the
    generator is started. The last batch is shorter when the number of
    samples is not a multiple of ``batch_size``.

    Args:
        batch_size: Maximum number of samples per batch.
        features: Tensor indexed by sample along its first dimension.
        labels: Tensor of labels, one per sample.

    Yields:
        ``(X, y)`` where ``X`` is the selected rows of ``features`` and ``y``
        is the matching labels reshaped to a column ``(-1, 1)``.
    """
    total = len(labels)
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing past the end of a list is clamped, so no explicit min().
        chosen = torch.tensor(order[start:start + batch_size])
        batch_X = features[chosen]
        batch_y = labels[chosen].reshape((-1, 1))
        yield batch_X, batch_y

def svm_mdl(X, w, b):
    """Compute the raw linear SVM scores ``X @ w + b``.

    Args:
        X: Feature tensor whose last dimension matches the first of ``w``.
        w: Weight tensor.
        b: Bias, broadcast onto the product.

    Returns:
        The un-thresholded decision values.
    """
    scores = X @ w
    return scores + b


def hinge_loss(y_hat, y, w, lambd=0.01):
    """Return the L2-regularised mean hinge loss.

    Computes ``mean(max(0, 1 - y * y_hat)) + lambd * sum(w ** 2) / 2``.

    Args:
        y_hat: Raw decision values produced by the model.
        y: Target labels (expected to be -1 / +1), one per prediction.
        w: Weight tensor that the L2 penalty is applied to.
        lambd: Regularisation strength.

    Returns:
        A scalar tensor with the regularised loss.
    """
    # Without this, a flat (n,) label vector multiplied by (n, 1) predictions
    # broadcasts to an (n, n) matrix and the mean is taken over every
    # label/prediction cross pair, silently producing a wrong loss. Only
    # realign when the element counts match, so deliberate broadcasting
    # (e.g. a scalar label) keeps working as before.
    if (torch.is_tensor(y) and torch.is_tensor(y_hat)
            and y.shape != y_hat.shape and y.numel() == y_hat.numel()):
        y = y.reshape(y_hat.shape)

    margin_violation = torch.clamp(1 - y * y_hat, min=0)
    penalty = lambd * torch.sum(w ** 2) / 2
    return margin_violation.mean() + penalty

# Trainable parameters: a small random weight column (one row per feature
# column of `features`) and a zero bias.
w = torch.normal(0, 0.01, size=(features.shape[1], 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

trainer = torch.optim.SGD([w, b], lr=0.03)


epochs = 30
batch_size = 10
lambd = 0.01

# svm_mdl returns an (n, 1) column for the (d, 1) weight above, so the labels
# are reshaped to a column for the full-dataset evaluation below.
labels_col = labels.reshape((-1, 1))

for epoch in range(epochs):
    for X, y in data_iter(batch_size, features, labels):
        trainer.zero_grad()
        y_hat = svm_mdl(X, w, b)
        train_loss = hinge_loss(y_hat, y, w, lambd)
        train_loss.backward()
        trainer.step()
    # Report the objective on the whole training set rather than on whichever
    # mini-batch happened to come last, which is noisy and unrepresentative.
    # no_grad keeps this evaluation out of the autograd graph.
    with torch.no_grad():
        train_loss = hinge_loss(svm_mdl(features, w, b), labels_col, w, lambd)
    print(f'epoch {epoch + 1}, loss {train_loss:f}')


def plot_decision_boundary(w, b, w_true, b_true, X, y):
    """Scatter the 2-D data and draw the learned and true separating lines.

    Each boundary is the line ``w0 * x0 + w1 * x1 + b = 0`` solved for ``x1``
    over the observed range of the first feature. A second weight of exactly
    zero (a vertical boundary) cannot be represented this way.

    Args:
        w: Learned weight tensor; ``w[0]`` and ``w[1]`` must each hold a
            single element.
        b: Learned bias as a single-element tensor.
        w_true: Ground-truth weights, indexable with two entries.
        b_true: Ground-truth bias.
        X: Feature tensor with at least two columns; the first two are used.
        y: Label tensor; points labelled 1 and -1 are drawn.
    """
    # detach() so tensors that track gradients can still be converted.
    X_np = X.detach().cpu().numpy()
    y_np = y.detach().cpu().numpy().ravel()

    positive = y_np == 1
    negative = y_np == -1
    plt.scatter(X_np[positive, 0], X_np[positive, 1], color='blue', label='Class 1')
    plt.scatter(X_np[negative, 0], X_np[negative, 1], color='red', label='Class -1')

    x_vals = np.linspace(X_np[:, 0].min(), X_np[:, 0].max(), 100)

    # Convert the ground-truth parameters to plain floats up front so the
    # line is computed purely in NumPy instead of relying on implicit
    # tensor/ndarray conversion in mixed arithmetic.
    true_w0 = float(w_true[0])
    true_w1 = float(w_true[1])
    true_boundary = -(true_w0 * x_vals + float(b_true)) / true_w1
    plt.plot(x_vals, true_boundary, 'g--', label='True Decision Boundary')

    svm_boundary = -(w[0].item() * x_vals + b.item()) / w[1].item()
    plt.plot(x_vals, svm_boundary, 'b-', label='SVM Decision Boundary')

    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.title("SVM vs. True Decision Boundary")
    plt.show()

# Compare the SGD-trained hyperplane with the one that generated the data.
plot_decision_boundary(w=w, b=b, w_true=w_str, b_true=b_str, X=features, y=labels)


# Reference model: scikit-learn's linear SVM with hinge loss, fitted on
# standardized copies of the same features.
raw_features = features.numpy()
raw_labels = labels.numpy().ravel()
scaler = StandardScaler().fit(raw_features)
X_scaled = scaler.transform(raw_features)
svm_sklearn = LinearSVC(loss='hinge', max_iter=10000).fit(X_scaled, raw_labels)

print(f'Scikit-learn SVM coefficients: {svm_sklearn.coef_}, intercept: {svm_sklearn.intercept_}')


def plot_sklearn_boundary(svm, X, y, scaler):
    """Scatter the raw 2-D data and draw the fitted linear SVM boundary.

    The points are plotted in raw feature coordinates, so the hyperplane is
    first mapped from the scaler's standardized space back to raw space.
    This assumes ``svm`` was fitted on ``scaler.transform(X)``.

    Args:
        svm: Fitted linear classifier exposing ``coef_`` (first row used)
            and ``intercept_`` (first entry used).
        X: Feature tensor with at least two columns; the first two are used.
        y: Label tensor; points labelled 1 and -1 are drawn.
        scaler: Fitted scaler exposing ``mean_`` and ``scale_`` (either may
            be ``None``), or ``None`` if the classifier was trained on raw
            features.
    """
    X_np = X.numpy()
    y_np = y.numpy().ravel()

    positive = y_np == 1
    negative = y_np == -1
    plt.scatter(X_np[positive, 0], X_np[positive, 1], color='blue', label='Class 1')
    plt.scatter(X_np[negative, 0], X_np[negative, 1], color='red', label='Class -1')

    coef = np.asarray(svm.coef_, dtype=float)[0]
    intercept = float(np.asarray(svm.intercept_).ravel()[0])

    # With z = (x - mean) / scale, the plane coef . z + intercept = 0 becomes
    # (coef / scale) . x + (intercept - (coef / scale) . mean) = 0 in raw
    # coordinates. Without this the line is drawn in the wrong space.
    if scaler is not None:
        scale = getattr(scaler, 'scale_', None)
        mean = getattr(scaler, 'mean_', None)
        if scale is not None:
            coef = coef / np.asarray(scale, dtype=float)
        if mean is not None:
            intercept -= float(np.dot(coef, np.asarray(mean, dtype=float)))

    x_vals = np.linspace(X_np[:, 0].min(), X_np[:, 0].max(), 100)
    svm_boundary = -(coef[0] * x_vals + intercept) / coef[1]
    plt.plot(x_vals, svm_boundary, 'r-', label='Scikit-learn Decision Boundary')

    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.title("Scikit-learn SVM Decision Boundary")
    plt.show()

# Draw the scikit-learn model's boundary over the same data points.
plot_sklearn_boundary(svm=svm_sklearn, X=features, y=labels, scaler=scaler)