import numpy as np
import matplotlib.pyplot as plt

data = np.loadtxt('https://jonghank.github.io/ase3001/files/fit_data.csv', \
                  delimiter=',')
u, v = data[:,0], data[:,1]

plt.figure(figsize=(6,6), dpi=100)
plt.plot(u, v, 'o', alpha=0.5)
plt.grid()
plt.axis('square')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$u$')
plt.ylabel(r'$v$')
plt.title('Raw data')
plt.show()

n = len(u)
d = 6

X = np.zeros((n,d))
for i in range(d):
  X[:,i] = u**i

y = v

theta_opt = np.linalg.lstsq(X, y, rcond=None)[0]

print(f'Optimal theta: {theta_opt}')

vp = np.linspace(0, 1, 100)

X_vp = np.zeros((len(vp),d))
for i in range(d):
  X_vp[:,i] = vp**i;

plt.figure(figsize=(6,6), dpi=100)
plt.plot(u, v, 'o', alpha=0.5, label='Raw data')
plt.plot(vp, np.dot(X_vp, theta_opt), label='Predictor')
plt.grid()
plt.axis('square')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$u$')
plt.ylabel(r'$v$')
plt.title("Polynomial predictor")
plt.legend()
plt.show()

import pandas as pd

df = pd.read_csv('https://jonghank.github.io/ase3001/files/diabetes_data.txt', \
                 delimiter='\t')

df

n, d = df.shape
X = np.hstack((np.ones((n,1)), df.values[:,:-1]))
y = df.values[:,-1]

theta_opt = np.linalg.lstsq(X, y, rcond=None)[0]

MSE = np.linalg.norm(X.dot(theta_opt)-y)**2/n

print(f'MSE: {MSE}')

plt.figure(figsize=(6,6), dpi=100)
plt.plot(y, y, 'k')
plt.plot(y, X.dot(theta_opt), 'o', alpha=0.5)
plt.xlabel('y')
plt.ylabel(r'$\hat{y}$')
plt.axis('square')
plt.grid()

# note that each feature represents
# u1:  age
# u2:  sex
# u3:  bmi         body mass index
# u4:  map	       mean arterial pressure
# u5:  s1 (tc) :   total cholesterol
# u6:  s2 (ldl):   low density lipoprotein
# u7:  s3 (hdl):   high density lipoprotein
# u8:  s4 (tch):
# u9:  s5 (ltg):
# u10: s6 (glu):
# features:      age  sex      bmi   map      tc      ldl     hdl   tch       ltg    glu
X_JHK = np.array([41,   1,    18.3,   90,    171,    80.0,   74.9,    2,     4.75,  90.0])
X_JHK = np.hstack((1, X_JHK))

y_JHK = X_JHK.dot(theta_opt)

plt.figure(figsize=(6,6), dpi=100)
plt.plot(y, y, 'k')
plt.plot(y, X.dot(theta_opt), 'o', alpha=0.5)
plt.plot(y_JHK, y_JHK, 'ro', label='JHK')
plt.xlabel('y')
plt.ylabel(r'$\hat{y}$')
plt.legend()
plt.axis('square')
plt.grid()

import numpy as np
import matplotlib.pyplot as plt

yhat = np.arange(-3, 3, 0.01)

plt.figure(figsize=(12,5))
plt.subplot(121)
plt.plot(yhat, (yhat>0), label='Neymann Pearson loss')
plt.plot(yhat, np.maximum(1+yhat,0), alpha=0.5, linewidth=4, label='Hinge loss')
plt.grid(), plt.legend(), plt.axis('equal'), plt.xlim(-3, 3)
plt.xlabel(r'$\tilde{y}$'), plt.ylabel('Loss')
plt.title('Loss for $y^{(i)}=-1$')
plt.subplot(122)
plt.plot(yhat, (yhat<0), label='Neymann Pearson loss')
plt.plot(yhat, np.maximum(1-yhat,0), alpha=0.5, linewidth=4, label='Hinge loss')
plt.grid(), plt.legend(), plt.axis('equal'), plt.xlim(-3, 3)
plt.xlabel(r'$\tilde{y}$'), plt.ylabel('Loss')
plt.title('Loss for $y^{(i)}=1$')
plt.show()

np.random.seed(7030)
xp = np.random.randn(2,100)
xp[0,:] += 0.2*xp[1,:]
xp += 6*np.random.rand(2,1)
yp = np.ones(xp.shape[1])

xn = np.random.randn(2,200)
xn[0,:] -= 1*xn[1,:]
xn += -6*np.random.rand(2,1)
yn = -np.ones(xn.shape[1])

X = np.vstack((xp.T, xn.T))
y = np.hstack((yp, yn))

plt.figure(figsize=(6,6), dpi=100)
plt.plot(xp[0,:], xp[1,:], 'o', alpha=0.5, label=r'$y=1$')
plt.plot(xn[0,:], xn[1,:], 'o', alpha=0.5, label=r'$y=-1$')
plt.xlabel(r'$x_1$'), plt.ylabel(r'$x_2$')
plt.xlim([-6, 6]), plt.ylim([-6, 6])
plt.legend(), plt.grid()
plt.show()

def solve_svm(X, y, lam=1):

  import cvxpy as cp

  w = cp.Variable(X.shape[1])
  b = cp.Variable()

  obj = cp.sum(cp.pos(1-cp.multiply(y,(X@w-b))))
  reg = cp.sum_squares(w)
  problem = cp.Problem(cp.Minimize(obj + lam*reg))
  problem.solve(solver=cp.CLARABEL)

  return w.value, b.value, obj.value, reg.value

w_svm, b_svm, obj, reg = solve_svm(X, y, 1)

v1 = np.arange(-8, 8, 0.01)
v2 = (b_svm     - w_svm[0]*v1)/w_svm[1]
vp = (b_svm + 1 - w_svm[0]*v1)/w_svm[1]
vn = (b_svm - 1 - w_svm[0]*v1)/w_svm[1]

plt.figure(figsize=(6,6), dpi=100)
plt.plot(xp[0,:], xp[1,:], 'o', alpha=0.5, label=r'$y=1$')
plt.plot(xn[0,:], xn[1,:], 'o', alpha=0.5, label=r'$y=-1$')
plt.plot(v1, vp, '--', label=r'$w^Tx-b=1$')
plt.plot(v1, v2,       label=r'$w^Tx-b=0$')
plt.plot(v1, vn, '--', label=r'$w^Tx-b=-1$')
plt.xlabel(r'$x_1$'), plt.ylabel(r'$x_2$')
plt.title('Support vector machine on a linearly separable set')
plt.xlim([-6, 6]), plt.ylim([-6, 6])
plt.legend(), plt.grid()
plt.show()

print(f'Loss: {obj}')
print(f'Margin: {1/np.sqrt(reg)}')

np.random.seed(7030)
xp = np.random.randn(2,100)
xp[0,:] += 0.2*xp[1,:]
xp += 3*np.random.rand(2,1)
yp = np.ones(xp.shape[1])

xn = np.random.randn(2,200)
xn[0,:] -= 1*xn[1,:]
xn += -3*np.random.rand(2,1)
yn = -np.ones(xn.shape[1])

X = np.vstack((xp.T, xn.T))
y = np.hstack((yp, yn))

w_svm, b_svm, obj, reg = solve_svm(X, y, 1)

v1 = np.arange(-8, 8, 0.01)
v2 = (b_svm     - w_svm[0]*v1)/w_svm[1]
vp = (b_svm + 1 - w_svm[0]*v1)/w_svm[1]
vn = (b_svm - 1 - w_svm[0]*v1)/w_svm[1]

plt.figure(figsize=(6,6), dpi=100)
plt.plot(xp[0,:], xp[1,:], 'o', alpha=0.5, label=r'$y=1$')
plt.plot(xn[0,:], xn[1,:], 'o', alpha=0.5, label=r'$y=-1$')
plt.plot(v1, vp, '--', label=r'$w^Tx-b=1$')
plt.plot(v1, v2,       label=r'$w^Tx-b=0$')
plt.plot(v1, vn, '--', label=r'$w^Tx-b=-1$')
plt.xlabel(r'$x_1$'), plt.ylabel(r'$x_2$')
plt.title('Support vector machine on a linearly non-separable set')
plt.xlim([-6, 6]), plt.ylim([-6, 6])
plt.legend(), plt.grid()
plt.show()

print(f'Loss: {obj}')
print(f'Margin: {1/np.sqrt(reg)}')

import numpy as np
import matplotlib.pyplot as plt

ytilde = np.arange(-3, 3, 0.01)

plt.figure(figsize=(12,5), dpi=100)
plt.subplot(121)
plt.plot(ytilde, (ytilde>0), label='Neymann Pearson loss')
plt.plot(ytilde, np.maximum(1+ytilde,0), alpha=0.5, linewidth=4, label='Hinge loss')
plt.plot(ytilde, np.log(1+np.exp(ytilde)), alpha=0.5, linewidth=4, label='Logistic loss')
plt.grid(), plt.legend(), plt.axis('equal'), plt.xlim(-3, 3)
plt.xlabel(r'$\tilde{y}$'), plt.ylabel('Loss')
plt.title('Loss for $y^{(i)}=-1$')
plt.subplot(122)
plt.plot(ytilde, (ytilde<0), label='Neymann Pearson loss')
plt.plot(ytilde, np.maximum(1-ytilde,0), alpha=0.5, linewidth=4, label='Hinge loss')
plt.plot(ytilde, np.log(1+np.exp(-ytilde)), alpha=0.5, linewidth=4, label='Logistic loss')
plt.grid(), plt.legend(), plt.axis('equal'), plt.xlim(-3, 3)
plt.xlabel(r'$\tilde{y}$'), plt.ylabel('Loss')
plt.title('Loss for $y^{(i)}=1$')
plt.show()

def solve_logistic_regression(X, y, lam=1):

  import cvxpy as cp

  w = cp.Variable(X.shape[1])
  b = cp.Variable()

  obj = cp.sum(cp.logistic(-cp.multiply(y,(X@w-b))))
  reg = cp.sum_squares(w)
  problem = cp.Problem(cp.Minimize(obj + lam*reg))
  problem.solve(solver=cp.CLARABEL)

  return w.value, b.value, obj.value, reg.value

np.random.seed(7030)
xp = np.random.randn(2,100)
xp[0,:] += 0.5*xp[1,:]
xp += 5*np.random.rand(2,1)
yp = np.ones(xp.shape[1])

xn = np.random.randn(2,200)
xn[0,:] -= xn[1,:]
xn += -5*np.random.rand(2,1)
yn = -np.ones(xn.shape[1])

X = np.vstack((xp.T, xn.T))
y = np.hstack((yp, yn))

plt.figure(figsize=(6,6), dpi=100)
plt.plot(xp[0,:], xp[1,:], 'o', alpha=0.5, label=r'$y=1$')
plt.plot(xn[0,:], xn[1,:], 'o', alpha=0.5, label=r'$y=-1$')
plt.xlabel(r'$x_1$'), plt.ylabel(r'$x_2$')
plt.xlim([-6, 6]), plt.ylim([-6, 6])
plt.legend(), plt.grid()
plt.show()

w_svm, b_svm, obj_svm, reg_svm, = solve_svm(X, y, 1)
w_log, b_log, obj_log, reg_log, = solve_logistic_regression(X, y, 1)

v1_svm = np.arange(-8, 8, 0.01)
v2_svm = (b_svm - w_svm[0]*v1_svm)/w_svm[1]

v1_log = np.arange(-8, 8, 0.01)
v2_log = (b_log - w_log[0]*v1_log)/w_log[1]

plt.figure(figsize=(6,6), dpi=100)
plt.plot(xp[0,:], xp[1,:], 'o', alpha=0.5, label=r'$y=1$')
plt.plot(xn[0,:], xn[1,:], 'o', alpha=0.5, label=r'$y=-1$')
plt.plot(v1_svm, v2_svm, label='Support vector machine')
plt.plot(v1_log, v2_log, label='Logistic regression')
plt.xlabel(r'$x_1$'), plt.ylabel(r'$x_2$')
plt.title('Binary classifiers')
plt.xlim([-6, 6]), plt.ylim([-6, 6])
plt.legend(), plt.grid()
plt.show()