import numpy as np
import pandas as pd
from lets_plot import *
LetsPlot.setup_html()
from sklearn.datasets import make_moons
from sklearn.preprocessing import scale
from sklearn.metrics.pairwise import check_pairwise_arrays
from sklearn.metrics import euclidean_distances
from sklearn.model_selection import train_test_split
X, y = make_moons(noise = 0.1, n_samples = 1000)
X = scale(X)
data = {'x1':X.T[0], 'x2':X.T[1], 'target':y}
p = ggplot() \
+ geom_point(aes(x='x1', y='x2', fill='target', group='target'),
data=data,
size = 3,
shape = 21,
color='black') \
+ scale_fill_manual(values=['red', 'blue']) \
+ theme(legend_position='none')
p
def make_grid(X, grid_size=40):
min_val, max_val = np.min(X), np.max(X)
delta = (max_val - min_val) / grid_size
xx = yy = np.arange(min_val, min_val + delta * (grid_size + 1), delta)
grid = np.vstack(np.meshgrid(xx, yy)).reshape(2, - 1).T
return grid
grid_size = 40
grid = make_grid(X, grid_size=grid_size)
data_grid = pd.DataFrame(grid, columns=['x1', 'x2'])
data_grid['line'] = np.tile(np.arange(grid_size + 1), grid_size + 1)
p += geom_path(aes(x='x1', y='x2', group='line'), data=data_grid, color='blue', alpha=0.5)
p
def kernel_rbf(x, xi = None, gamma = None):
x, xi = check_pairwise_arrays(x, xi)
if gamma == None:
gamma = 1.0 / x.shape[1]
K = - gamma * euclidean_distances (x, xi, squared = True)
return np.exp (K)
def kernel_poly(x, xi = None, degree = 3, gamma = None, coef0 = 1):
x, xi = check_pairwise_arrays(x, xi)
if gamma is None:
gamma = 1.0 / x.shape[1]
return (coef0 + gamma * np.dot (x, xi.T)) ** degree
def kernel_centerer(K):
n = K.shape[0]
In = np.ones((n, n)) / n
Kc = K - np.dot (In, K) - np.dot (K, In) + np.dot (np.dot (In, K), In)
return Kc
def kernel_eig(K, centered = True):
if not centered:
K = kernel_centerer (K)
eig_val, eig_vec = np.linalg.eigh (K)
# sort eigen values in descending order
idx = np.argsort (eig_val)[::- 1]
eig_val = eig_val[idx]
eig_vec = eig_vec[:, idx]
# zero eigenvectors with zero eigenvalues
eig_vec = eig_vec[:, eig_val > 0]
eig_val = eig_val[eig_val > 0]
return eig_val, eig_vec
class KernelTransformer(object):
def __init__(self, kernel = 'rbf', gamma = None, degree = 3, coef0 = 1):
self.gamma = gamma
self.degree = degree
self.coef0 = coef0
self.kernel = kernel
def fit(self, X, y):
self.X = X
self.y = y
# compute eigen vectors
K = self.get_kernel (self.X)
self.eig_val, self.eig_vec = kernel_eig(K, centered=False)
return self
def get_kernel(self, x, xi = None):
x, xi = check_pairwise_arrays(x, xi)
if self.kernel == 'poly':
K = kernel_poly (x, xi, self.degree, self.gamma, self.coef0)
elif self.kernel == 'linear':
self.degree = 1
self.coef0 = 0
self.gamma = 1
K = kernel_poly (x, xi, self.degree, self.gamma, self.coef0)
elif self.kernel == 'rbf':
K = kernel_rbf (x, xi, self.gamma)
return K
def transform(self, X):
K = self.get_kernel (X, self.X)
return np.dot(K, self.eig_vec / np.sqrt(self.eig_val))
transformer = KernelTransformer(kernel='rbf', gamma=2, degree=3, coef0=0.)
transformer.fit(X, y);
X_transformed = transformer.transform(X)[:,:2]
feature_data = {'x1':X_transformed[:, 0], 'x2':X_transformed[:, 1], 'target':y}
p = ggplot() \
+ geom_point(aes(x='x1', y='x2', fill='target', group='target'),
data=feature_data,
size = 3,
shape = 21,
color='black') \
+ scale_fill_manual(values =['red', 'blue']) \
+ theme(legend_position='none')
p
transformed_grid = transformer.transform(grid)
transformed_grid = transformed_grid[:,:2]
data_grid = pd.DataFrame (transformed_grid, columns=['x1', 'x2'])
data_grid['line'] = np.tile(np.arange(grid_size + 1), grid_size + 1)
p += geom_path(aes(x='x1', y='x2', group='line'), data=data_grid, color='blue', alpha=0.5)
p
X_train, X_test, y_train, y_test = train_test_split (X, y, test_size = 0.25)
dat = {'x':X_train.T[0], 'y':X_train.T[1], 'variable':y_train}
p = ggplot() \
+ geom_point(aes (x='x1', y='x2', fill='target', group='target'),
data=data,
size = 3,
shape = 21,
color='black') \
+ scale_fill_manual(values =['red', 'blue']) \
+ theme(legend_position='none')
p
def step_function(x, margin = 0, label =[0, 1]):
return np.where (x >= margin, label[1], label[0])
class KernelClassifier(object):
def __init__(self, kernel = 'rbf', gamma = None, degree = 3, coef0 = 1):
self.gamma = gamma
self.degree = degree
self.coef0 = coef0
self.kernel = kernel
def fit(self, X, y):
self.X = X
self.y = y
# count number of points in subsets
self.n = np.array([np.count_nonzero(1 + np.array (np.where (self.y == self.y[i]))) for i in range(len(self.y))])
# compute bias
a = self.get_kernel(self.X, self.X)
b = step_function(self.y, 0.5,[1, - 1]) / self.n ** 2 / 2.0
self.b = np.sum(np.multiply (a, b))
return self
def get_kernel(self, x, xi):
if self.kernel == 'poly':
return kernel_poly(x, xi, self.degree, self.gamma, self.coef0)
elif self.kernel == 'linear':
self.degree = 1
self.coef0 = 0
self.gamma = 1
return kernel_poly(x, xi, self.degree, self.gamma, self.coef0)
elif self.kernel == 'rbf':
return kernel_rbf (x, xi, self.gamma)
def predict(self, X):
a = self.get_kernel (X, self.X)
b = step_function (self.y, 0.5,[- 1, 1]) / self.n
y = np.sum(np.multiply (a, b), axis = 1) + self.b
return step_function (y)
def decision_function(self, X):
a = self.get_kernel (X, self.X)
b = step_function (self.y, 0.5,[- 1, 1]) / self.n
y = np.sum (np.multiply (a, b), axis = 1) + self.b
return y
classifier = KernelClassifier(kernel='rbf', gamma=6.)
classifier.fit(X_train, y_train);
def get_grid_points(X, resolution=0.05):
x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
return np.meshgrid(np.arange(x1_min, x1_max, resolution),
np.arange(x2_min, x2_max, resolution))
xx1, xx2 = get_grid_points(X_train)
Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
xx1, xx2, z = xx1.reshape(- 1), xx2.reshape(- 1), Z.reshape(- 1)
region_data = {'x1':xx1, 'x2':xx2, 'region':z + 1}
p += geom_raster(aes(x='x1', y='x2',group='region', fill='region'),
data=region_data, alpha=0.3) \
+ scale_fill_manual(values =['red', 'blue'])
p += theme(axis_text='blank', axis_ticks='blank', axis_line='blank', axis_title='blank', legend_position='none')
p += ggsize(500, 500)
p
test_data = pd.DataFrame(np.column_stack ((X_test, y_test)), columns =['x1', 'x2', 'target'])
p += geom_point(aes(x='x1', y='x2', color='target'),
data=test_data,
size=5,
alpha=0.5, shape=16) \
+ scale_color_manual(values =['red', 'blue'])
p