This notebook contains an example of generating adversarial samples using a black-box attack against a scikit-learn pipeline consisting of principal component analysis (PCA) and a support vector machine classifier (SVC), but any other valid pipeline would work too. The pipeline is first optimised using grid search with cross validation. The adversarial samples are then created with the black-box HopSkipJump
attack. The training data is MNIST, because of its intuitive visualisation, but any other dataset, including tabular data, would be suitable too.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from art.utils import load_dataset
from art.estimators.classification import SklearnClassifier
from art.attacks.evasion import HopSkipJump
import warnings
# Suppress every warning for the rest of the session to keep the notebook output compact.
# NOTE(review): this also hides deprecation notices from the libraries used below;
# comment it out when upgrading dependencies.
warnings.filterwarnings('ignore')
# Each MNIST image is flattened into a single feature vector of this length.
n_features = 28 * 28

# The last two values returned by the loader are not needed in this notebook.
(x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')

# Flatten the images, turn the one-hot label rows into integer class indices, and
# keep only the leading samples (1000 for training, 100 for testing) to accelerate
# the notebook example; remove the trailing slices for higher accuracy.
x_train = x_train.reshape((len(x_train), n_features))[:1000]
x_test = x_test.reshape((len(x_test), n_features))[:100]
y_train = y_train.argmax(axis=1)[:1000]
y_test = y_test.argmax(axis=1)[:100]
# Two-step pipeline: PCA for dimensionality reduction followed by an RBF-kernel SVC.
svc = SVC(C=1.0, kernel='rbf')
pca = PCA()
pipeline = Pipeline(steps=[('pca', pca), ('svc', svc)])

# Hyper-parameters are addressed as '<step name>__<parameter>': the number of retained
# principal components and the SVC regularisation strength C (5 values from 1e-4 to 1e4).
param_grid = {'pca__n_components': [5, 20, 30, 40, 50, 64],
              'svc__C': np.logspace(-4, 4, 5)}

# 5-fold cross-validated grid search over the pipeline.
# The former `iid=False` argument is intentionally omitted: `iid` was deprecated in
# scikit-learn 0.22 and removed in 0.24, where passing it raises a TypeError. Averaging
# the score over folds (what `iid=False` requested) is the only behaviour since 0.22.
search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)
search.fit(x_train, y_train)

print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)
Best parameter (CV score=0.908): {'pca__n_components': 20, 'svc__C': 1.0}
# Wrap the best pipeline found by the grid search so that ART can attack it.
classifier = SklearnClassifier(model=search.best_estimator_)

# Untargeted HopSkipJump attack measured in the L-infinity norm.
hsj_settings = dict(
    targeted=False,
    norm=np.inf,
    max_iter=100,
    max_eval=100,
    init_eval=100,
    init_size=100,
)
attack = HopSkipJump(classifier=classifier, **hsj_settings)

# Craft one adversarial counterpart for every benign test sample.
x_test_adv = attack.generate(x_test)
HBox(children=(HTML(value='HopSkipJump'), FloatProgress(value=0.0), HTML(value='')))
# Baseline: score of the fitted grid search on the unmodified test samples.
accuracy_test_benign = search.score(x_test, y_test)
benign_percent = accuracy_test_benign * 100
print('Accuracy on benign test samples {}%:'.format(benign_percent))
Accuracy on benign test samples 91.0%:
# Score of the same fitted grid search on the adversarial samples, judged against the true labels.
accuracy_test_adversarial = search.score(x_test_adv, y_test)
adversarial_percent = accuracy_test_adversarial * 100
print('Accuracy on adversarial test samples {}%:'.format(adversarial_percent))
Accuracy on adversarial test samples 5.0%:
# Display the first benign test sample as a 28x28 image and report its predicted label.
benign_image = x_test[0].reshape((28, 28))
plt.matshow(benign_image);
benign_prediction = search.predict(x_test[:1])
print('Predicted label:', benign_prediction[0])
Predicted label: 7
# Display the adversarial version of the first test sample and report its predicted label.
adversarial_image = x_test_adv[0].reshape((28, 28))
plt.matshow(adversarial_image);
adversarial_prediction = search.predict(x_test_adv[:1])
print('Predicted label:', adversarial_prediction[0])
Predicted label: 9
# Size of the perturbation applied to the first sample: the largest absolute
# per-feature difference between the adversarial and the benign vector.
perturbation = x_test_adv[0] - x_test[0]
print('L_Infinity-norm:', np.linalg.norm(perturbation, ord=np.inf))
L_Infinity-norm: 0.2403459834117515