#!/usr/bin/env python
# coding: utf-8

# # Adversarial-Robustness-Toolbox for scikit-learn BaggingClassifier

# In[1]:

from sklearn.ensemble import BaggingClassifier
from sklearn.datasets import load_iris
import numpy as np
from matplotlib import pyplot as plt

from art.estimators.classification import SklearnClassifier
from art.attacks.evasion import ZooAttack
from art.utils import load_mnist

import warnings
warnings.filterwarnings('ignore')


# ## 1 Training scikit-learn BaggingClassifier and attacking with ART Zeroth Order Optimization attack

# In[2]:

def get_adversarial_examples(x_train, y_train):

    # Fit BaggingClassifier
    model = BaggingClassifier()
    model.fit(X=x_train, y=y_train)

    # Create ART classifier for scikit-learn BaggingClassifier
    art_classifier = SklearnClassifier(model=model)

    # Create ART Zeroth Order Optimization attack
    zoo = ZooAttack(classifier=art_classifier, confidence=0.0, targeted=False, learning_rate=1e-1, max_iter=20,
                    binary_search_steps=10, initial_const=1e-3, abort_early=True, use_resize=False,
                    use_importance=False, nb_parallel=1, batch_size=1, variable_h=0.2)

    # Generate adversarial samples with ART Zeroth Order Optimization attack
    x_train_adv = zoo.generate(x_train)

    return x_train_adv, model


# ## 1.1 Utility functions

# In[3]:

def get_data(num_classes):
    x_train, y_train = load_iris(return_X_y=True)

    # Keep the first two features of the first `num_classes` classes
    x_train = x_train[y_train < num_classes][:, [0, 1]]
    y_train = y_train[y_train < num_classes]

    # Shift the classes apart so they are easier to separate visually
    x_train[:, 0][y_train == 0] *= 2
    x_train[:, 1][y_train == 2] *= 2
    x_train[:, 0][y_train == 0] -= 3
    x_train[:, 1][y_train == 2] -= 2

    # Scale both features to [0, 1]
    x_train[:, 0] = (x_train[:, 0] - 4) / (9 - 4)
    x_train[:, 1] = (x_train[:, 1] - 1) / (6 - 1)

    return x_train, y_train


# In[4]:

def plot_results(model, x_train, y_train, x_train_adv, num_classes):

    fig, axs = plt.subplots(1, num_classes, figsize=(num_classes * 5, 5))

    colors = ['orange', 'blue', 'green']

    for i_class in range(num_classes):

        # Plot difference vectors
        for i in range(y_train[y_train == i_class].shape[0]):
            x_1_0 = x_train[y_train == i_class][i, 0]
            x_1_1 = x_train[y_train == i_class][i, 1]
            x_2_0 = x_train_adv[y_train == i_class][i, 0]
            x_2_1 = x_train_adv[y_train == i_class][i, 1]

            if x_1_0 != x_2_0 or x_1_1 != x_2_1:
                axs[i_class].plot([x_1_0, x_2_0], [x_1_1, x_2_1], c='black', zorder=1)

        # Plot benign samples
        for i_class_2 in range(num_classes):
            axs[i_class].scatter(x_train[y_train == i_class_2][:, 0], x_train[y_train == i_class_2][:, 1], s=20,
                                 zorder=2, c=colors[i_class_2])
        axs[i_class].set_aspect('equal', adjustable='box')

        # Show predicted probability as contour plot
        h = .01
        x_min, x_max = 0, 1
        y_min, y_max = 0, 1

        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        Z_proba = model.predict_proba(np.c_[xx.ravel(), yy.ravel()])
        Z_proba = Z_proba[:, i_class].reshape(xx.shape)
        im = axs[i_class].contourf(xx, yy, Z_proba,
                                   levels=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
                                   vmin=0, vmax=1)
        if i_class == num_classes - 1:
            cax = fig.add_axes([0.95, 0.2, 0.025, 0.6])
            plt.colorbar(im, ax=axs[i_class], cax=cax)

        # Plot adversarial samples
        for i in range(y_train[y_train == i_class].shape[0]):
            x_1_0 = x_train[y_train == i_class][i, 0]
            x_1_1 = x_train[y_train == i_class][i, 1]
            x_2_0 = x_train_adv[y_train == i_class][i, 0]
            x_2_1 = x_train_adv[y_train == i_class][i, 1]

            if x_1_0 != x_2_0 or x_1_1 != x_2_1:
                axs[i_class].scatter(x_2_0, x_2_1, zorder=2, c='red', marker='X')

        axs[i_class].set_xlim((x_min, x_max))
        axs[i_class].set_ylim((y_min, y_max))

        axs[i_class].set_title('class ' + str(i_class))
        axs[i_class].set_xlabel('feature 1')
        axs[i_class].set_ylabel('feature 2')
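
# ### 1.2 Measuring attack success (added sketch)
#
# A minimal sketch, not part of the original notebook, that summarizes an attack numerically instead of
# visually: clean vs. adversarial accuracy and the mean L2 perturbation. The helper name
# `attack_success_summary` is an assumption; it only relies on numpy and a fitted scikit-learn model, so it
# can be called after any of the example cells below, e.g.
# `attack_success_summary(model, x_train, x_train_adv, y_train)`.

# In[ ]:

def attack_success_summary(model, x, x_adv, y):
    """Hypothetical helper: report clean vs. adversarial accuracy and mean L2 perturbation."""
    acc_clean = model.score(x, y)
    acc_adv = model.score(x_adv, y)
    mean_l2 = np.mean(np.linalg.norm(x_adv - x, axis=1))
    print("Clean accuracy:       %.4f" % acc_clean)
    print("Adversarial accuracy: %.4f" % acc_adv)
    print("Mean L2 perturbation: %.4f" % mean_l2)
    return acc_clean, acc_adv, mean_l2
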

# # 2 Example: Iris dataset

# ### legend
# - colored background: probability of class i
# - orange circles: class 0
# - blue circles: class 1
# - green circles: class 2
# - red crosses: adversarial samples for class i

# In[5]:

num_classes = 2
x_train, y_train = get_data(num_classes=num_classes)

x_train_adv, model = get_adversarial_examples(x_train, y_train)
plot_results(model, x_train, y_train, x_train_adv, num_classes)


# In[6]:

num_classes = 3
x_train, y_train = get_data(num_classes=num_classes)

x_train_adv, model = get_adversarial_examples(x_train, y_train)
plot_results(model, x_train, y_train, x_train_adv, num_classes)


# # 3 Example: MNIST

# ## 3.1 Load and transform MNIST dataset

# In[7]:

(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

n_samples_train = x_train.shape[0]
n_features_train = x_train.shape[1] * x_train.shape[2] * x_train.shape[3]
n_samples_test = x_test.shape[0]
n_features_test = x_test.shape[1] * x_test.shape[2] * x_test.shape[3]

# Flatten the images into feature vectors
x_train = x_train.reshape(n_samples_train, n_features_train)
x_test = x_test.reshape(n_samples_test, n_features_test)

# Convert one-hot labels to class indices
y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)

# Keep a small subset to limit the runtime of the attack
n_samples_max = 200
x_train = x_train[0:n_samples_max]
y_train = y_train[0:n_samples_max]
x_test = x_test[0:n_samples_max]
y_test = y_test[0:n_samples_max]


# ## 3.2 Train BaggingClassifier classifier

# In[8]:

# Default hyperparameters listed explicitly; the default base estimator is a decision tree.
model = BaggingClassifier(n_estimators=10, max_samples=1.0, max_features=1.0, bootstrap=True,
                          bootstrap_features=False, oob_score=False, warm_start=False, n_jobs=None,
                          random_state=None, verbose=0)


# In[9]:

model.fit(X=x_train, y=y_train)


# ## 3.3 Create and apply Zeroth Order Optimization Attack with ART

# In[10]:

art_classifier = SklearnClassifier(model=model)


# In[11]:

zoo = ZooAttack(classifier=art_classifier, confidence=0.0, targeted=False, learning_rate=1e-1, max_iter=100,
                binary_search_steps=20, initial_const=1e-3, abort_early=True, use_resize=False,
                use_importance=False, nb_parallel=10, batch_size=1, variable_h=0.25)


# In[12]:

x_train_adv = zoo.generate(x_train)


# In[13]:

x_test_adv = zoo.generate(x_test)


# ## 3.4 Evaluate BaggingClassifier on benign and adversarial samples

# In[14]:

score = model.score(x_train, y_train)
print("Benign Training Score: %.4f" % score)


# In[15]:

plt.matshow(x_train[0, :].reshape((28, 28)))
plt.clim(0, 1)


# In[16]:

prediction = model.predict(x_train[0:1, :])[0]
print("Benign Training Predicted Label: %i" % prediction)


# In[17]:

score = model.score(x_train_adv, y_train)
print("Adversarial Training Score: %.4f" % score)


# In[18]:

plt.matshow(x_train_adv[0, :].reshape((28, 28)))
plt.clim(0, 1)


# In[19]:

prediction = model.predict(x_train_adv[0:1, :])[0]
print("Adversarial Training Predicted Label: %i" % prediction)


# In[20]:

score = model.score(x_test, y_test)
print("Benign Test Score: %.4f" % score)


# In[21]:

plt.matshow(x_test[0, :].reshape((28, 28)))
plt.clim(0, 1)


# In[22]:

prediction = model.predict(x_test[0:1, :])[0]
print("Benign Test Predicted Label: %i" % prediction)


# In[23]:

score = model.score(x_test_adv, y_test)
print("Adversarial Test Score: %.4f" % score)


# In[24]:

plt.matshow(x_test_adv[0, :].reshape((28, 28)))
plt.clim(0, 1)


# In[25]:

prediction = model.predict(x_test_adv[0:1, :])[0]
print("Adversarial Test Predicted Label: %i" % prediction)
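

# ## 3.5 Perturbation statistics (added sketch)
#
# A minimal sketch, not part of the original notebook, that quantifies how much the ZOO attack changed the
# MNIST test images. It assumes the `x_test` and `x_test_adv` arrays produced in the cells above and uses
# only numpy.

# In[ ]:

perturbation = x_test_adv - x_test

# Average perturbation size per image, measured in L2 and L-infinity norms
print("Mean L2 perturbation:    %.4f" % np.mean(np.linalg.norm(perturbation, axis=1)))
print("Mean L-inf perturbation: %.4f" % np.mean(np.max(np.abs(perturbation), axis=1)))

# Fraction of test images that were modified at all by the attack
print("Fraction of modified samples: %.2f" % np.mean(np.any(perturbation != 0, axis=1)))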