This notebook contains an example of using Emukit to perform Bayesian optimization on a function where low-fidelity approximations are available.
The high fidelity function to be optimized is the Forrester function, given by:
$$ f_{high}(x) = (6x - 2)^2 \sin(12x - 4) $$
The low fidelity approximation of this function is given by:
$$ f_{low}(x) = 0.5 f_{high}(x) + 10 (x - 0.5) + 5 $$
Both are defined on the interval $[0, 1]$.
Each evaluation of the high fidelity function costs $5$ units, whereas each evaluation of the low fidelity function costs $1$ unit.
We wish to find the location of the minimum of $f_{high}$ while taking advantage of the cheaper-to-evaluate $f_{low}$.
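As a quick sanity check, the two fidelities can also be written directly in NumPy. This is a minimal standalone sketch of the equations above (the names f_high and f_low are illustrative only); the rest of the notebook uses the wrapped Emukit versions loaded below.
import numpy as np

def f_high(x):
    # High fidelity Forrester function: (6x - 2)^2 sin(12x - 4)
    return (6 * x - 2) ** 2 * np.sin(12 * x - 4)

def f_low(x):
    # Low fidelity approximation: scaled high fidelity plus a linear trend
    return 0.5 * f_high(x) + 10 * (x - 0.5) + 5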
# Load function
import emukit.test_functions.forrester
# The multi-fidelity Forrester function is already wrapped as an Emukit UserFunction object in
# the test_functions package
forrester_fcn, _ = emukit.test_functions.forrester.multi_fidelity_forrester_function()
forrester_fcn_low = forrester_fcn.f[0]
forrester_fcn_high = forrester_fcn.f[1]
# Assign costs
low_fidelity_cost = 1
high_fidelity_cost = 5
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 16})
FIG_SIZE = (12, 8)
import numpy as np
np.random.seed(12345)
n_plot_points = 500
x_plot = np.linspace(0, 1, n_plot_points)[:, None]
y_plot_low = forrester_fcn_low(x_plot)
y_plot_high = forrester_fcn_high(x_plot)
plt.figure(figsize=FIG_SIZE)
plt.plot(x_plot, y_plot_low, 'b')
plt.plot(x_plot, y_plot_high, 'r')
plt.legend(['Low fidelity', 'High fidelity'])
plt.xlim(0, 1)
plt.title('High and low fidelity Forrester functions')
plt.xlabel('x')
plt.ylabel('y');
plt.show()
The parameter space now contains two parameters: the first is a ContinuousParameter representing the $x$ input to the Forrester function; the second is an InformationSourceParameter that tells Emukit whether a given function evaluation is to be performed by the high or low fidelity function.
from emukit.core import ParameterSpace, ContinuousParameter, InformationSourceParameter
n_fidelities = 2
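# The InformationSourceParameter appends a fidelity index to each input: 0 = low fidelity, 1 = high fidelity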
parameter_space = ParameterSpace([ContinuousParameter('x', 0, 1), InformationSourceParameter(n_fidelities)])
We shall randomly choose 12 points at which to evaluate the low fidelity function, then evaluate the high fidelity function at 6 of these points.
x_low = np.random.rand(12)[:, None]
x_high = x_low[:6, :]
y_low = forrester_fcn_low(x_low)
y_high = forrester_fcn_high(x_high)
We will use the linear multi-fidelity model defined in Emukit. In this model, the high-fidelity function is modelled as the low-fidelity function scaled by a constant $\rho$ plus an error term:
$$ f_{high}(x) = f_{err}(x) + \rho \,f_{low}(x) $$
from emukit.multi_fidelity.models.linear_model import GPyLinearMultiFidelityModel
import GPy
from emukit.multi_fidelity.kernels.linear_multi_fidelity_kernel import LinearMultiFidelityKernel
from emukit.multi_fidelity.convert_lists_to_array import convert_xy_lists_to_arrays
from emukit.model_wrappers import GPyMultiOutputWrapper
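# Stack the data from both fidelities into single arrays, appending the fidelity index as the last input column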
x_array, y_array = convert_xy_lists_to_arrays([x_low, x_high], [y_low, y_high])
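# RBF kernels for the low fidelity function and the error term; bounding the lengthscales helps avoid degenerate fits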
kern_low = GPy.kern.RBF(1)
kern_low.lengthscale.constrain_bounded(0.01, 0.5)
kern_err = GPy.kern.RBF(1)
kern_err.lengthscale.constrain_bounded(0.01, 0.5)
multi_fidelity_kernel = LinearMultiFidelityKernel([kern_low, kern_err])
gpy_model = GPyLinearMultiFidelityModel(x_array, y_array, multi_fidelity_kernel, n_fidelities)
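# Fix the observation noise of both fidelities at a small value rather than learning it from the few points available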
gpy_model.likelihood.Gaussian_noise.fix(0.1)
gpy_model.likelihood.Gaussian_noise_1.fix(0.1)
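# Wrap the GPy model for use with Emukit: 2 outputs (one per fidelity), 5 optimization restarts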
model = GPyMultiOutputWrapper(gpy_model, 2, 5, verbose_optimization=False)
model.optimize()
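# Build prediction inputs at each fidelity by appending a fidelity index column (0 = low, 1 = high)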
x_plot_low = np.concatenate([np.atleast_2d(x_plot), np.zeros((x_plot.shape[0], 1))], axis=1)
x_plot_high = np.concatenate([np.atleast_2d(x_plot), np.ones((x_plot.shape[0], 1))], axis=1)
def plot_model(x_low, y_low, x_high, y_high):
mean_low, var_low = model.predict(x_plot_low)
mean_high, var_high = model.predict(x_plot_high)
plt.figure(figsize=FIG_SIZE)
def plot_with_error_bars(x, mean, var, color):
plt.plot(x, mean, color=color)
plt.fill_between(x.flatten(), mean.flatten() - 1.96 * np.sqrt(var.flatten()),
                 mean.flatten() + 1.96 * np.sqrt(var.flatten()), alpha=0.2, color=color)
plot_with_error_bars(x_plot_high[:, 0], mean_low, var_low, 'b')
plot_with_error_bars(x_plot_high[:, 0], mean_high, var_high, 'r')
plt.plot(x_plot, forrester_fcn_high(x_plot), 'k--')
plt.scatter(x_low, y_low, color='b')
plt.scatter(x_high, y_high, color='r')
plt.legend(['Low fidelity model', 'High fidelity model', 'True high fidelity'])
plt.title('Low and High Fidelity Models')
plt.xlim(0, 1)
plt.xlabel('x')
plt.ylabel('y');
plt.show()
plot_model(x_low, y_low, x_high, y_high)
As in [1] & [2] we shall use the entropy search acquisition function, scaled by the cost of evaluating either the high or low fidelity function.
from emukit.bayesian_optimization.acquisitions.entropy_search import MultiInformationSourceEntropySearch
from emukit.core.acquisition import Acquisition
# Define cost of different fidelities as acquisition function
class Cost(Acquisition):
def __init__(self, costs):
self.costs = costs
def evaluate(self, x):
fidelity_index = x[:, -1].astype(int)
x_cost = np.array([self.costs[i] for i in fidelity_index])
return x_cost[:, None]
@property
def has_gradients(self):
return True
def evaluate_with_gradients(self, x):
return self.evaluate(x), np.zeros(x.shape)
cost_acquisition = Cost([low_fidelity_cost, high_fidelity_cost])
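# Dividing the entropy search acquisition by the cost gives (approximate) information gain per unit cost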
acquisition = MultiInformationSourceEntropySearch(model, parameter_space) / cost_acquisition
from emukit.core.loop import FixedIntervalUpdater, OuterLoop, SequentialPointCalculator
from emukit.core.loop.loop_state import create_loop_state
from emukit.core.optimization.multi_source_acquisition_optimizer import MultiSourceAcquisitionOptimizer
from emukit.core.optimization import GradientAcquisitionOptimizer
initial_loop_state = create_loop_state(x_array, y_array)
acquisition_optimizer = MultiSourceAcquisitionOptimizer(GradientAcquisitionOptimizer(parameter_space), parameter_space)
candidate_point_calculator = SequentialPointCalculator(acquisition, acquisition_optimizer)
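# FixedIntervalUpdater re-fits the model after every iteration by default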
model_updater = FixedIntervalUpdater(model)
loop = OuterLoop(candidate_point_calculator, model_updater, initial_loop_state)
We can plot the acquisition function at each iteration to see how it evolves. This is done by using the iteration_end_event on the OuterLoop, which is a list of functions, each with the signature function(loop, loop_state). All functions in the list are called after each iteration of the optimization loop.
def plot_acquisition(loop, loop_state):
colours = ['b', 'r']
plt.plot(x_plot_low[:, 0], loop.candidate_point_calculator.acquisition.evaluate(x_plot_low), 'b')
plt.plot(x_plot_high[:, 0], loop.candidate_point_calculator.acquisition.evaluate(x_plot_high), 'r')
previous_x_collected = loop_state.X[[-1], :]
fidelity_idx = int(previous_x_collected[0, -1])
plt.scatter(previous_x_collected[0, 0],
loop.candidate_point_calculator.acquisition.evaluate(previous_x_collected),
color=colours[fidelity_idx])
plt.legend(['Low fidelity', 'High fidelity'], fontsize=12)
plt.title('Acquisition Function at Iteration ' + str(loop_state.iteration))
plt.xlabel('x')
plt.xlim(0, 1)
plt.ylabel('Acquisition Value')
plt.tight_layout()
plt.show()
loop.iteration_end_event.append(plot_acquisition)
On each iteration of the optimization loop, we record the minimum predicted mean of the high fidelity model and its location.
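# Prediction grid over x in [0, 1] with the fidelity index fixed to 1, so predictions come from the high fidelity model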
x_search = np.stack([np.linspace(0, 1, 1000), np.ones(1000)], axis=1)
model_min_mean = []
model_min_loc = []
def calculate_metrics(loop, loop_state):
mean, var = loop.model_updaters[0].model.predict(x_search)
model_min_mean.append(np.min(mean))
model_min_loc.append(x_search[np.argmin(mean), 0])
# subscribe to event
loop.iteration_end_event.append(calculate_metrics)
loop.run_loop(forrester_fcn, 10)
is_high_fidelity = loop.loop_state.X[:, -1] == 1
plot_model(x_low=loop.loop_state.X[~is_high_fidelity, 0], y_low=loop.loop_state.Y[~is_high_fidelity],
x_high=loop.loop_state.X[is_high_fidelity, 0], y_high=loop.loop_state.Y[is_high_fidelity])
This section compares the multi-fidelity optimization to Bayesian optimization using high fidelity observations only.
from emukit.bayesian_optimization.loops import BayesianOptimizationLoop
from emukit.bayesian_optimization.acquisitions.entropy_search import EntropySearch
from emukit.model_wrappers import GPyModelWrapper
import GPy
# Make model
gpy_model = GPy.models.GPRegression(x_high, y_high)
gpy_model.Gaussian_noise.variance.fix(0.1)
hf_only_model = GPyModelWrapper(gpy_model)
# Create loop
hf_only_space = ParameterSpace([ContinuousParameter('x', 0, 1)])
hf_only_acquisition = EntropySearch(hf_only_model, hf_only_space)
hf_only_loop = BayesianOptimizationLoop(hf_only_space, hf_only_model, hf_only_acquisition)
# Calculate best guess at minimum at each iteration of loop
hf_only_model_min_mean = []
x_search = np.linspace(0, 1, 1000)[:, None]
hf_only_model_min_loc = []
def calculate_metrics(loop, loop_state):
mean, var = loop.model_updaters[0].model.predict(x_search)
hf_only_model_min_mean.append(np.min(mean))
hf_only_model_min_loc.append(x_search[np.argmin(mean)])
# subscribe to event
hf_only_loop.iteration_end_event.append(calculate_metrics)
# Run optimization
hf_only_loop.run_loop(forrester_fcn_high, 10)
# Plot comparison
plt.figure(figsize=FIG_SIZE)
# Calculate cumulative cost of evaluating high fidelity only observations
n_hf_points = hf_only_loop.loop_state.X.shape[0]
cumulative_cost_hf = high_fidelity_cost * (np.array(range(n_hf_points)) + 1)
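# Drop the costs of the initial design points so the costs align with the metrics recorded at each iteration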
cumulative_cost_hf = cumulative_cost_hf[x_high.shape[0]:]
# Calculate cumulative cost of evaluating multi-fidelity observations
cost_mf = cost_acquisition.evaluate(loop.loop_state.X)
cumulative_cost_mf = np.cumsum(cost_mf)
cumulative_cost_mf = cumulative_cost_mf[x_array.shape[0]:]
x_min = np.min([cumulative_cost_hf, cumulative_cost_mf])
x_max = np.max([cumulative_cost_hf, cumulative_cost_mf])
plt.plot(cumulative_cost_hf, hf_only_model_min_loc, 'm', marker='x', markersize=16)
plt.plot(cumulative_cost_mf, model_min_loc, 'c', marker='.', markersize=16)
plt.hlines(x_search[np.argmin(forrester_fcn_high(x_search))], x_min, x_max, color='k', linestyle='--')
plt.legend(['High fidelity only optimization', 'Multi-fidelity optimization', 'True minimum'])
plt.title('Comparison of Multi-Fidelity and High Fidelity Only Optimizations')
plt.ylabel('Estimated Location of Minimum')
plt.xlabel('Cumulative Cost of Evaluating Objective');
plt.show()
The multi-fidelity Bayesian optimization has used the low fidelity observations to find the true optimum of the function at a lower cost than the optimization that uses high fidelity observations only.