#!/usr/bin/env python
# coding: utf-8

# # Multi-Fidelity Bayesian Optimization
#
# This notebook contains an example of using Emukit to perform Bayesian optimization on a function where low-fidelity approximations are available.
#
# The high fidelity function to be optimized is the Forrester function, given by:
#
# $$
# f_{high}(x) = (6x - 2)^2 \sin(12x - 4)
# $$
#
# The low fidelity approximation of this function is given by:
#
# $$
# f_{low}(x) = 0.5 f_{high}(x) + 10 (x - 0.5) + 5
# $$
#
# Both are defined on the interval $[0, 1]$.
#
# Each evaluation of the high fidelity function costs $5$ units, whereas each evaluation of the low fidelity function costs $1$ unit.
#
# We wish to find the location of the minimum of $f_{high}$ while taking advantage of the cheaper-to-evaluate $f_{low}$.

# In[1]:


# Load function
import emukit.test_functions.forrester

# The multi-fidelity Forrester function is already wrapped as an Emukit UserFunction object in
# the test_functions package
forrester_fcn, _ = emukit.test_functions.forrester.multi_fidelity_forrester_function()
forrester_fcn_low = forrester_fcn.f[0]
forrester_fcn_high = forrester_fcn.f[1]

# Assign costs
low_fidelity_cost = 1
high_fidelity_cost = 5


# ## Plot Functions

# In[2]:


import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 16})
FIG_SIZE = (12, 8)

import numpy as np
np.random.seed(12345)

n_plot_points = 500
x_plot = np.linspace(0, 1, n_plot_points)[:, None]
y_plot_low = forrester_fcn_low(x_plot)
y_plot_high = forrester_fcn_high(x_plot)

plt.figure(figsize=FIG_SIZE)
plt.plot(x_plot, y_plot_low, 'b')
plt.plot(x_plot, y_plot_high, 'r')
plt.legend(['Low fidelity', 'High fidelity'])
plt.xlim(0, 1)
plt.title('High and low fidelity Forrester functions')
plt.xlabel('x')
plt.ylabel('y');
plt.show()


# ## Bayesian optimization

# ### Define Parameter Space
#
# The parameter space now contains two parameters: the first is a `ContinuousParameter` that is the $x$ input to the Forrester function. The second is an `InformationSourceParameter` that tells Emukit whether a given function evaluation is to be performed by the high or low fidelity function.

# In[3]:


from emukit.core import ParameterSpace, ContinuousParameter, InformationSourceParameter

n_fidelities = 2
parameter_space = ParameterSpace([ContinuousParameter('x', 0, 1),
                                  InformationSourceParameter(n_fidelities)])


# ### Generate Initial Data
#
# We shall randomly choose 12 low fidelity points, then evaluate the high fidelity function at 6 of them.

# In[4]:


x_low = np.random.rand(12)[:, None]
x_high = x_low[:6, :]
y_low = forrester_fcn_low(x_low)
y_high = forrester_fcn_high(x_high)
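
# *(Illustrative addition, not part of the original notebook.)* Emukit's multi-fidelity models expect inputs with the fidelity index appended as a final column: $0$ for the low fidelity function and $1$ for the high fidelity one. The hand-rolled arrays below mimic what `convert_xy_lists_to_arrays` (used when defining the model below) produces; the `_augmented` variable names are our own.

# In[ ]:


# Append a fidelity-index column by hand: 0 = low fidelity, 1 = high fidelity
x_low_augmented = np.concatenate([x_low, np.zeros((x_low.shape[0], 1))], axis=1)
x_high_augmented = np.concatenate([x_high, np.ones((x_high.shape[0], 1))], axis=1)
print(x_low_augmented.shape, x_high_augmented.shape)  # (12, 2) (6, 2)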
# ## Define Model
#
# We will use the linear multi-fidelity model defined in Emukit. In this model, the high-fidelity function is modelled as the low-fidelity function scaled by a constant $\rho$, plus an independent error term:
#
# $$
# f_{high}(x) = f_{err}(x) + \rho \, f_{low}(x)
# $$

# In[5]:


import GPy

from emukit.multi_fidelity.models.linear_model import GPyLinearMultiFidelityModel
from emukit.multi_fidelity.kernels.linear_multi_fidelity_kernel import LinearMultiFidelityKernel
from emukit.multi_fidelity.convert_lists_to_array import convert_xy_lists_to_arrays
from emukit.model_wrappers import GPyMultiOutputWrapper

x_array, y_array = convert_xy_lists_to_arrays([x_low, x_high], [y_low, y_high])

kern_low = GPy.kern.RBF(1)
kern_low.lengthscale.constrain_bounded(0.01, 0.5)

kern_err = GPy.kern.RBF(1)
kern_err.lengthscale.constrain_bounded(0.01, 0.5)

multi_fidelity_kernel = LinearMultiFidelityKernel([kern_low, kern_err])
gpy_model = GPyLinearMultiFidelityModel(x_array, y_array, multi_fidelity_kernel, n_fidelities)

# Fix the observation noise at both fidelities
gpy_model.likelihood.Gaussian_noise.fix(0.1)
gpy_model.likelihood.Gaussian_noise_1.fix(0.1)

# Wrap the model for Emukit: 2 outputs, 5 optimization restarts
model = GPyMultiOutputWrapper(gpy_model, 2, 5, verbose_optimization=False)
model.optimize()


# In[6]:


x_plot_low = np.concatenate([np.atleast_2d(x_plot), np.zeros((x_plot.shape[0], 1))], axis=1)
x_plot_high = np.concatenate([np.atleast_2d(x_plot), np.ones((x_plot.shape[0], 1))], axis=1)


def plot_model(x_low, y_low, x_high, y_high):
    mean_low, var_low = model.predict(x_plot_low)
    mean_high, var_high = model.predict(x_plot_high)

    plt.figure(figsize=FIG_SIZE)

    def plot_with_error_bars(x, mean, var, color):
        # 95% confidence bands use 1.96 standard deviations, not variances
        std = np.sqrt(var)
        plt.plot(x, mean, color=color)
        plt.fill_between(x.flatten(),
                         mean.flatten() - 1.96 * std.flatten(),
                         mean.flatten() + 1.96 * std.flatten(),
                         alpha=0.2, color=color)

    plot_with_error_bars(x_plot_high[:, 0], mean_low, var_low, 'b')
    plot_with_error_bars(x_plot_high[:, 0], mean_high, var_high, 'r')
    plt.plot(x_plot, forrester_fcn_high(x_plot), 'k--')

    plt.scatter(x_low, y_low, color='b')
    plt.scatter(x_high, y_high, color='r')
    plt.legend(['Low fidelity model', 'High fidelity model', 'True high fidelity'])
    plt.title('Low and High Fidelity Models')
    plt.xlim(0, 1)
    plt.xlabel('x')
    plt.ylabel('y');
    plt.show()


plot_model(x_low, y_low, x_high, y_high)
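
# To make the linear multi-fidelity assumption concrete, the covariance structure it induces can be written out by hand. The sketch below is an illustrative addition with made-up hyperparameter values (`rho_demo` and the demo lengthscale), independent of the `LinearMultiFidelityKernel` fitted above: with independent GP priors on $f_{low}$ and $f_{err}$, the model implies $\mathrm{cov}(f_{high}, f_{low}) = \rho \, k_{low}$ and $\mathrm{var}(f_{high}) = \rho^2 k_{low} + k_{err}$.

# In[ ]:


def rbf_cov(a, b, lengthscale=0.2, variance=1.0):
    # Squared-exponential covariance between two sets of 1-d points
    sq_dists = (a - b.T) ** 2
    return variance * np.exp(-0.5 * sq_dists / lengthscale ** 2)


rho_demo = 0.9  # made-up scale parameter, for illustration only
x_demo = np.linspace(0, 1, 5)[:, None]
k_low = rbf_cov(x_demo, x_demo)
k_err = rbf_cov(x_demo, x_demo)

# Joint covariance over [f_low(x), f_high(x)] implied by
# f_high = rho * f_low + f_err
k_joint = np.block([[k_low, rho_demo * k_low],
                    [rho_demo * k_low, rho_demo ** 2 * k_low + k_err]])
print(k_joint.shape)  # (10, 10)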
# ### Define Acquisition Function
#
# As in [1] and [2], we shall use the entropy search acquisition function, scaled by the cost of evaluating either the high or low fidelity function.

# In[7]:


from emukit.bayesian_optimization.acquisitions.entropy_search import MultiInformationSourceEntropySearch
from emukit.core.acquisition import Acquisition


# Define cost of different fidelities as an acquisition function
class Cost(Acquisition):
    def __init__(self, costs):
        self.costs = costs

    def evaluate(self, x):
        # The fidelity index is stored in the last column of the input
        fidelity_index = x[:, -1].astype(int)
        x_cost = np.array([self.costs[i] for i in fidelity_index])
        return x_cost[:, None]

    @property
    def has_gradients(self):
        return True

    def evaluate_with_gradients(self, x):
        # The cost is piecewise constant in x, so its gradient is zero
        return self.evaluate(x), np.zeros(x.shape)


cost_acquisition = Cost([low_fidelity_cost, high_fidelity_cost])
acquisition = MultiInformationSourceEntropySearch(model, parameter_space) / cost_acquisition


# ### Create OuterLoop

# In[8]:


from emukit.core.loop import FixedIntervalUpdater, OuterLoop, SequentialPointCalculator
from emukit.core.loop.loop_state import create_loop_state
from emukit.core.optimization.multi_source_acquisition_optimizer import MultiSourceAcquisitionOptimizer
from emukit.core.optimization import GradientAcquisitionOptimizer

initial_loop_state = create_loop_state(x_array, y_array)
acquisition_optimizer = MultiSourceAcquisitionOptimizer(GradientAcquisitionOptimizer(parameter_space),
                                                        parameter_space)
candidate_point_calculator = SequentialPointCalculator(acquisition, acquisition_optimizer)
model_updater = FixedIntervalUpdater(model)
loop = OuterLoop(candidate_point_calculator, model_updater, initial_loop_state)


# ### Add Plotting of Acquisition Function
#
# To see how the acquisition function evolves, we subscribe a plotting function to the `iteration_end_event` of the `OuterLoop`. This event is a list of functions, each with the signature `function(loop, loop_state)`; all functions in the list are called after each iteration of the optimization loop.

# In[9]:


def plot_acquisition(loop, loop_state):
    colours = ['b', 'r']
    plt.plot(x_plot_low[:, 0], loop.candidate_point_calculator.acquisition.evaluate(x_plot_low), 'b')
    plt.plot(x_plot_high[:, 0], loop.candidate_point_calculator.acquisition.evaluate(x_plot_high), 'r')

    # Mark the most recently collected point in the colour of its fidelity
    previous_x_collected = loop_state.X[[-1], :]
    fidelity_idx = int(previous_x_collected[0, -1])
    plt.scatter(previous_x_collected[0, 0],
                loop.candidate_point_calculator.acquisition.evaluate(previous_x_collected),
                color=colours[fidelity_idx])
    plt.legend(['Low fidelity', 'High fidelity'], fontsize=12)
    plt.title('Acquisition Function at Iteration ' + str(loop_state.iteration))
    plt.xlabel('x')
    plt.xlim(0, 1)
    plt.ylabel('Acquisition Value')
    plt.tight_layout()
    plt.show()


loop.iteration_end_event.append(plot_acquisition)
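
# As a quick sanity check (an illustrative addition, not part of the original notebook), the `Cost` acquisition defined above can be evaluated directly: the last column of each candidate point selects which fidelity's cost is returned.

# In[ ]:


x_demo_cost = np.array([[0.3, 0.0],   # low fidelity point  -> cost 1
                        [0.3, 1.0]])  # high fidelity point -> cost 5
print(cost_acquisition.evaluate(x_demo_cost))  # [[1] [5]]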
# ### Find Estimated Minimum at Every Iteration
#
# On each iteration of the optimization loop, record the minimum of the high fidelity model's predictive mean and its location.

# In[10]:


x_search = np.stack([np.linspace(0, 1, 1000), np.ones(1000)], axis=1)
model_min_mean = []
model_min_loc = []


def calculate_metrics(loop, loop_state):
    mean, var = loop.model_updaters[0].model.predict(x_search)
    model_min_mean.append(np.min(mean))
    model_min_loc.append(x_search[np.argmin(mean), 0])


# subscribe to event
loop.iteration_end_event.append(calculate_metrics)


# ### Run Optimization

# In[11]:


loop.run_loop(forrester_fcn, 10)


# ### Plot Final Model

# In[12]:


is_high_fidelity = loop.loop_state.X[:, -1] == 1
plot_model(x_low=loop.loop_state.X[~is_high_fidelity, 0], y_low=loop.loop_state.Y[~is_high_fidelity],
           x_high=loop.loop_state.X[is_high_fidelity, 0], y_high=loop.loop_state.Y[is_high_fidelity])


# ## Comparison to High Fidelity Only Bayesian Optimization
#
# This section compares the multi-fidelity optimization to Bayesian optimization using high fidelity observations only.

# In[13]:


import GPy

from emukit.bayesian_optimization.loops import BayesianOptimizationLoop
from emukit.bayesian_optimization.acquisitions.entropy_search import EntropySearch
from emukit.model_wrappers import GPyModelWrapper

# Make model
gpy_model = GPy.models.GPRegression(x_high, y_high)
gpy_model.Gaussian_noise.variance.fix(0.1)
hf_only_model = GPyModelWrapper(gpy_model)

# Create loop
hf_only_space = ParameterSpace([ContinuousParameter('x', 0, 1)])
hf_only_acquisition = EntropySearch(hf_only_model, hf_only_space)
hf_only_loop = BayesianOptimizationLoop(hf_only_space, hf_only_model, hf_only_acquisition)

# Calculate best guess at minimum at each iteration of loop
hf_only_model_min_mean = []
x_search = np.linspace(0, 1, 1000)[:, None]
hf_only_model_min_loc = []


def calculate_metrics(loop, loop_state):
    mean, var = loop.model_updaters[0].model.predict(x_search)
    hf_only_model_min_mean.append(np.min(mean))
    hf_only_model_min_loc.append(x_search[np.argmin(mean)])


# subscribe to event
hf_only_loop.iteration_end_event.append(calculate_metrics)

# Run optimization
hf_only_loop.run_loop(forrester_fcn_high, 10)


# ### Plot Estimated Minimum Location

# In[14]:


# Plot comparison
plt.figure(figsize=FIG_SIZE)

# Calculate cumulative cost of evaluating high fidelity only observations,
# excluding the initial design points
n_hf_points = hf_only_loop.loop_state.X.shape[0]
cumulative_cost_hf = high_fidelity_cost * (np.array(range(n_hf_points)) + 1)
cumulative_cost_hf = cumulative_cost_hf[x_high.shape[0]:]

# Calculate cumulative cost of evaluating multi-fidelity observations,
# excluding the initial design points
cost_mf = cost_acquisition.evaluate(loop.loop_state.X)
cumulative_cost_mf = np.cumsum(cost_mf)
cumulative_cost_mf = cumulative_cost_mf[x_array.shape[0]:]

x_min = np.min([cumulative_cost_hf, cumulative_cost_mf])
x_max = np.max([cumulative_cost_hf, cumulative_cost_mf])

plt.plot(cumulative_cost_hf, hf_only_model_min_loc, 'm', marker='x', markersize=16)
plt.plot(cumulative_cost_mf, model_min_loc, 'c', marker='.', markersize=16)
plt.hlines(x_search[np.argmin(forrester_fcn_high(x_search))], x_min, x_max, color='k', linestyle='--')
plt.legend(['High fidelity only optimization', 'Multi-fidelity optimization', 'True minimum'])
plt.title('Comparison of Multi-Fidelity and High Fidelity Only Optimizations')
plt.ylabel('Estimated Location of Minimum')
plt.xlabel('Cumulative Cost of Evaluating Objective');
plt.show()
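
# The cumulative costs computed above can also be printed directly (an illustrative addition, not part of the original notebook) to quantify the difference in evaluation budget between the two runs.

# In[ ]:


print('Total cost, multi-fidelity run:', cumulative_cost_mf[-1])
print('Total cost, high-fidelity-only run:', cumulative_cost_hf[-1])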
# The multi-fidelity Bayesian optimization uses the low fidelity observations to find the true minimum of the function at a lower cumulative cost than the high-fidelity-only optimization requires.

# # References
#
# - **[1] Virtual vs. Real: Trading Off Simulations and Physical Experiments in Reinforcement Learning with Bayesian Optimization.** Alonso Marco, Felix Berkenkamp, Philipp Hennig, Angela P. Schoellig, Andreas Krause, Stefan Schaal, Sebastian Trimpe. ICRA 2017.
# - **[2] Multi-Task Bayesian Optimization.** Kevin Swersky, Jasper Snoek, Ryan P. Adams. NIPS 2013.