#!/usr/bin/env python
# coding: utf-8

# ## Investigating different cost functions
#
# In this notebook, we take a look at the different fitting cost functions offered in PyBOP. Cost functions for fitting problems conventionally describe the distance between two points (the target and the prediction), which is to be minimised via PyBOP's optimisation algorithms.
#
# First, we install and import the required packages below.

# In[ ]:

get_ipython().run_line_magic('pip', 'install --upgrade pip ipywidgets -q')
get_ipython().run_line_magic('pip', 'install pybop -q')

import numpy as np

import pybop

go = pybop.plot.PlotlyManager().go
pybop.plot.PlotlyManager().pio.renderers.default = "notebook_connected"


# Let's fix the random seed in order to generate consistent output during development, although this does not need to be done in practice.

# In[ ]:

np.random.seed(8)


# For this notebook, we need to construct parameters, a model and a problem class before we can compare differing cost functions. We start with two parameters, but this is an arbitrary selection and can be expanded given the model and data in question.

# In[ ]:

parameters = pybop.Parameters(
    pybop.Parameter(
        "Positive electrode thickness [m]",
        prior=pybop.Gaussian(7.56e-05, 0.5e-05),
        bounds=[65e-06, 10e-05],
    ),
    pybop.Parameter(
        "Positive particle radius [m]",
        prior=pybop.Gaussian(5.22e-06, 0.5e-06),
        bounds=[2e-06, 9e-06],
    ),
)


# Next, we will construct the Single Particle Model (SPM) with the Chen2020 parameter set, but like the above, this is an arbitrary selection and can be replaced with any PyBOP model.

# In[ ]:

parameter_set = pybop.ParameterSet.pybamm("Chen2020")
model = pybop.lithium_ion.SPM(parameter_set=parameter_set)


# Next, as we will need reference data to compare our model predictions to (via the cost function), we will create synthetic data from the model constructed above.

# In[ ]:

t_eval = np.arange(0, 900, 10)
values = model.predict(t_eval=t_eval)


# We can then construct the PyBOP dataset class with the synthetic data as follows:

# In[ ]:

dataset = pybop.Dataset(
    {
        "Time [s]": t_eval,
        "Current function [A]": values["Current [A]"].data,
        "Voltage [V]": values["Voltage [V]"].data,
    }
)


# Now, we can put this all together and construct the problem class. In this situation, we are going to compare differing fitting cost functions, so we construct the `FittingProblem`.

# In[ ]:

problem = pybop.FittingProblem(model, parameters, dataset)


# ### Sum of Squared Errors and Root Mean Squared Error
#
# First, let's start with two commonly used cost functions: the sum of squared errors (SSE) and the root mean squared error (RMSE). Constructing these classes is very concise in PyBOP, requiring only the problem class.

# In[ ]:

cost_SSE = pybop.SumSquaredError(problem)
cost_RMSE = pybop.RootMeanSquaredError(problem)


# Now, we can investigate how these functions differ when fitting the parameters.
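# Before evaluating these through PyBOP, it may help to see exactly what each
# cost computes. The following cell is a minimal NumPy sketch of the two
# definitions (an illustrative aside, not PyBOP's implementation); the
# noise-perturbed copy of the synthetic voltage is a hypothetical stand-in for
# a model prediction at some candidate parameter values.

# In[ ]:

target = values["Voltage [V]"].data
simulated = target + np.random.normal(0, 1e-3, size=target.shape)  # hypothetical prediction

residuals = simulated - target
sse = np.sum(residuals**2)  # sum of squared errors
rmse = np.sqrt(np.mean(residuals**2))  # root mean squared error
print(f"SSE: {sse:.3e}, RMSE: {rmse:.3e}")


# Note that the SSE scales with the number of data points, while the RMSE is
# normalised by it; this partly explains the difference in magnitude (and
# curvature) seen in the comparison below.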
# To acquire the cost value for each of these, we can simply use the call method of the constructed class, such as:

# In[ ]:

cost_SSE([7.56e-05, 5.22e-06])


# Alternatively, we can use the `Parameters` class for this:

# In[ ]:

print(parameters.current_value())
cost_SSE(parameters.current_value())


# If we want to generate a random sample of candidate solutions from the parameter priors, we can do so as follows:

# In[ ]:

sample = parameters.rvs()
print(sample)
cost_SSE(sample)


# #### Comparing RMSE and SSE
#
# Now, let's vary one of the parameters, keeping a fixed value for the other, to create a line plot comparing the cost values of the RMSE and SSE functions.

# In[ ]:

x_range = np.linspace(4.72e-06, 5.72e-06, 75)
y_SSE = []
y_RMSE = []
for i in x_range:
    y_SSE.append(cost_SSE([7.56e-05, i]))
    y_RMSE.append(cost_RMSE([7.56e-05, i]))

fig = go.Figure()
fig.add_trace(go.Scatter(x=x_range, y=y_SSE, mode="lines", name="SSE"))
fig.add_trace(go.Scatter(x=x_range, y=y_RMSE, mode="lines", name="RMSE"))
fig.show()


# In this situation, it's clear that the curvature of the SSE cost is greater than that of the RMSE. This can improve the rate of convergence for certain optimisation algorithms. However, with incorrect hyperparameter values, larger gradients can also result in the algorithm failing to converge, due to sampling locations outside of the "cost valley", e.g. at infeasible parameter values.

# ### Minkowski distance
#
# Next, let's investigate the Minkowski distance. The Minkowski cost takes a general form, which allows for hyperparameter calibration on the cost function itself, given by
#
# $\mathcal{L}_p = \displaystyle \Big(\sum_i |\hat{y}_i - y_i|^p\Big)^{1/p}$
#
# where $p \geq 0$ is the order of the Minkowski distance.
#
# - For $p = 1$, it is the Manhattan distance.
# - For $p = 2$, it is the Euclidean distance.
# - For $p \geq 1$, the Minkowski distance is a metric, but for $0 < p < 1$ it is not, as the triangle inequality does not hold.
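# To make the role of $p$ concrete, here is a short NumPy sketch of the
# Minkowski distance itself (an illustrative implementation, independent of
# PyBOP), confirming that $p = 1$ and $p = 2$ recover the Manhattan and
# Euclidean distances, respectively.

# In[ ]:

def minkowski_distance(y_hat, y, p):
    """Illustrative Minkowski distance of order p between two arrays."""
    return np.sum(np.abs(y_hat - y) ** p) ** (1 / p)


y_hat = np.array([1.0, 2.0, 3.0])
y = np.array([1.5, 1.5, 2.0])

print(minkowski_distance(y_hat, y, p=1))  # equals np.sum(np.abs(y_hat - y))
print(minkowski_distance(y_hat, y, p=2))  # equals np.linalg.norm(y_hat - y)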