#!/usr/bin/env python
# coding: utf-8

# `KDD2024 Tutorial / A Hands-On Introduction to Time Series Classification and Regression`
#
# # Distance measures and distance-based classification and regression
#
# Similarity-based approaches were among the first proposals for tackling the classification and regression of time series. Moreover, these types of techniques are also common in other time series tasks, such as clustering and anomaly detection, among others.
#
# The first part of this notebook introduces the distances implemented in `aeon`, starting with the simplest, the Euclidean distance. It then introduces the Dynamic Time Warping (DTW) distance, its variants, such as Amerced DTW (ADTW) and Shape DTW (ShapeDTW), and some alternative distances, such as Move-Split-Merge (MSM).
#
# In the second part of this notebook, we demonstrate the use of these distances coupled with the `KNeighborsClassifier` from `scikit-learn`, as well as `aeon`'s `KNeighborsTimeSeriesClassifier`, an implementation better suited to time series. Similarly, distance-based approaches for tackling TSER datasets are also presented.
#
# The `aeon` documentation contains notebooks on [distances](https://www.aeon-toolkit.org/en/stable/examples/distances/distances.html), [distance based classification](https://www.aeon-toolkit.org/en/stable/examples/classification/distance_based.html) and on using `aeon` distances with [`scikit-learn`](https://www.aeon-toolkit.org/en/stable/examples/distances/sklearn_distances.html).

# ## Table of Contents
#
# * [Load example data](#load-data)
# * [Euclidean Distance (direct alignment)](#ed-distance)
# * [Elastic distances: Dynamic Time Warping (DTW)](#dtw)
# * [Key distance measures: DTW variants](#dtw-variants)
# * [Key distance measures: DTW alternatives](#dtw-alternatives)
# * [Distance-based univariate Time Series Classification (TSC)](#distance-tsc)
# * [Distance-based Multivariate TSC (MTSC)](#distance-mtsc)
# * [Distance-based univariate Time Series Extrinsic Regression (TSER)](#distance-tser)
# * [Distance-based Multivariate TSER (MTSER)](#distance-mtser)
# * [Advanced distance-based approaches: Elastic Ensemble (EE) and Proximity Forest (PF)](#advanced-approaches)
# * [Performance on the UCR univariate classification datasets](#evaluation)
# * [References](#references)

# In[ ]:

get_ipython().system('pip install aeon==0.11.0')
get_ipython().system('mkdir -p data')
get_ipython().system('wget -nc https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data/KDD_MTSC_TRAIN.ts -P data/')
get_ipython().system('wget -nc https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data/KDD_MTSC_TEST.ts -P data/')
get_ipython().system('wget -nc https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data/KDD_UTSC_TRAIN.ts -P data/')
get_ipython().system('wget -nc https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data/KDD_UTSC_TEST.ts -P data/')
get_ipython().system('wget -nc https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data/KDD_MTSER_TRAIN.ts -P data/')
get_ipython().system('wget -nc https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data/KDD_MTSER_TEST.ts -P data/')
get_ipython().system('wget -nc https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data/KDD_UTSER_TRAIN.ts -P data/')
get_ipython().system('wget -nc https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data/KDD_UTSER_TEST.ts -P data/')
# In[1]:

# There are some deprecation warnings in this notebook; we will ignore them.
# Remove this cell if you are interested in finding out what is changing soon:
# for aeon, there will be big changes in our v1.0.0 release!
import warnings

warnings.filterwarnings("ignore")

# In[2]:

from aeon.registry import all_estimators

all_estimators(
    "classifier", filter_tags={"algorithm_type": "distance"}, as_dataframe=True
)

# In[3]:

all_estimators(
    "regressor", filter_tags={"algorithm_type": "distance"}, as_dataframe=True
)

# ## Load example data

# In[4]:

from aeon.datasets import load_from_tsfile

# Load the univariate TSC dataset as a 2D numpy array
X_train_utsc, y_train_utsc = load_from_tsfile("./data/KDD_UTSC_TRAIN.ts", return_type="numpy2D")
X_test_utsc, y_test_utsc = load_from_tsfile("./data/KDD_UTSC_TEST.ts", return_type="numpy2D")

print("Train shape:", X_train_utsc.shape)
print("Test shape:", X_test_utsc.shape)

# In[5]:

# Load the multivariate TSC dataset as a 3D numpy array
X_train_mtsc, y_train_mtsc = load_from_tsfile("./data/KDD_MTSC_TRAIN.ts")
X_test_mtsc, y_test_mtsc = load_from_tsfile("./data/KDD_MTSC_TEST.ts")

print("Train shape:", X_train_mtsc.shape)
print("Test shape:", X_test_mtsc.shape)

# In[6]:

# Load the univariate TSER dataset as a 2D numpy array
X_train_utser, y_train_utser = load_from_tsfile("./data/KDD_UTSER_TRAIN.ts", return_type="numpy2D")
X_test_utser, y_test_utser = load_from_tsfile("./data/KDD_UTSER_TEST.ts", return_type="numpy2D")

print("Train shape:", X_train_utser.shape)
print("Test shape:", X_test_utser.shape)

# In[7]:

# Load the multivariate TSER dataset as a 3D numpy array
X_train_mtser, y_train_mtser = load_from_tsfile("./data/KDD_MTSER_TRAIN.ts")
X_test_mtser, y_test_mtser = load_from_tsfile("./data/KDD_MTSER_TEST.ts")

print("Train shape:", X_train_mtser.shape)
print("Test shape:", X_test_mtser.shape)

# For our distance measure examples, let's define two different random univariate time series ($a$ and $b$) with length $m=10$, and plot them.

# In[8]:

from aeon.testing.data_generation import make_example_1d_numpy
import seaborn as sns
import matplotlib.pyplot as plt

a = make_example_1d_numpy(n_timepoints=10, random_state=0)
b = make_example_1d_numpy(n_timepoints=10, random_state=1)

sns.lineplot(data=a, label=r"$\mathbf{a}$")
sns.lineplot(data=b, label=r"$\mathbf{b}$")
plt.show()

# ## Euclidean Distance (direct alignment)
#
# The simplest way to compute the distance between two series is the direct
# alignment, which sums the squared differences between points at the same time
# steps and takes the square root. This distance measure is known as the
# Euclidean Distance:
#
# $$d_{ED}(a, b) = \sqrt{\sum_{i=1}^m(a_i-b_i)^2}$$
#
# This distance can be computed as follows:

# In[9]:

import numpy as np

ED = np.sqrt(np.sum(np.power(a - b, 2)))
print(f"The Euclidean distance between a and b is {ED}")

# `aeon` includes this distance measure (and a number of other more advanced
# distances) in `aeon.distances`.

# In[10]:

from aeon.distances import euclidean_distance

print(
    f"The Euclidean distance computed in aeon between a and b is",
    f"{euclidean_distance(a, b)}"
)
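# As a quick sanity check, the same value can be obtained in one line as the
# Euclidean norm of the difference vector:

# In[ ]:

print(f"The Euclidean distance via np.linalg.norm is {np.linalg.norm(a - b)}")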
# Visually, this distance is the square root of the sum of the pointwise
# squared differences: the larger the differences, the higher the Euclidean
# distance.

# In[11]:

# Plot the pointwise distances between two time series along an alignment path.
def plot_time_series_distances(a, b, cost_matrix, path):
    sns.lineplot(data=a, label=r"$\mathbf{a}$")
    sns.lineplot(data=b, label=r"$\mathbf{b}$")
    # join the points in the path
    for i in range(len(path)):
        plt.plot(
            [path[i][0], path[i][1]],
            [a[path[i][0]], b[path[i][1]]],
            'k--', alpha=0.2
        )
        plt.text(
            (path[i][0]+path[i][1])/2,
            (a[path[i][0]]+b[path[i][1]])/2,
            f"{cost_matrix[path[i][0], path[i][1]]:.2f}"
        )
    return plt

# Compute the squared pointwise distance matrix between a and b.
def squared_pointwise_distance(a, b):
    n_timesteps = len(a)
    distances_matrix = np.zeros((n_timesteps, n_timesteps))
    for i in range(n_timesteps):
        for j in range(n_timesteps):
            distances_matrix[i, j] = (a[i]-b[j])**2
    return distances_matrix

cost_matrix = squared_pointwise_distance(a, b)
path = [(i, i) for i in range(len(a))]
ax = plot_time_series_distances(a, b, cost_matrix, path)
plt.show()

# The Euclidean distance can also be seen as the square root of the sum of the
# values on the diagonal of the matrix of all possible squared pointwise
# differences between elements of the two series.
#
# First, we compute this distance matrix, which contains the squared
# differences between all pairs of elements from the two series:

# In[12]:

from aeon.visualisation.distances._pairwise_distance_matrix import plot_pairwise_distance_matrix

# Compute the squared pointwise distances between a and b
distance_matrix = squared_pointwise_distance(a, b)

# Draw the heatmap
ax = plot_pairwise_distance_matrix(distance_matrix, a, b, path)
plt.show()

# ## Elastic distances: Dynamic Time Warping (DTW) [[1]](#references)
#
# However, direct alignment is not the only way to compute distances between
# series. DTW allows points to be aligned across time steps, i.e. points do not
# have to be matched to the same time step; only the first and last time steps
# must be aligned with each other.
#
# In this first example, we use a narrow window ($w=0.1$), which restricts
# alignments to points at most one time step apart.

# In[13]:

from aeon.distances import dtw_distance

print(
    f"The DTW distance computed in aeon between a and b is",
    f"{dtw_distance(a, b, window=0.1)}"
)

# Graphically, the DTW distance has been computed aligning these points:

# In[14]:

from aeon.distances import dtw_alignment_path, dtw_cost_matrix

path, _ = dtw_alignment_path(a, b, window=0.1)
cost_matrix = dtw_cost_matrix(a, b, window=0.1)

sns.lineplot(data=a, label=r"$\mathbf{a}$")
sns.lineplot(data=b, label=r"$\mathbf{b}$")

# join the points in the path
for i in range(len(path)):
    plt.plot([path[i][0], path[i][1]], [a[path[i][0]], b[path[i][1]]], 'k--', alpha=0.2)
    plt.text(
        (path[i][0]+path[i][1])/2,
        (a[path[i][0]]+b[path[i][1]])/2,
        f"{cost_matrix[path[i][0], path[i][1]]:.2f}"
    )
plt.show()

# As can be observed, the distances are now not necessarily computed between
# points at the same time step: neighbouring time points can also be aligned.
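# Under the hood, DTW computes this alignment with a dynamic program over the
# pointwise cost matrix. The following is a minimal sketch of that recurrence
# in plain numpy, assuming squared pointwise costs and no window constraint
# (the helper `naive_dtw` is ours, not part of `aeon`); its result should match
# `dtw_distance(a, b, window=1)`.

# In[ ]:

def naive_dtw(a, b):
    # D[i, j] holds the cost of the best alignment of a[:i+1] with b[:j+1].
    m, n = len(a), len(b)
    D = np.full((m, n), np.inf)
    D[0, 0] = (a[0] - b[0]) ** 2
    for i in range(m):
        for j in range(n):
            if i == 0 and j == 0:
                continue
            best_prev = min(
                D[i - 1, j] if i > 0 else np.inf,  # advance along a only
                D[i, j - 1] if j > 0 else np.inf,  # advance along b only
                D[i - 1, j - 1] if min(i, j) > 0 else np.inf,  # advance along both
            )
            D[i, j] = (a[i] - b[j]) ** 2 + best_prev
    return D[-1, -1]

print(f"naive_dtw: {naive_dtw(a, b)}, aeon dtw (full window): {dtw_distance(a, b, window=1)}")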
# In this case, the distance is the cost of the minimum-cost path that
# satisfies the aforementioned constraints:
#
# (Note that, to read the path, it is easiest to start from the last position,
# in this case (9, 9), and repeatedly move to the cheapest reachable position.
# For example, from (9, 9) we can move to: A) (8, 9) with a cost of 1.32,
# B) (8, 8) with a cost of 1.32, or C) (9, 8) with a cost of 1.17. The latter
# is the right choice.)

# In[15]:

path, dtw_dist = dtw_alignment_path(a, b, window=0.1)
print(f"DTW distance between a and b is {dtw_dist}.")

# Compute the DTW cost matrix between a and b
cost_matrix = dtw_cost_matrix(a, b, window=0.1)

# Draw the heatmap
ax = plot_pairwise_distance_matrix(cost_matrix, a, b, path)
plt.show()

# It is important to know that DTW warping can be made more permissive,
# allowing a wider alignment. This behaviour is controlled by another
# constraint: the window size.
#
# The windowing constraint avoids aligning points separated by more than a
# given percentage of the time series length. In other words, if the window
# constraint is $w$, the $i$-th point can only be aligned with points in the
# interval $[i - \lfloor w \cdot m \rfloor, i + \lfloor w \cdot m \rfloor]$.
# In the previous examples, a narrow window of $w=0.1$ was used.
#
# Now, if we widen the window to $w=0.5$, we can see that the path changes:

# In[16]:

w = 0.5
path, dtw_dist = dtw_alignment_path(a, b, window=w)
print(f"DTW distance with {w=} between a and b is {dtw_dist}.")

# Compute the DTW cost matrix between a and b
cost_matrix = dtw_cost_matrix(a, b, window=w)

# Draw the heatmap
ax = plot_pairwise_distance_matrix(cost_matrix, a, b, path)
plt.show()

# As can be observed, the path satisfies the windowing constraint. If we do not
# want to constrain the alignment at all, we use $w=1$ (equivalent to the
# default behaviour of no window constraint):

# In[17]:

w = 1
path, dtw_dist = dtw_alignment_path(a, b, window=w)
print(f"DTW distance with {w=} between a and b is {dtw_dist}.")

# Compute the DTW cost matrix between a and b
cost_matrix = dtw_cost_matrix(a, b, window=w)

# Draw the heatmap
ax = plot_pairwise_distance_matrix(cost_matrix, a, b, path)
plt.show()

# Finally, note that the most commonly used value for the window is $w=0.2$:

# In[18]:

w = 0.2
path, dtw_dist = dtw_alignment_path(a, b, window=w)
print(f"DTW distance with {w=} between a and b is {dtw_dist}.")

# Compute the DTW cost matrix between a and b
cost_matrix = dtw_cost_matrix(a, b, window=w)

# Draw the heatmap
ax = plot_pairwise_distance_matrix(cost_matrix, a, b, path)
plt.show()

# ## Key distance measures: DTW variants
#
# There are some other important DTW variants presented in the literature, such as:
# - Amerced Dynamic Time Warping (ADTW) [[2]](#references), which imposes an additive penalty for each warping action.
# - Shape Dynamic Time Warping (ShapeDTW) [[3]](#references), which scores each alignment of points using shape information, such as slopes, wavelet transforms and piecewise approximations of the region around the points.
# - Weighted Dynamic Time Warping (WDTW) [[4]](#references), which imposes a multiplicative penalty based on the distance from the diagonal (see the sketch after this list).
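# To illustrate the WDTW penalty, below is a minimal sketch of the logistic
# weight function described in [4]. The helper name `wdtw_weight` and the value
# of the steepness parameter `g` are illustrative; see `aeon`'s `wdtw_distance`
# for the exact implementation.

# In[ ]:

def wdtw_weight(warp, m, g=0.05):
    # Logistic weight: the further an alignment strays from the diagonal
    # (larger warp = |i - j|), the larger the multiplicative penalty.
    return 1.0 / (1.0 + np.exp(-g * (warp - m / 2)))

for warp in [0, 1, 5, 9]:
    print(f"|i - j| = {warp}: weight = {wdtw_weight(warp, m=10):.3f}")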
# All these DTW variants, and others, are implemented in `aeon`:

# In[19]:

from aeon.distances._distance import DISTANCES

# These are the DTW variants available in aeon for time series:
dtw_variants = [d["name"] for d in DISTANCES if 'dtw' in d["name"]]
print(f"DTW variants available in aeon: {dtw_variants}")

# As can be observed, each variant yields a different distance value:

# In[20]:

from aeon.distances import get_distance_function

for variant in dtw_variants:
    distance_function = get_distance_function(variant)
    distance = distance_function(a, b, window=0.2)
    print(f"{variant} distance between a and b is {distance}")

# ## Key distance measures: DTW alternatives
#
# Some other distance measures included in `aeon` are:
#
# - Longest Common Subsequence (LCSS) [[5]](#references)
# - Manhattan
# - Minkowski
# - Move-Split-Merge (MSM) [[6]](#references)
# - Time Warp Edit (TWE) [[7]](#references)
#
# Now, we compute the distance between $a$ and $b$ using all the distances available in `aeon`:

# In[21]:

for dist in DISTANCES:
    print(f"{dist['name']} distance between a and b is {dist['distance'](a, b)}")

# # Distance-based univariate Time Series Classification (TSC) [[8]](#references)
#
# One of the first ways to classify a set of time series is by means of a
# k-nearest-neighbours (kNN) approach coupled with an appropriate time series
# distance measure.
#
# The easiest way is to use the `scikit-learn` kNN approach coupled with one of
# the distances implemented in `aeon`. There are two different ways:
#
# 1. Pass the distance function as a callable to the `metric` parameter in the constructor, or
# 2. Set `metric` to "precomputed" in the constructor and pass a pairwise distance matrix to `fit` and `predict`.
#
# For this example, we use the univariate TSC dataset loaded earlier as a 2D
# numpy array with dimensions (n_cases, n_timepoints), given that this is the
# format accepted by `scikit-learn`. Even though `aeon` allows the use of 2D
# numpy arrays, it generally prefers 3D arrays of shape
# (n_cases, n_channels, n_timepoints). For more information on data structures
# in `aeon`, please see the [data_formats](../datasets/data_structures.ipynb) notebook.

# In[22]:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn = KNeighborsClassifier(n_neighbors=1, metric=dtw_distance)
knn.fit(X_train_utsc, y_train_utsc)
predictions_utsc = knn.predict(X_test_utsc)
accuracy = accuracy_score(y_test_utsc, predictions_utsc)
print(f"Accuracy of 1-NN (sklearn) with DTW is {accuracy}")

# `aeon` also includes a version of the kNN approach specifically designed to
# deal with time series. In this case, the input can be a 3D numpy array, which
# allows the use of this classifier with multivariate time series, as we will
# see later.

# In[23]:

from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier

knn_aeon = KNeighborsTimeSeriesClassifier(n_neighbors=1, distance=dtw_distance)
knn_aeon.fit(X_train_utsc, y_train_utsc)
predictions_utsc_aeon = knn_aeon.predict(X_test_utsc)
accuracy = accuracy_score(y_test_utsc, predictions_utsc_aeon)
print(f"Accuracy of 1-NN (aeon) with DTW is {accuracy}")

# Furthermore, there is one last way of coupling the `aeon` time series
# distances with the standard kNN implementation in `scikit-learn`.
# This approach consists of precomputing the pairwise distances as follows:

# In[24]:

from aeon.distances import dtw_pairwise_distance

train_dists = dtw_pairwise_distance(X_train_utsc)
test_dists = dtw_pairwise_distance(X_test_utsc, X_train_utsc)

# scikit-learn KNN classifier with precomputed distances
knn = KNeighborsClassifier(metric="precomputed", n_neighbors=1)
knn.fit(train_dists, y_train_utsc)
predictions_utsc = knn.predict(test_dists)
print(
    f"Accuracy of 1-NN (sklearn) with DTW using precomputed distances is",
    f"{accuracy_score(y_test_utsc, predictions_utsc)}"
)

# As can be observed, all three approaches achieve the same accuracy.

# # Distance-based Multivariate TSC (MTSC) [[9]](#references)
#
# The `scikit-learn` kNN version cannot be applied to 3D numpy arrays. However,
# as we have just seen, there is a way in which `aeon` time series distances
# can be coupled with the `scikit-learn` kNN: the use of precomputed pairwise
# distances (in this case with the ADTW distance, to show the variety of
# distances included in `aeon`):

# In[25]:

from aeon.distances import adtw_pairwise_distance, adtw_distance

train_dists = adtw_pairwise_distance(X_train_mtsc)
test_dists = adtw_pairwise_distance(X_test_mtsc, X_train_mtsc)

# scikit-learn KNN classifier with precomputed distances
knn = KNeighborsClassifier(metric="precomputed", n_neighbors=1)
knn.fit(train_dists, y_train_mtsc)
predictions_mtsc = knn.predict(test_dists)
print(
    f"Accuracy of 1-NN (sklearn) with ADTW using precomputed distances is",
    f"{accuracy_score(y_test_mtsc, predictions_mtsc)}"
)

# If we instead use the time-series-specific kNN approach included in `aeon`,
# we do not need to precompute the pairwise distance matrix, which may be
# computationally and memory intensive:

# In[26]:

knn_aeon = KNeighborsTimeSeriesClassifier(n_neighbors=1, distance=adtw_distance)
knn_aeon.fit(X_train_mtsc, y_train_mtsc)
predictions_mtsc_aeon = knn_aeon.predict(X_test_mtsc)
accuracy = accuracy_score(y_test_mtsc, predictions_mtsc_aeon)
print(f"Accuracy of 1-NN (aeon) with ADTW is {accuracy}")

# For those distances already implemented in `aeon`, we can use their string
# identifier instead of the callable distance. For example, the ADTW distance
# is identified as `"adtw"`:

# In[27]:

knn_aeon = KNeighborsTimeSeriesClassifier(n_neighbors=1, distance="adtw")
knn_aeon.fit(X_train_mtsc, y_train_mtsc)
predictions_mtsc_aeon = knn_aeon.predict(X_test_mtsc)
accuracy = accuracy_score(y_test_mtsc, predictions_mtsc_aeon)
print(f"Accuracy of 1-NN (aeon) with ADTW is {accuracy}")

# # Distance-based univariate Time Series Extrinsic Regression (TSER) [[10,11]](#references)
#
# To avoid repeating content, in this section we demonstrate the usefulness of
# the `scikit-learn` kNN approach coupled with an `aeon` distance for a
# univariate TSER dataset, using two of the best-performing distances: MSM and TWE.
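# Before running the experiments, it is worth noting one reason MSM is
# attractive: unlike DTW, it is a true metric [[6]](#references), i.e. it
# satisfies the triangle inequality. A quick numeric illustration on our toy
# series (a check on a single triple, not a proof):

# In[ ]:

from aeon.distances import msm_distance

# Generate a third random series and check the triangle inequality on (a, b, c)
c = make_example_1d_numpy(n_timepoints=10, random_state=2)
print(
    "Triangle inequality holds:",
    msm_distance(a, c) <= msm_distance(a, b) + msm_distance(b, c),
)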
# In[28]:

from aeon.distances import (
    msm_pairwise_distance,
    twe_pairwise_distance,
)
from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

train_dists = msm_pairwise_distance(X_train_utser)
test_dists = msm_pairwise_distance(X_test_utser, X_train_utser)

knn = KNeighborsRegressor(n_neighbors=1, metric="precomputed")
knn.fit(train_dists, y_train_utser)
predictions_utser = knn.predict(test_dists)
mse = mean_squared_error(y_test_utser, predictions_utser)
print(f"MSE of 1-NN (sklearn) with MSM is {mse}")

knn_aeon = KNeighborsTimeSeriesRegressor(n_neighbors=1, distance="msm")
knn_aeon.fit(X_train_utser, y_train_utser)
predictions_utser_aeon = knn_aeon.predict(X_test_utser)
mse = mean_squared_error(y_test_utser, predictions_utser_aeon)
print(f"MSE of 1-NN (aeon) with MSM is {mse}")

# In[29]:

# Now, for the TWE distance:
train_dists = twe_pairwise_distance(X_train_utser, window=0.1)
test_dists = twe_pairwise_distance(X_test_utser, X_train_utser, window=0.1)

knn = KNeighborsRegressor(n_neighbors=1, metric="precomputed")
knn.fit(train_dists, y_train_utser)
predictions_utser = knn.predict(test_dists)
mse = mean_squared_error(y_test_utser, predictions_utser)
print(f"MSE of 1-NN (sklearn) with TWE is {mse}")

knn_aeon = KNeighborsTimeSeriesRegressor(
    n_neighbors=1, distance="twe", distance_params={"window": 0.1}
)
knn_aeon.fit(X_train_utser, y_train_utser)
predictions_utser_aeon = knn_aeon.predict(X_test_utser)
mse = mean_squared_error(y_test_utser, predictions_utser_aeon)
print(f"MSE of 1-NN (aeon) with TWE is {mse}")

# As can be observed, both approaches again achieve the same results for both
# distances.
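# To see what the 1-NN regressor does under the hood, note that its prediction
# for each test series is simply the training target of the single closest
# series. A minimal manual check in plain numpy (not aeon API), using the TWE
# distance matrix computed above; barring ties, it should match the
# scikit-learn predictions:

# In[ ]:

nearest = np.argmin(test_dists, axis=1)  # index of the closest training series
manual_preds = y_train_utser[nearest]    # its target is the prediction
print("Manual 1-NN matches sklearn:", np.allclose(manual_preds, predictions_utser))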
# # Distance-based Multivariate TSER (MTSER) [[10,11]](#references)
#
# To avoid repeating content, in this section we demonstrate the usefulness of
# the `scikit-learn` kNN approach coupled with an `aeon` distance for a
# multivariate TSER dataset, using two other powerful distances: ERP and WDTW.

# In[30]:

from aeon.distances import (
    erp_pairwise_distance,
    wdtw_pairwise_distance,
)
from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

train_dists = erp_pairwise_distance(X_train_mtser)
test_dists = erp_pairwise_distance(X_test_mtser, X_train_mtser)

knn = KNeighborsRegressor(n_neighbors=1, metric="precomputed")
knn.fit(train_dists, y_train_mtser)
predictions_mtser = knn.predict(test_dists)
mse = mean_squared_error(y_test_mtser, predictions_mtser)
print(f"MSE of 1-NN (sklearn) with ERP is {mse}")

knn_aeon = KNeighborsTimeSeriesRegressor(n_neighbors=1, distance="erp")
knn_aeon.fit(X_train_mtser, y_train_mtser)
predictions_mtser_aeon = knn_aeon.predict(X_test_mtser)
mse = mean_squared_error(y_test_mtser, predictions_mtser_aeon)
print(f"MSE of 1-NN (aeon) with ERP is {mse}")

# In[31]:

# Now, for the WDTW distance:
train_dists = wdtw_pairwise_distance(X_train_mtser, window=0.1)
test_dists = wdtw_pairwise_distance(X_test_mtser, X_train_mtser, window=0.1)

knn = KNeighborsRegressor(n_neighbors=1, metric="precomputed")
knn.fit(train_dists, y_train_mtser)
predictions_mtser = knn.predict(test_dists)
mse = mean_squared_error(y_test_mtser, predictions_mtser)
print(f"MSE of 1-NN (sklearn) with WDTW is {mse}")

knn_aeon = KNeighborsTimeSeriesRegressor(
    n_neighbors=1, distance="wdtw", distance_params={"window": 0.1}
)
knn_aeon.fit(X_train_mtser, y_train_mtser)
predictions_mtser_aeon = knn_aeon.predict(X_test_mtser)
mse = mean_squared_error(y_test_mtser, predictions_mtser_aeon)
print(f"MSE of 1-NN (aeon) with WDTW is {mse}")

# As can be observed, the results achieved by both approaches are the same.
# Note that callable distances can also be coupled with the
# `KNeighborsTimeSeriesRegressor` in `aeon` as follows:

# In[32]:

# Now, using the WDTW distance as a callable function in aeon:
from aeon.distances import wdtw_distance

knn_aeon = KNeighborsTimeSeriesRegressor(
    n_neighbors=1, distance=wdtw_distance, distance_params={"window": 0.1}
)
knn_aeon.fit(X_train_mtser, y_train_mtser)
predictions_mtser_aeon = knn_aeon.predict(X_test_mtser)
mse = mean_squared_error(y_test_mtser, predictions_mtser_aeon)
print(f"MSE of 1-NN (aeon) with WDTW as callable function is {mse}")

# # Advanced distance-based approaches: Elastic Ensemble (EE) [[12]](#references) and Proximity Forest (PF) [[13]](#references)
#
# Other, more advanced, distance-based approaches are the EE and the PF.
#
# The EE is a weighted ensemble of 11 1-NN classifiers, each with a different
# elastic distance measure. Training it requires cross-validation to set the
# weight of each classifier in the ensemble (a minimal voting sketch follows
# below).
#
# In the case of PF, the algorithm ensembles highly randomised Proximity Trees,
# which branch on the proximity of a time series to one exemplar series or
# another, instead of branching on attribute values, as was conventional before
# that publication.
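# As a minimal sketch of EE's weighted voting (the helper below is
# illustrative, not aeon's implementation): each 1-NN member votes for a class,
# and its vote is weighted by its cross-validation accuracy on the training
# data.

# In[ ]:

def weighted_vote(member_preds, cv_accuracies, classes):
    # member_preds: predicted label from each ensemble member
    # cv_accuracies: train CV accuracy of each member, used as its voting weight
    scores = {c: 0.0 for c in classes}
    for pred, acc in zip(member_preds, cv_accuracies):
        scores[pred] += acc
    return max(scores, key=scores.get)

# Two weaker members outvote one stronger member: 0.7 + 0.6 > 0.9
print(weighted_vote(["a", "b", "b"], [0.9, 0.7, 0.6], ["a", "b"]))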
# In[33]:

from aeon.classification.distance_based import ElasticEnsemble

ee = ElasticEnsemble(
    proportion_of_param_options=0.1,
    proportion_train_in_param_finding=0.3,
    proportion_train_for_test=0.5,
)
ee.fit(X_train_utsc, y_train_utsc)
ee_preds = ee.predict(X_test_utsc)
accuracy = accuracy_score(y_test_utsc, ee_preds)
print(f"Accuracy of Elastic Ensemble is {accuracy}.")

# More specifically, the implementation of the EE in `aeon` allows selecting
# the distance measures used to build the ensemble, as well as modifying some
# parameters of the cross-validation procedure:

# In[34]:

ee = ElasticEnsemble(
    distance_measures=["dtw", "msm"],
    proportion_of_param_options=0.1,
    proportion_train_in_param_finding=0.3,
    proportion_train_for_test=0.5,
)
ee.fit(X_train_utsc, y_train_utsc)
ee_preds = ee.predict(X_test_utsc)
accuracy = accuracy_score(y_test_utsc, ee_preds)
print(f"Accuracy of Elastic Ensemble with DTW and MSM is {accuracy}.")

# The same behaviour is expected when dealing with the MTSC dataset:

# In[35]:

ee.fit(X_train_mtsc, y_train_mtsc)
ee_preds = ee.predict(X_test_mtsc)
accuracy = accuracy_score(y_test_mtsc, ee_preds)
print(f"Accuracy of Elastic Ensemble with DTW and MSM is {accuracy}.")

# The PF approach has recently been included in `aeon` and, for now, can only
# handle univariate TSC problems:

# In[36]:

from aeon.classification.distance_based import ProximityForest

pf = ProximityForest(n_trees=10, random_state=0)
pf.fit(X_train_utsc, y_train_utsc)
pf_preds = pf.predict(X_test_utsc)
accuracy = accuracy_score(y_test_utsc, pf_preds)
print(f"Accuracy of Proximity Forest is {accuracy}.")

# # Performance on the UCR univariate classification datasets
#
# Below we show the performance of the presented distance-based approaches on
# the UCR TSC archive datasets, using results from a large-scale comparison of
# TSC algorithms [[8]](#references). The results files are stored on
# [timeseriesclassification.com](https://timeseriesclassification.com).

# In[37]:

from aeon.benchmarking import get_estimator_results_as_array
from aeon.datasets.tsc_datasets import univariate

names = ["1NN-DTW", "EE", "PF", "ShapeDTW"]

results, present_names = get_estimator_results_as_array(
    names, univariate, include_missing=False
)
print(f"The results shape is {results.shape}.")

# In[38]:

import numpy as np

np.mean(results, axis=0)

# In[39]:

from aeon.visualisation import plot_critical_difference

plot_critical_difference(results, names)
plt.show()

# # References
#
# [1] Keogh, E., & Ratanamahatana, C. A. Exact indexing of dynamic time warping. Knowledge and Information Systems, 7, 358-386, 2005.
#
# [2] Herrmann, M., & Webb, G. I. Amercing: an intuitive and effective constraint for dynamic time warping. Pattern Recognition, 137, 109333, 2023.
#
# [3] Zhao, J., & Itti, L. shapeDTW: Shape dynamic time warping. Pattern Recognition, 74, 171-184, 2018.
#
# [4] Jeong, Y. S., Jeong, M. K., & Omitaomu, O. A. Weighted dynamic time warping for time series classification. Pattern Recognition, 44(9), 2231-2240, 2011.
#
# [5] Vlachos, M., Kollios, G., & Gunopulos, D. Discovering similar multidimensional trajectories. In Proceedings of the 18th International Conference on Data Engineering (pp. 673-684). IEEE, 2002.
#
# [6] Stefan, A., Athitsos, V., & Das, G. The move-split-merge metric for time series. IEEE Transactions on Knowledge and Data Engineering, 25(6), 1425-1438, 2012.
#
# [7] Marteau, P. F. Time warp edit distance with stiffness adjustment for time series matching. IEEE Transactions on Pattern Analysis and Machine Intelligence, 31(2), 306-318, 2008.
# # [8] Middlehurst, M., Schäfer, P., & Bagnall, A. Bake off redux: a review and experimental evaluation of recent time series classification algorithms. Data Mining and Knowledge Discovery, 1-74, 2024. # # [9] Shifaz, A., Pelletier, C., Petitjean, F., & Webb, G. I. Elastic similarity and distance measures for multivariate time series. Knowledge and Information Systems, 65(6), 2665-2698, 2023. # # [10] Tan, C. W., Bergmeir, C., Petitjean, F., & Webb, G. I. Time series extrinsic regression: Predicting numeric values from time series data. Data Mining and Knowledge Discovery, 35(3), 1032-1060, 2021. # # [11] Guijo-Rubio, D., Middlehurst, M., Arcencio, G., Silva, D. F., & Bagnall, A. Unsupervised feature based algorithms for time series extrinsic regression. Data Mining and Knowledge Discovery, 1-45, 2024. # # [12] Lines, J., & Bagnall, A. Time series classification with ensembles of elastic distance measures. Data Mining and Knowledge Discovery, 29, 565-592, 2015. # # [13] Lucas, B., Shifaz, A., Pelletier, C., O’Neill, L., Zaidi, N., Goethals, B., Petitjean, F. & Webb, G. I. Proximity forest: an effective and scalable distance-based classifier for time series. Data Mining and Knowledge Discovery, 33(3), 607-635, 2019. # # [Return to Table of Contents](#toc)