#!/usr/bin/env python
# coding: utf-8

# # Feature extraction with tsfresh transformer
#
# [tsfresh](https://tsfresh.readthedocs.io) is a tool for extracting summary features
# from a collection of time series. It is an unsupervised transformation, and as such
# can easily be used as a pipeline stage in classification, clustering and regression
# in conjunction with a scikit-learn compatible estimator.
#
# ## Preliminaries
# You have to install tsfresh if you haven't already. To install it, uncomment the
# cell below:

# In[1]:


# !pip install --upgrade tsfresh


# In[2]:


from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline

from aeon.datasets import load_arrow_head, load_basic_motions
from aeon.transformations.collection.feature_based import TSFresh, TSFreshRelevant

# ## Example data set
#
# We use the ArrowHead data from the
# [UCR TSC archive](https://timeseriesclassification.com) as an example dataset. See the
# [dataset notebook](https://github.com/aeon-toolkit/aeon/blob/main/examples/datasets/provided_data.ipynb)
# for more details. We only use the first few cases for the examples to speed up the
# notebook.

# In[3]:


X, y = load_arrow_head()
n_cases = 24
# The first n_cases cases are used for training and the next n_cases for testing.
X_train = X[:n_cases, :, :]
y_train = y[:n_cases]
X_test = X[n_cases : 2 * n_cases, :, :]
y_test = y[n_cases : 2 * n_cases]
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


# ## Using tsfresh to extract features
#
# There are two versions of TSFresh feature extractors wrapped in aeon. The
# first is the unsupervised `TSFresh`, which by default extracts 777 features per
# channel (so 777 features for the univariate ArrowHead data). See the
# documentation for parameter configuration.

# In[4]:


t = TSFresh()
Xt = t.fit_transform(X_train)
Xt2 = t.transform(X_test)
print(f"Train shape = {Xt.shape} test shape = {Xt2.shape}")


# The second is `TSFreshRelevant`, which uses `y` to select the most
# relevant features.
# In[5]:


t = TSFreshRelevant()
t.fit(X_train, y_train)
Xt = t.transform(X_test)
# Print explicitly: a bare expression shows nothing when this runs as a script.
print(Xt.shape)


# ## Using tsfresh with scikit-learn estimators
#
# You can use the tsfresh transformer with any scikit-learn compatible estimator.
#

# In[6]:


classifier = make_pipeline(
    TSFresh(default_fc_parameters="efficient", show_warnings=False),
    RandomForestClassifier(),
)
classifier.fit(X_train, y_train)
print(classifier.score(X_test, y_test))


# For convenience and consistency of use we also have hard-coded TSFresh classifier,
# regressor and clusterer estimators.

# In[7]:


from aeon.classification.feature_based import TSFreshClassifier
from aeon.clustering.feature_based import TSFreshClusterer

cls = TSFreshClassifier(relevant_feature_extractor=False)
clst = TSFreshClusterer(n_clusters=2)
cls.fit(X_train, y_train)
# The score was previously computed and discarded mid-cell; report it instead.
print(cls.score(X_test, y_test))
clst.fit(X_train)
print(cls.predict(X_test))
print(clst.predict(X_test))


# By default, the `TSFreshClassifier` uses the supervised
# `TSFreshRelevant` and the scikit-learn `RandomForestClassifier`.
# You can change this through the constructor.

# In[8]:


from aeon.classification.sklearn import RotationForestClassifier

cls = TSFreshClassifier(estimator=RotationForestClassifier(n_estimators=5))
cls.fit(X_train, y_train)
print(cls.score(X_test, y_test))


# By default, the `TSFreshClusterer` uses the unsupervised `TSFresh`
# and the `sklearn` clusterer `KMeans` with default parameters (which fits 8 clusters).
# You can also configure this through the constructor.

# In[9]:


from sklearn.cluster import KMeans

clst = TSFreshClusterer(estimator=KMeans(n_clusters=3))
clst.fit(X_train)
print(clst.predict(X_test))


# By default, the `TSFreshRegressor` uses the supervised
# `TSFreshRelevant` and the scikit-learn `RandomForestRegressor`.
# In[10]:


from aeon.datasets import load_covid_3month
from aeon.regression.feature_based import TSFreshRegressor

# relevant_feature_extractor=False switches off the supervised feature selection.
reg = TSFreshRegressor(relevant_feature_extractor=False)

X, y = load_covid_3month(split="train")
reg.fit(X, y)


# ## TSFresh with multivariate time series data
#
# ``TSFresh`` transformers and all three estimators can be used with multivariate time
# series. The transform calculates the features on each channel independently, then
# concatenates the results. The full transform creates `777*n_channels` features.

# In[14]:


X_train, y_train = load_basic_motions(split="train")
X_test, y_test = load_basic_motions(split="test")
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


# In[15]:


tsfresh = TSFresh()
X = tsfresh.fit_transform(X_train, y_train)
# Print explicitly: a bare expression shows nothing when this runs as a script.
print(X.shape)


# In[ ]: