#!/usr/bin/env python
# coding: utf-8

# `KDD2024 Tutorial / A Hands-On Introduction to Time Series Classification and Regression`
#
# # Classifier and regressor capabilities
#
# ## Univariate Classification
#
# To finish off, we build all classifiers and regressors on our example EEG data and
# compare accuracy. We do not go into depth into the relative performance, because
# these are very small toy datasets. We start by listing classifiers by their
# capabilities.

# In[ ]:

# Install the pinned aeon release used by this tutorial and download the example
# datasets. wget's -nc flag skips any file that has already been fetched, so this
# cell is safe to re-run.
get_ipython().system('pip install aeon==0.11.0')
get_ipython().system('mkdir -p data')

_DATA_BASE = "https://raw.githubusercontent.com/aeon-tutorials/KDD-2024/main/Notebooks/data"
for _dataset in (
    "KDD_MTSC_TRAIN",
    "KDD_MTSC_TEST",
    "KDD_UTSC_TRAIN",
    "KDD_UTSC_TEST",
    "KDD_MTSER_TRAIN",
    "KDD_MTSER_TEST",
    "KDD_UTSER_TRAIN",
    "KDD_UTSER_TEST",
):
    get_ipython().system(f"wget -nc {_DATA_BASE}/{_dataset}.ts -P data/")

# In[1]:

# There are some deprecation warnings present in the notebook, we will ignore them.
# Remove this cell if you are interested in finding out what is changing soon, for
# aeon there will be big changes in our v1.0.0 release!
import warnings

# Silence deprecation noise for the rest of the notebook.
warnings.filterwarnings("ignore")

# In[2]:

from aeon.registry import all_estimators


def _by_multivariate_tag(estimator_type, supports_multivariate):
    """Return a DataFrame of registered estimators filtered on the multivariate tag.

    Column 0 holds the estimator name, column 1 the estimator class.
    """
    return all_estimators(
        estimator_type,
        filter_tags={"capability:multivariate": supports_multivariate},
        as_dataframe=True,
    )


uni_cls = _by_multivariate_tag("classifier", False)
print("Univariate series only classifiers\n", uni_cls.iloc[:, 0])

uni_reg = _by_multivariate_tag("regressor", False)
print("Univariate only regressors\n", uni_reg.iloc[:, 0])

multi_cls = _by_multivariate_tag("classifier", True)
print("Classifiers that can handle multivariate\n", multi_cls.iloc[:, 0])

multi_reg = _by_multivariate_tag("regressor", True)
print("Regressors that can handle multivariate\n", multi_reg.iloc[:, 0])

# ## Unequal length series
#
# Currently few classifiers and regressors support unequal length series and none
# internally handle missing values. This will change soon. Until then, we advise
# using padding or truncation.

# In[3]:

uneq_cls = all_estimators(
    "classifier",
    filter_tags={"capability:unequal_length": True},
    as_dataframe=True,
)
print("Classifiers that can handle unequal length series\n", uneq_cls.iloc[:, 0])

uneq_reg = all_estimators(
    "regressor",
    filter_tags={"capability:unequal_length": True},
    as_dataframe=True,
)
print("Regressors that can handle unequal length series\n", uneq_reg.iloc[:, 0])

# ## Performance on EEG data
#
# We can create, fit and predict with these list of classifiers. We will use the EEG
# data made for this tutorial. Do not interpret much with regard to relative
# performance, this is for illustrative purposes only. However, the variance in
# results does suggest that the classifiers work differently. We exclude estimators
# that require arguments in the constructor such as Pipelines.
# In[4]:

from aeon.datasets import load_from_tsfile

# Univariate EEG classification data built for this tutorial.
X_train_c, y_train_c = load_from_tsfile("./data/KDD_UTSC_TRAIN.ts")
X_test_c, y_test_c = load_from_tsfile("./data/KDD_UTSC_TEST.ts")

# Fit every univariate-only classifier with default parameters and report test
# accuracy. Estimators that need constructor arguments are skipped.
for _, record in uni_cls.iterrows():
    if record[0] not in {
        "ClassifierPipeline",
        "MrSQMClassifier",
        "WeightedEnsembleClassifier",
    }:
        estimator = record[1]()
        estimator.fit(X_train_c, y_train_c)
        print(record[0], " accuracy = ", estimator.score(X_test_c, y_test_c))
        print()

# ### Multivariate classifiers on the univariate data
#
# We can use multivariate classifiers on univariate data (except for MUSE). Some are
# excluded from this example because they require constructor arguments, are very
# slow especially on CPU, require non standard imports or generate many warnings on
# this data. This cell will take a while to execute.

# In[5]:

excl = [
    "MUSE",
    "EncoderClassifier",
    "ChannelEnsembleClassifier",
    "FCNClassifier",
    "HIVECOTEV2",
    "IntervalForestClassifier",
    "LearningShapeletClassifier",
    "InceptionTimeClassifier",
    "IndividualInceptionClassifier",
    "IndividualOrdinalTDE",
    "OrdinalTDE",
    "TapNetClassifier",
    "SignatureClassifier",
    "ResNetClassifier",
    "LITETimeClassifier",
    "IndividualLITETimeClassifier",
    "MLPClassifier",
    "CNNClassifier",
    "SupervisedIntervalClassifier",
    "REDCOMETS",
    "ElasticEnsemble",
]
for _, record in multi_cls.iterrows():
    if record[0] not in excl:
        estimator = record[1]()
        estimator.fit(X_train_c, y_train_c)
        print(record[0], " accuracy = ", estimator.score(X_test_c, y_test_c))
        print()

# ## Regressors on EEG data
#
# All of the regressors can handle multivariate.

# In[6]:

from aeon.datasets import load_from_tsfile
from sklearn.metrics import mean_squared_error

# Univariate EEG regression data; the *_c names are reused from the cells above.
X_train_c, y_train_c = load_from_tsfile("./data/KDD_UTSER_TRAIN.ts")
X_test_c, y_test_c = load_from_tsfile("./data/KDD_UTSER_TEST.ts")

excl = [
    "RegressorPipeline",
    "CNNRegressor",
    "FCNRegressor",
    "InceptionTimeRegressor",
    "IndividualInceptionRegressor",
    "EncoderRegressor",
    "ResNetRegressor",
    "IndividualLITERegressor",
    "LITETimeRegressor",
    "TapNetRegressor",
]
for _, record in multi_reg.iterrows():
    if record[0] not in excl:
        estimator = record[1]()
        estimator.fit(X_train_c, y_train_c)
        predictions = estimator.predict(X_test_c)
        print(record[0], " MSE = ", mean_squared_error(y_test_c, predictions))
        print()

# ## Performance on archive data
#
# We can directly pull published results from our website
# timeseriesclassification.com. See
# [this notebook](https://www.aeon-toolkit.org/en/stable/examples/benchmarking/reference_results.html)
# for more details on how to do this.

# In[7]:

from aeon.benchmarking import get_available_estimators
from aeon.benchmarking.results_loaders import (
    get_estimator_results_as_array,
)
from aeon.visualisation import (
    plot_critical_difference,
)

cls = get_available_estimators(task="classification", return_dataframe=False)
print(len(cls), " classifier results available\n", cls)

# Results are loaded from
# https://timeseriesclassification.com/results/ReferenceResults.
# You can download the files directly from there.
resamples_all, data_names = get_estimator_results_as_array(
    estimators=cls, default_only=False
)
print(resamples_all.shape)

# Compare a hand-picked subset of state-of-the-art classifiers with a critical
# difference diagram (Wilcoxon tests with Holm correction).
classifiers = [
    "FreshPRINCE",
    "HIVECOTEV2",
    "InceptionTime",
    "WEASEL-D",
    "MR-Hydra",
    "RDST",
    "QUANT",
    "PF",
]
resamples_all, data_names = get_estimator_results_as_array(
    estimators=classifiers, default_only=False
)
plot = plot_critical_difference(
    resamples_all, classifiers, test="wilcoxon", correction="holm"
)