#!/usr/bin/env python
# coding: utf-8

# # Advanced classification models
# 
# This example shows how to use more advanced classifiers instead of the linear classifier that is used by default.

# In[1]:

import numpy as np
from sklearn.preprocessing import OneHotEncoder

from reservoir_computing.modules import RC_model
from reservoir_computing.utils import compute_test_scores
from reservoir_computing.datasets import ClfLoader

np.random.seed(0)  # Fix the seed for reproducibility

# ## Prepare the data
# 
# We will use the dataloader `ClfLoader` to get a classification dataset.
# To see which datasets are available, we can call the function `available_datasets`. By setting `details=True` we can get additional information.

# In[3]:

downloader = ClfLoader()
downloader.available_datasets(details=False)  # Describe available datasets

# Next, we load a dataset of MTS representing the sound of different Japanese vowels pronounced by nine different speakers. The goal is to classify the speaker correctly. Note that we need to transform the labels into one-hot encoded vectors.

# In[4]:

Xtr, Ytr, Xte, Yte = downloader.get_data('Japanese_Vowels')

# In[5]:

# One-hot encoding for labels
onehot_encoder = OneHotEncoder(sparse_output=False)
Ytr = onehot_encoder.fit_transform(Ytr)
Yte = onehot_encoder.transform(Yte)

# Then, we define the configuration of the reservoir, the dimensionality reduction module, and the type of Multivariate Time Series (MTS) representation.

# In[9]:

config = {}

# Hyperparameters of the reservoir
config['n_internal_units'] = 450   # size of the reservoir
config['spectral_radius'] = 0.59   # largest eigenvalue of the reservoir
config['leak'] = 0.6               # amount of leakage in the reservoir state update (None or 1.0 --> no leakage)
config['connectivity'] = 0.25      # percentage of nonzero connections in the reservoir
config['input_scaling'] = 0.1      # scaling of the input weights
config['noise_level'] = 0.01       # noise in the reservoir state update
config['n_drop'] = 5               # transient states to be dropped
config['bidir'] = True             # if True, use bidirectional reservoir
config['circle'] = False           # use reservoir with circle topology

# Dimensionality reduction hyperparameters
config['dimred_method'] = 'tenpca' # options: {None (no dimensionality reduction), 'pca', 'tenpca'}
config['n_dim'] = 75               # number of resulting dimensions after the dimensionality reduction procedure

# Type of MTS representation
config['mts_rep'] = 'reservoir'    # MTS representation: {'last', 'mean', 'output', 'reservoir'}
config['w_ridge_embedding'] = 10.0 # regularization parameter of the ridge regression

# ## Linear readout
# 
# We start with a simple linear classifier as the readout. In particular, we use the [RidgeClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeClassifier.html) from sklearn. The classifier requires a regularization parameter that we call `w_ridge` (in sklearn it is called `alpha`).

# In[10]:

# Type of readout
config['readout_type'] = 'lin'     # readout used for classification
config['w_ridge'] = 1.0            # regularization of the ridge regression readout

# At this point, we initialize the RC classifier by passing the configuration we specified before and then fit it on the training data.

# In[11]:

classifier = RC_model(**config)

# Train the model
tr_time = classifier.fit(Xtr, Ytr)

# At this point, we can predict the labels of the test set and see how well they match the real ones by computing the classification accuracy and the F1 score.
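# The helper `compute_test_scores` used in the next cell takes care of this. For reference, here is a minimal sketch of an equivalent computation with scikit-learn metrics. It assumes that both predictions and labels are one-hot encoded arrays; the helper name `scores_from_onehot` and the weighted F1 averaging are illustrative assumptions, not necessarily what the library implements.

# In[ ]:

from sklearn.metrics import accuracy_score, f1_score

def scores_from_onehot(pred, true):
    # Convert one-hot rows back to integer class labels
    pred_labels = np.argmax(pred, axis=1)
    true_labels = np.argmax(true, axis=1)
    # Accuracy and class-weighted F1
    return (accuracy_score(true_labels, pred_labels),
            f1_score(true_labels, pred_labels, average='weighted'))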
# In[12]:

# Compute predictions on test data
pred_class = classifier.predict(Xte)
accuracy, f1 = compute_test_scores(pred_class, Yte)
print(f"Accuracy = {accuracy:.3f}, F1 = {f1:.3f}")

# That is a pretty high accuracy. Even a simple model such as the RidgeClassifier can classify the test data almost perfectly, thanks to the powerful representation provided by the RC model.
# 
# Next, we will try classifiers more powerful than the RidgeClassifier. In this example, we do not expect large changes in performance, since the classification accuracy is already very high. However, on more complex tasks a more powerful classifier can bring substantial benefits.
# 
# ## Support Vector Classifier readout
# 
# We start with [SVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html), the Support Vector Machine classifier of sklearn.
# 
# The first thing is to define the hyperparameters of the new classifier and pass them to the RC model.

# In[13]:

# Type of readout
config['readout_type'] = 'svm'     # readout used for classification
config['svm_gamma'] = 5e-3         # bandwidth of the RBF kernel
config['svm_C'] = 10.0             # regularization for the SVM hyperplane

# Next, we re-create the RC model, train it, and test it.

# In[14]:

classifier = RC_model(**config)

# Train the model
tr_time = classifier.fit(Xtr, Ytr)

# Compute predictions on test data
pred_class = classifier.predict(Xte)
accuracy, f1 = compute_test_scores(pred_class, Yte)
print(f"Accuracy = {accuracy:.3f}, F1 = {f1:.3f}")

# As expected, the performance is still good but not much different from what we got earlier.
# 
# ## Multi Layer Perceptron readout
# 
# Next, we can use a simple neural network as the classifier. We will use the Multilayer Perceptron ([MLPClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html)) from sklearn.
# 
# In this case, we have more hyperparameters to tune. To find the optimal ones in a real-world application, you should do a proper hyperparameter search using a validation set (see the sketch at the end of this notebook).

# In[15]:

# Type of readout
config['readout_type'] = 'mlp'     # readout used for classification
config['mlp_layout'] = (64, 32)    # neurons in each MLP layer
config['num_epochs'] = 2000        # number of epochs
config['w_l2'] = 1e-4              # weight of the L2 regularization
config['nonlinearity'] = 'tanh'    # type of activation function {'relu', 'tanh', 'logistic', 'identity'}

# As before, we create our RC classifier, train it, and test it on unseen data.

# In[16]:

classifier = RC_model(**config)

# Train the model
tr_time = classifier.fit(Xtr, Ytr)

# Compute predictions on test data
pred_class = classifier.predict(Xte)
accuracy, f1 = compute_test_scores(pred_class, Yte)
print(f"Accuracy = {accuracy:.3f}, F1 = {f1:.3f}")

# Also in this case, the classifier achieves good performance, not too different from the previous cases.
# 
# More complicated models such as SVC and the MLP require proper tuning but, on difficult tasks, they can achieve better performance than a simple linear classifier.
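# As mentioned in the MLP section, the readout hyperparameters should be selected on a validation set. Below is a minimal sketch of such a search for the SVC readout; the same pattern applies to the MLP readout. The grid values, the 80/20 split, and the use of validation accuracy as the selection criterion are illustrative assumptions, not recommendations from the library.

# In[ ]:

from itertools import product
from sklearn.model_selection import train_test_split

# Hold out a validation split from the training data
Xtr_sub, Xval, Ytr_sub, Yval = train_test_split(Xtr, Ytr, test_size=0.2, random_state=0)

best_acc, best_params = -1.0, None
for gamma, C in product([1e-3, 5e-3, 1e-2], [1.0, 10.0, 100.0]):
    # Copy the config and override the readout hyperparameters
    cfg = dict(config, readout_type='svm', svm_gamma=gamma, svm_C=C)
    model = RC_model(**cfg)
    model.fit(Xtr_sub, Ytr_sub)
    # Evaluate accuracy on the held-out validation set
    val_acc, _ = compute_test_scores(model.predict(Xval), Yval)
    if val_acc > best_acc:
        best_acc, best_params = val_acc, (gamma, C)

print(f"Best validation accuracy = {best_acc:.3f} with (gamma, C) = {best_params}")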