#!/usr/bin/env python
# coding: utf-8

# In[1]:


from orion.data import load_signal, load_anomalies


# # 1. Data

# In[2]:


signal_name = 'S-1'

data = load_signal(signal_name)
anomalies = load_anomalies(signal_name)

data.head()


# # 2. Pipeline

# In[3]:


from mlblocks import MLPipeline

pipeline_name = 'timesfm'

pipeline = MLPipeline(pipeline_name)


# ## step by step execution
#
# MLPipelines are composed of a sequence of primitives. These primitives apply transformation and calculation operations to the data and update the variables within the pipeline. To view the primitives used by the pipeline, we access its `primitives` attribute.
#
# The `timesfm` pipeline contains 7 primitives. We will observe how the `context` (the variables held within the pipeline) is updated after the execution of each primitive.

# In[4]:


pipeline.primitives


# ### time segments aggregate
#
# this primitive creates an equi-spaced time series by aggregating values over a fixed, specified interval.
#
# * **input**: `X` which is an n-dimensional sequence of values.
# * **output**:
#     - `X` sequence of aggregated values, one column for each aggregation method.
#     - `index` sequence of index values (first index of each aggregated segment).

# In[5]:


context = pipeline.fit(data, output_=0)
context.keys()


# In[6]:


for i, x in list(zip(context['index'], context['X']))[:5]:
    print("entry at {} has value {}".format(i, x))


# ### SimpleImputer
#
# this primitive is an imputation transformer for filling missing values.
#
# * **input**: `X` which is an n-dimensional sequence of values.
# * **output**: `X` which is a transformed version of X.

# In[7]:


step = 1
context = pipeline.fit(**context, output_=step, start_=step)
context.keys()


# ### StandardScaler
#
# this primitive standardizes features by removing the mean and scaling to unit variance.
#
# * **input**: `X` the data used to compute the per-feature mean and standard deviation used for later scaling along the features axis.
# * **output**: `X` which is a transformed version of X.

# In[8]:


step = 2
context = pipeline.fit(**context, output_=step, start_=step)
context.keys()


# In[9]:


# after scaling the data
for i, x in list(zip(context['index'], context['X']))[:5]:
    print("entry at {} has value {}".format(i, x))


# ### rolling window sequence
#
# this primitive generates many sub-sequences of the original sequence. it uses a rolling window approach to create the sub-sequences out of time series data (a plain-numpy sketch of the idea follows this section).
#
# * **input**:
#     - `X` n-dimensional sequence to iterate over.
#     - `index` array containing the index values of X.
# * **output**:
#     - `X` input sequences.
#     - `y` target sequences.
#     - `index` first index value of each input sequence.
#     - `target_index` first index value of each target sequence.

# In[10]:


step = 3
context = pipeline.fit(**context, output_=step, start_=step)
context.keys()


# In[11]:


# after slicing X into multiple sub-sequences
# we obtain a 3 dimensional matrix X where
# the shape indicates (# slices, window size, 1)
# and similarly y is (# slices, target size)

print("X shape = {}\ny shape = {}\nindex shape = {}\ntarget index shape = {}".format(
    context['X'].shape, context['y'].shape, context['index'].shape, context['target_index'].shape))
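# As an aside, the next cell is a minimal plain-numpy sketch of that windowing idea.
# `make_windows` and the `window_size`/`target_size` values are illustrative only,
# not the primitive's actual implementation or default hyperparameters.

# In[ ]:


import numpy as np

def make_windows(X, index, window_size=100, target_size=1):
    """Slice a sequence into rolling (input, target) windows."""
    inputs, targets, first_in, first_out = [], [], [], []
    for start in range(len(X) - window_size - target_size + 1):
        end = start + window_size
        inputs.append(X[start:end])                       # input window
        targets.append(X[end:end + target_size].ravel())  # values to forecast
        first_in.append(index[start])                     # first index of the input
        first_out.append(index[end])                      # first index of the target
    return (np.asarray(inputs), np.asarray(targets),
            np.asarray(first_in), np.asarray(first_out))

# toy usage on a synthetic sequence
toy_X = np.arange(20).reshape(-1, 1)
toy_index = np.arange(20)
Xw, yw, iw, tw = make_windows(toy_X, toy_index, window_size=5, target_size=1)
Xw.shape, yw.shape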
# ### TimesFM
#
# this primitive performs forecasting using TimesFM. you can read more about it in the [related paper](https://arxiv.org/abs/2310.10688).
#
# * **input**:
#     - `X` n-dimensional array containing the input sequences for the model.
# * **output**:
#     - `y_hat` predicted values.

# In[12]:


step = 4
context = pipeline.fit(**context, output_=step, start_=step)
context.keys()


# In[13]:


context['y_hat'].shape


# ### regression errors
#
# this primitive computes an array of errors comparing predictions and expected output.
#
# * **input**:
#     - `y` ground truth.
#     - `y_hat` forecasted values.
# * **output**: `errors` array of errors.

# In[14]:


step = 5
context = pipeline.fit(**context, output_=step, start_=step)
context.keys()


# ### find anomalies
#
# this primitive finds anomalies from a sequence of errors.
#
# * **input**:
#     - `errors` array of errors.
#     - `target_index` index values of the target sequences.
# * **output**: `anomalies` detected anomalous intervals.

# In[15]:


step = 6
context = pipeline.fit(**context, output_=step, start_=step)
context.keys()


# In[16]:


context['anomalies']


# # 3. Evaluate performance
#
# In this next step we will load some already known anomalous intervals and evaluate how
# good our anomaly detection was by comparing those with our detected intervals.
#
# For this, we will first load the known anomalies for the signal that we are using:

# In[19]:


from orion.data import load_anomalies

ground_truth = load_anomalies('S-1')
ground_truth


# In[20]:


anomalies = []
for ano in context['anomalies']:
    anomalies.append((ano[0], ano[1]))

anomalies


# In[21]:


from orion.evaluation import contextual_confusion_matrix, contextual_f1_score

start, end = context['index'][0], context['index'][-1]

contextual_confusion_matrix(ground_truth, anomalies, start=start, end=end, weighted=False)


# In[22]:


contextual_f1_score(ground_truth, anomalies, start=start, end=end, weighted=False)
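# As a closing note, the same 'timesfm' pipeline can be run end to end through Orion's
# high-level API instead of step by step. The sketch below assumes the standard `Orion`
# class with `fit_detect` is available in this installation; it returns a dataframe of
# detected intervals that can then be scored against `ground_truth` exactly as above.

# In[ ]:


from orion import Orion

orion = Orion(pipeline='timesfm')

detected = orion.fit_detect(data)
detected.head()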