In [1]:

# Some basic data science imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
import datetime

import time
import os

# Some gudhi imports for TDA
from tda_pipeline import DataSelector, RipsPersistence, LPNorm
from gudhi.representations import Landscape

# Some graphical imports for the web app
import ipywidgets as widgets
import plotly.graph_objects as go

# Some imports for ipywidgets
from ipywidgets import DatePicker, BoundedIntText, ToggleButton, Text, HBox, VBox
from IPython.display import display

# Dataset file; see data_generation.py. The first CSV column is used as the
# index, and its labels are parsed below as ISO-formatted date strings.
df = pd.read_csv('latest.csv', index_col=0)

# Date bounds taken from the first and last index labels
# (datetime.date.fromisoformat requires Python 3.7+).
min_value, max_value = (
    datetime.date.fromisoformat(df.index[position]) for position in (0, -1)
)

# Default start of the selected range: 250 calendar days before the last date.
last_year = max_value - datetime.timedelta(days=250)

# Widgets: date range pickers, window size input and the trigger button.
start = DatePicker(description='start', value=last_year)
end = DatePicker(description='end', value=max_value)
window = BoundedIntText(
    value=50,
    min=10,
    max=100,
    step=1,
    description='Windows size',
)
button = ToggleButton(description='Compute', icon='cogs')

def nearest_str_date(items, pivot):
    """Return the entry of ``items`` whose date is closest to ``pivot``.

    Args:
        items: iterable of ISO-formatted date strings (``YYYY-MM-DD``).
        pivot: ``datetime.date`` to compare against.

    Returns:
        The string from ``items`` with the smallest absolute distance to
        ``pivot``; on a tie the first such entry encountered wins.

    Raises:
        ValueError: if ``items`` is empty or an entry is not a valid ISO date.
    """
    def distance_to_pivot(candidate):
        parsed = datetime.date.fromisoformat(candidate)
        return abs(parsed - pivot)

    return min(items, key=distance_to_pivot)

def compute_Lp_norms():
    """Compute the scaled L1/L2 norms and their trailing variances.

    Reads the current values of the module-level ``start``, ``end`` and
    ``window`` widgets, maps the selected dates to positions in ``df.index``,
    runs the TDA pipeline on ``df`` and packs the result into a DataFrame.
    The elapsed pipeline time is written to file descriptor 1.

    Returns:
        pandas.DataFrame with columns ``date``, ``L1``, ``L2``,
        ``L1_variance`` and ``L2_variance``. Each variance is the population
        variance (ddof=0) of the up-to-``w`` rows that precede the current
        row, so the first row is NaN.

    Raises:
        IndexError: if start and end resolve to the same index position, or
            if the selected range is not longer than the window size (no
            output row could be produced).
    """
    w = int(window.value)
    # The start is moved back by w calendar days before being snapped to the
    # nearest available index label; the output dates below begin at
    # start_idx + w. Presumably this lets the first window end near the
    # requested start date -- TODO confirm against DataSelector.
    start_idx = df.index.get_loc(
        nearest_str_date(df.index, start.value - datetime.timedelta(days=w))
    )
    end_idx = df.index.get_loc(nearest_str_date(df.index, end.value))

    # Some error management
    if start_idx == end_idx:
        raise IndexError('Start date must be different from end date.')
    if end_idx < w:
        end_idx = w
    if end_idx < start_idx:
        start_idx, end_idx = end_idx, start_idx
    # After clamping/swapping, a range of at most w rows yields an empty date
    # slice below, which can only end in an opaque downstream error. Fail
    # early with an explicit message instead.
    if end_idx - start_idx <= w:
        raise IndexError('Selected date range must span more than the window size.')

    pipe = Pipeline(
        [
            ("data_sel", DataSelector(start=start_idx, end=end_idx, w=w)),
            ("rips_pers", RipsPersistence(max_rips_dimension=2, max_persistence_dimension=2, only_this_dim=1, n_jobs=-1)),
            ("landscape", Landscape(resolution=1000)),
            ("lpnorm", LPNorm(n_jobs=-1)),
            ("mms", MinMaxScaler()),
        ]
    )

    # perf_counter is the monotonic clock intended for measuring durations.
    start_chrono = time.perf_counter()
    L1L2mms = pipe.fit_transform(df)
    elapsed = time.perf_counter() - start_chrono
    # Written straight to fd 1 rather than through print().
    os.write(1, bytes(str(elapsed), 'UTF-8') + b' sec. \r\n')

    norms_by_column = L1L2mms.transpose()
    l1l2df = pd.DataFrame(
        {
            'date': df[start_idx + w:end_idx].index,
            'L1': norms_by_column[0],
            'L2': norms_by_column[1],
        }
    )

    # Trailing variance of the norms: for row j, the population variance of
    # rows max(0, j - w) .. j - 1. A rolling window ending at the previous row
    # (hence the shift) computes this without a per-row Python loop.
    for norm_col in ('L1', 'L2'):
        l1l2df[norm_col + '_variance'] = (
            l1l2df[norm_col].rolling(window=w, min_periods=1).var(ddof=0).shift(1)
        )

    return l1l2df

def compute(args):
    """Recompute the norms and refresh the four traces of the module-level ``fig``.

    Meant to be used as a widget observer callback.

    Args:
        args: change notification passed by the observer mechanism; unused.
    """
    l1l2df = compute_Lp_norms()
    # Trace order matches the order in which the traces were added to fig.
    trace_columns = ('L1', 'L2', 'L1_variance', 'L2_variance')
    # Group all property changes into one update so the figure is not
    # re-rendered after every single x/y assignment (and never shown with
    # x and y of different lengths).
    with fig.batch_update():
        for position, column in enumerate(trace_columns):
            trace = fig.data[position]
            trace.x = l1l2df['date']
            trace.y = l1l2df[column]
    
# Re-run compute() whenever the button's 'value' trait changes.
button.observe(compute, 'value')

# Layout
left_box = HBox([start, end, window, button])

# Figure: one line trace per series, filled from an initial computation
# using the widgets' default values.
fig = go.FigureWidget()
l1l2df = compute_Lp_norms()
for series_name in ('L1', 'L2', 'L1_variance', 'L2_variance'):
    fig.add_scatter(
        x=l1l2df['date'],
        y=l1l2df[series_name],
        mode='lines',
        name=series_name,
    )
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")
fig.layout.title.text = 'Lp norms'

# Show the controls, then the figure as the cell's result.
display(left_box)
fig

0.05286908149719238 sec.

HBox(children=(DatePicker(value=datetime.date(2021, 9, 18), description='start'), DatePicker(value=datetime.da…

FigureWidget({
    'data': [{'mode': 'lines',
              'name': 'L1',
              'type': 'scatter',
   …

TDA of financial time series