#!/usr/bin/env python
# coding: utf-8

# # Vortexa voyages SDK use case
# This notebook contains code and instructions for generating use cases of Vortexa's Voyages dataset.

# ## Import required libraries
# 

# In[1]:


import vortexasdk as v
from datetime import datetime
import pandas as pd
import numpy as np
import time
import plotly.express as px
import dateutil.relativedelta


# ## Store Vortexa IDs
# Vortexa's SDK interprets vessels, products and geographies using unique IDs rather than names. The code below demonstrates how to search and save various Vortexa reference IDs.

# ### Geographies
# Search for geography ids (remove hashtags to search).

# In[2]:


# full_length_df = v.Geographies().search(term=["Mexico East"]).to_df()
# print(full_length_df.to_string(index=False))

# Store geography ids
gom='37c8c4eeb730d1cd41f90ca6bf95c923222b0734b1b0336a475acce821f87ebd'
nwe='c5460c5a4ece7b64ffc0cc280aeade60d364423e8e062ef4a11494352fe6fdbb'
usac='2d8f42426b74af03caa9055df1952d22a011f2a210b53b9132955a89fc552433'


# ### Products
# Search for product ids (remove hashtags to search).

# In[3]:


# product_search = v.Products().search(term=['diesel']).to_df()
# print (product_search.to_string(index=False))

cpp='b68cbb746f8b9098c50e2ba36bcad83001a53bd362e9031fb49085d02c36659c'
lpg='364ccbb996c944055b479810a8e74863267885dc1b01407cb0f00ab26dafe1e1'


# ## Define main functions
# The below code defines the functions which process Vortexa data into a format which can be visualised.

# In[4]:


# Function for post ballast distirbution
def post_ballast_distribution(origin, origin_excl, destination, destination_excl, vessel_class, product, product_excl, start_y, start_m, start_d, end_y, end_m, end_d, show_top_x, plot, option):

    # set date objects
    start=datetime(start_y, start_m, start_d)
    end=datetime(end_y, end_m, end_d, 23, 59, 59)
    
    # Pull the laden voyages which occurred in the required timeframe
    route = v.VoyagesSearchEnriched().search(
        origins = origin,
        origins_excluded = origin_excl,
        destinations = destination,
        destinations_excluded = destination_excl,
        time_min = start,
        time_max = end,
        vessels = vessel_class,
        products = product,
        products_excluded = product_excl
    )
    
    # Convert to dataframe
    route = pd.DataFrame(route)
    
    # Sort by end_timestamp
    route["end_timestamp"] = pd.to_datetime(route["end_timestamp"])
    route.sort_values(by='end_timestamp', ascending = True, inplace=True)
    
    # Remove null end_timestamps
    route.drop(route[pd.isnull(route['end_timestamp']) == True].index, inplace = True)
    
    # Remove voyages that end past the specified end date
    route = route[(pd.to_datetime(route['end_timestamp']).dt.tz_convert(None) <= pd.to_datetime(end))]
    
    # Remove voyages still in progress (i.e. voyages with no next voyage ID)
    route = route.dropna(subset=['next_voyage_id'])

    # Get the next voyage IDs
    next_voyage_id_list = list(route["next_voyage_id"].unique())
    next_voyage_id_list=[x for x in next_voyage_id_list if x != '']
    
    # Get voyages corresponding to the next voyage IDs
    df = v.VoyagesSearchEnriched().search(
        voyage_id = next_voyage_id_list,
        columns = "all").to_df()

    # Sort them by their start dates (end date of laden voyage/discharge date)
    df["START DATE"] = pd.to_datetime(df["START DATE"])
    df.sort_values(by='START DATE', ascending = True, inplace=True)
    
    # Relabel blank destinations as Undetermined
    df['FINAL DESTINATION SHIPPING REGION']=df['FINAL DESTINATION SHIPPING REGION'].replace([''],'Undetermined')

    # Remove laden results
    df=df.loc[df["VOYAGE STATUS"] == 'Ballast']
    
    df.reset_index(drop=True, inplace=True)
    
    # Store the unique destinations
    dests = list(df["FINAL DESTINATION SHIPPING REGION"].unique())
        
    dest_counts = []
    # Count the number of times each ballast destination is declared
    for i in range(len(dests)):
        g = len(df.loc[df['FINAL DESTINATION SHIPPING REGION'] == dests[i]])
        dest_counts.append(g)

    # convert counts and destinations list to data frames
    dests = pd.DataFrame(dests)
    dest_counts = pd.DataFrame(dest_counts)
    
    # compile unique destinations and their counts
    ranked = pd.concat([dests, dest_counts], axis = 1)
    ranked.columns = ['Destination', 'Count']
    
    # Sort destinations by highest count
    ranked.sort_values(by='Count', ascending = False, inplace=True)
    
    # Get a list of ranked destinations
    dests = list(ranked["Destination"])
    
    # Convert dates of ballast voyages to months and years for counting purposes
    df["months"] = df['START DATE'].dt.strftime('%m-%Y')
    
    # Get a complete list of dates in month/year format
    dates = list(pd.date_range(start=start, end=end, freq='MS').strftime('%m-%Y'))
    dates_df=pd.DataFrame(dates, columns=['Date'])
    
    # Initialise a data frame for dates
    raw_counts_df=dates_df
    
    # Loop through all destinations
    for j in range(len(dests)):
        
        # initialise a list to store counts
        counts2=[]
        
        # loop through dates
        for i in range(len(dates)):
            
            # count destination occurrences for this date
            g = len(df[(df['FINAL DESTINATION SHIPPING REGION'] == dests[j]) & (df['months'] == dates[i])])
            
            # add to list
            counts2.append(g)
        
        # convert counts to data frame and label it with corresponding destination
        counts2_df=pd.DataFrame(counts2, columns=[dests[j]])
        
        # add counts for this destination to data frame
        raw_counts_df=pd.concat([raw_counts_df, counts2_df], axis=1)
    
    # select count values
    raw_count_vals=raw_counts_df[list(raw_counts_df.columns)[1:]]
    
    # convert counts to percentages
    df_props = raw_count_vals.div(raw_count_vals.sum(axis=1), axis=0)
    
    # add dates to proportions
    df_props=pd.concat([dates_df, df_props], axis=1)
    
    # If you wish to only see the top x destinations, put the rest into 'other'
    if (len(list(raw_counts_df.columns))>(show_top_x + 1)): # if more than x breakdown labels, create another column - can change if required

        # Store first x columns
        first_x=list(raw_counts_df.columns)[:(show_top_x + 1)]

        # Store the others
        rest=list(raw_counts_df.columns)[(show_top_x + 1):]

        # Sum the others
        raw_counts_df['other']=raw_counts_df[rest].sum(axis=1) # other column is sum of everything not in top x

        raw_counts_df2=raw_counts_df[first_x + ['other']] # compile

    # If you want all split properties to show, set show_top_x to a large number and no 'other' category will be made
    else:
        raw_counts_df2=raw_counts_df
        
        # If you wish to only see the top x destinations, put the rest into 'other'
    if (len(list(df_props.columns))>(show_top_x + 1)): # if more than x breakdown labels, create another column - can change if required

        # Store first x columns
        first_x=list(df_props.columns)[:(show_top_x + 1)]

        # Store the others
        rest=list(df_props.columns)[(show_top_x + 1):]

        # Sum the others
        df_props['other']=df_props[rest].sum(axis=1) # other column is sum of everything not in top x

        df_props2=df_props[first_x + ['other']] # compile

    # If you want all split properties to show, set show_top_x to a large number and no 'other' category will be made
    else:
        df_props2=df_props
        
    df_props2=df_props2.copy()
    raw_counts_df2=raw_counts_df2.copy()
    
    df_props2['Date']=pd.to_datetime(df_props2['Date'], format='%m-%Y')
    raw_counts_df2['Date']=pd.to_datetime(raw_counts_df2['Date'], format='%m-%Y')

    if plot:
        
        if option=='counts':
            
            # Plot ballast distribution data (counts)
            fig = px.bar(
                raw_counts_df2, 
                x="Date", 
                y=list(raw_counts_df2.columns)[1:],
                labels={
                    "Date":"Date",
                    "value":"Number of voyages"
                }
            )
            fig.update_layout(xaxis_rangeslider_visible = True)
            fig.show()
            
        if option=='proportions':
            
            # Plot ballast distribution data (proportions)
            fig = px.area(
                df_props2, 
                x="Date", 
                y=list(df_props2.columns)[1:],
                labels={
                    "Date":"Date",
                    "value":"Proportion of voyages"
                }
            )
            fig.update_layout(xaxis_rangeslider_visible = True)
            fig.show()
            
            
    raw_counts_df2['Date']=raw_counts_df2['Date'].dt.strftime('%b-%Y')
    df_props2['Date']=df_props2['Date'].dt.strftime('%b-%Y')

    
    return raw_counts_df2, df_props2, df


# Helper function to make time blocks of 4 years from a specified start date
def get_search_blocks(start_y, start_m, start_d, today):
    
    """
    Vortexa's API maximum search is 4 years and starts in 2016. 
    This function creates a list of tuples splitting up start_date - present into 4-year blocks.
    """
    
    blocks=[]
    
    start=datetime(start_y, start_m, start_d)
    end=start + dateutil.relativedelta.relativedelta(years=4) - dateutil.relativedelta.relativedelta(seconds=1)
    
    if end > today:
        blocks.append((start, today))
        
    else:
        blocks.append((start, end))
    
    while end < today:
        start+=dateutil.relativedelta.relativedelta(years=4) 
        end+=dateutil.relativedelta.relativedelta(years=4)
        
        if end > today:
            blocks.append((start, today))
            
        else:  
            blocks.append((start, end))
    
        
    return blocks


# Function for aggregating voyages data and splitting
def voyages_time_series_with_split(start_y, start_m, start_d, end_y, end_m, end_d, origin, destination, locs, prod, prod_excl, vessel_class, vessel_class_excl, status, freq, option, operator, title, split, plot, plot_type, show_top_x):
   
    today=datetime(end_y, end_m, end_d)
    search_blocks=get_search_blocks(start_y, start_m, start_d, today)
    
    result_dfs=pd.DataFrame()

    for block in search_blocks:

        time_min=block[0]
        time_max=block[1]
        print(f"Downloading {option} for period: {time_min} to {time_max}")

    
        # Original query
        result = v.VoyagesTimeseries().search(
            time_min=time_min,
            time_max=time_max,
            origins=origin,
            destinations=destination,
            locations=locs,
            latest_products=prod,
            latest_products_excluded=prod_excl,
            vessels=vessel_class,
            vessels_excluded=vessel_class_excl,
            voyage_status=status,
            breakdown_property=option,
            breakdown_frequency=freq,
            breakdown_split_property=split,
            breakdown_unit_operator=operator,
            ).to_df(columns='all')

        # If you wish to split, process the data as follows
        if split != None:

            # Break the output down into k data frames, all with date, id, label, value and count columns
            # Stack these on top of each other

            breakdown_cols=list(result.columns)[3:]
            cols=['key']+breakdown_cols
            k=int(len(breakdown_cols) / 4)
            result2=result[cols]

            # Empty data frame for stacking
            stack=pd.DataFrame()

            # Loop through each split property
            for i in range(k):

                cols=['key', f'breakdown.{i}.id', f'breakdown.{i}.label', f'breakdown.{i}.value', f'breakdown.{i}.count']

                temp=result2[cols]

                new_cols=['date', 'id', 'label', 'value', 'count']

                temp.columns=new_cols

                stack=pd.concat([stack, temp])

            # Choose relevant columns from the stacked data frame    
            stack2=stack[['date', 'label', 'value']]

            # Remove rows with blank labels
            # These are for regions where a 0 value will show, we deal with this later
            result3=stack2[stack2['label']!='']

            # Sum each split property and rank them to obtain an order for the data to appear in
            result3=result3.copy()
            result3['value'] = pd.to_numeric(result3['value'])
            sum_per_label=result3.groupby('label')['value'].sum().reset_index()
            sum_per_label.sort_values(by='value', ascending=False, inplace=True)
            labels=list(sum_per_label['label'].unique()) # we use this order

            # Sort the result first by split property and then by date
            # This helps us to re-transpose the data later
            result3=result3.sort_values(by=['label', 'date']).copy()

            # Create and sort a dates data frame
            dates_df=pd.DataFrame(result3['date'].unique(), columns=['date'])
            dates_df['date']=pd.to_datetime(dates_df['date'])
            dates_df.sort_values(by='date', ascending=True, inplace=True)

            # Empty data frame to store split properties' corresponding columns
            store_df=pd.DataFrame()

            # First loop through each split property
            for i in range(len(labels)):

                # Empty list to store values
                values=[]

                # Temporary data frame to work with (only for current split property)
                temp_df=result3[result3['label']==labels[i]]

                # Now loop through each date in the temporary data
                for j in range(len(dates_df['date'])):

                    # Obtain record for date in question
                    check=temp_df[temp_df['date']==dates_df['date'][j]]
                    
                    # If no record, add 0.0 as the value for that split property on that date
                    if len(check)==0:
                        values.append(0.0)

                    # If record exists, add its value
                    else:
                        values.append(check['value'].iloc[0])

                # Compile
                values_df=pd.DataFrame(values, columns=[labels[i]])
                store_df=pd.concat([store_df, values_df], axis=1)

            # After looping, add date column
            result5=pd.concat([dates_df, store_df], axis=1)


        # If no split, just select and rename relevant columns
        else:
            result5=result[['key', 'value']]
            result5.columns=['date', 'value']
            
        result_dfs=pd.concat([result_dfs, result5])
        
    # If you wish to only show the top x split properties in the plot, put the rest into 'other'
    if (len(list(result_dfs.columns))>(show_top_x + 1)): # if more than x breakdown labels, create another column - can change if required

        # Store first x columns
        first_x=list(result_dfs.columns)[:(show_top_x + 1)]

        # Store the others
        rest=list(result_dfs.columns)[(show_top_x + 1):]

        # Sum the others
        result_dfs['other']=result_dfs[rest].sum(axis=1) # other column is sum of everything not in top x

        result_dfs2=result_dfs[first_x + ['other']] # compile

    # If you want all split properties to show, set show_top_x to a large number and no 'other' category will be made
    else:
        result_dfs2=result_dfs
        
    # Set units for y axis label if you wish to plot
    if option=='vessel_count':
        y_axis_label='No. of vessels'
        
    elif option=='utilisation':
        y_axis_label="No. of vessels"

    elif option=='cargo_quantity':
        y_axis_label="tonne-days"
        
    elif option=='dwt':
        y_axis_label="dwt"

    elif option=='cubic_capacity':
        y_axis_label="cubic meters"

    elif option=='tonne_miles':
        y_axis_label="tonne-miles"
        
    elif option=='avg_speed':
        y_axis_label="knots"
      
    if plot_type=='area':
    
        if plot: # plot data if desired

            fig = px.area(
                result_dfs2, # data to plot
                title=title, # title set as input
                x="date",
                y=list(result_dfs2.columns)[1:],
                labels={
                    "date":"Date",
                    "value":y_axis_label # unit label
                },
                )
            fig.update_layout(xaxis_rangeslider_visible = True)
            fig.show()
            
    if plot_type=='line':
    
        if plot: # plot data if desired

            fig = px.line(
                result_dfs2, # data to plot
                title=title, # title set as input
                x="date",
                y=list(result_dfs2.columns)[1:],
                labels={
                    "date":"Date",
                    "value":y_axis_label # unit label
                },
                )
            fig.update_layout(xaxis_rangeslider_visible = True)
            fig.show()
            
    if plot_type=='bar':
    
        if plot: # plot data if desired

            fig = px.bar(
                result_dfs2, # data to plot
                title=title, # title set as input
                x="date",
                y=list(result_dfs2.columns)[1:],
                labels={
                    "date":"Date",
                    "value":y_axis_label # unit label
                },
                )
            fig.update_layout(xaxis_rangeslider_visible = True)
            fig.show()
        
    # Reformat dates and rename date column
    result_dfs2=result_dfs2.copy()
    result_dfs2['date']=result_dfs2['date'].dt.strftime('%d-%m-%Y')
    result_dfs2.rename(columns={'date': 'Date'}, inplace=True)
    
    if split==None:
        result_dfs2.rename(columns={'value': title}, inplace=True)
        
    result_dfs2 = result_dfs2.fillna(0)
        

    return result_dfs2


# function to create a moving average
def moving_average(data, period, option):
    
    if option=='multiple':

        # calculate moving avg
        moving_avg = pd.DataFrame(data.iloc[:, 1:].rolling(window=period, min_periods=1).mean())

        # add moving average
        moving_avg_df=pd.concat([data.iloc[0:, 0:1], moving_avg], axis=1)

        moving_avg_df.columns=list(data.columns)
        
    elif option=='single':
        
        # calculate moving avg
        moving_avg = pd.DataFrame(data['value'].rolling(window=period, min_periods=1).mean())
        moving_avg.columns=[f'{period}-day moving_avg']

        # get all columns
        data_cols=list(data.columns)

        # get all columns except vlaue
        date_cols=[x for x in data_cols if x !='value']

        # add moving average
        moving_avg_df=pd.concat([data[date_cols], moving_avg], axis=1)

        moving_avg_df.rename(columns={f'{period}-day moving_avg':'value'}, inplace=True)
        

    return moving_avg_df

# Function for getting freight data
def voyages_time_series(start_y, start_m, start_d, origin, origin_excl, destination, destination_excl, prod, prod_excl, vessel_class, vessel_class_excl, status, freq, unit, operator):
   
    today=datetime.today()
    search_blocks=get_search_blocks(start_y, start_m, start_d, today)
    
    result_dfs=pd.DataFrame()

    for block in search_blocks:

        time_min=block[0]
        time_max=block[1]
        print(f"Downloading freight data for period: {time_min} to {time_max}")

    
        # Original query
        result = v.VoyagesTimeseries().search(
            time_min=time_min,
            time_max=time_max,
            origins=origin,
            origins_excluded=origin_excl,
            destinations=destination,
            destinations_excluded=destination_excl,
            latest_products=prod,
            latest_products_excluded=prod_excl,
            vessels=vessel_class,
            vessels_excluded=vessel_class_excl,
            voyage_status=status,
            breakdown_frequency=freq,
            breakdown_property=unit,
            breakdown_unit_operator=operator
            ).to_df(columns='all')

        result2=result[['key', 'value']]
        result2.columns=['date', 'value']
            
        result_dfs=pd.concat([result_dfs, result2])
        
    # Reformat dates and rename date column
    result_dfs=result_dfs.copy()
    result_dfs['date'] = pd.to_datetime(result_dfs['date'])
    result_dfs['string_date']=result_dfs['date'].dt.strftime('%d-%m-%Y')
    result_dfs['dd_mmm']=result_dfs['date'].dt.strftime('%d-%b')
    result_dfs['month']=result_dfs['date'].dt.strftime('%b')
    result_dfs['week_end_timestamp'] = result_dfs['date'] + pd.offsets.Week(weekday=6)  
    result_dfs['week_number'] = result_dfs['date'].dt.isocalendar().week
    result_dfs['year']=round(pd.to_numeric(result_dfs['date'].dt.strftime('%Y')), 0)
    result_dfs = result_dfs.fillna(0)
    
    result_dfs=result_dfs[['date', 'week_end_timestamp', 'string_date', 'dd_mmm', 'week_number', 'month', 'year', 'value']]
        
    result_dfs.reset_index(drop=True, inplace=True)

    return result_dfs


# function for obtaining seasonal chart data
def seasonal_charts(data, freq, start_y):
    
    # Remove leap days for daily time series
    df=data[data['dd_mmm']!='29-Feb']
    df.reset_index(drop=True, inplace=True)
    
    # Set constants
    current_date=datetime.today()
    this_year=current_date.year
    last_year=this_year-1
    stats_end_y=last_year
    stats_start_y=start_y
    
    # Define stats calculating data set and current year dataset
    stats_df=df[(df['year'] >= stats_start_y) & (df['year'] <= stats_end_y)]
    this_year_df=df[df['year']==this_year]
    
    # if frequency is daily, calculate stats on a daily basis
    if freq=='day':

        # date range creation - use a non-leap year
        start_date = datetime(2023, 1, 1)
        end_date = datetime(2023, 12, 31)

        date_range = pd.DataFrame(pd.date_range(start=start_date, end=end_date, freq='1D'), columns=['Date'])
        date_range['Date']=date_range['Date'].dt.strftime('%d-%b')
        
        # empty lists to store stats
        mins=[]
        maxs=[]
        avgs=[]
        this_year_vals=[]
        
        # loop through dates and calculate stats
        for i in range(len(date_range)):

            temp=stats_df[stats_df['dd_mmm']==date_range['Date'][i]]
            
            mn=min(temp['value'])
            mx=max(temp['value'])
            av=temp['value'].mean()

            mins.append(mn)
            maxs.append(mx)
            avgs.append(av)

        # obtain last year's values
        last_year_df=pd.DataFrame(stats_df[stats_df['year']==last_year]['value'])
        last_year_df.columns=['Last year']
        last_year_df.reset_index(drop=True, inplace=True)
        
        # loop through dates and obtain current year values, if no data yet, add a blank
        for i in range(len(date_range)):

            temp=this_year_df[this_year_df['dd_mmm']==date_range['Date'][i]]

            if (len(temp)!=0):

                add=temp['value'].iloc[0]

                this_year_vals.append(add)

            elif (len(temp)==0):

                this_year_vals.append('')

        
        # convert stats to data frames
        mins_df=pd.DataFrame(mins, columns=['Min.'])
        maxs_df=pd.DataFrame(maxs, columns=['Max.'])
        avgs_df=pd.DataFrame(avgs, columns=[f'Average {stats_start_y}-{stats_end_y}'])
        this_year_vals_df=pd.DataFrame(this_year_vals, columns=['Current year'])
        
        # compile data
        seasonal_df=pd.concat([date_range, mins_df, maxs_df, avgs_df, last_year_df, this_year_vals_df], axis=1)
        
        # calculate range
        seasonal_df[f'Range {stats_start_y}-{stats_end_y}']=seasonal_df['Max.']-seasonal_df['Min.']
        
        # compile in desired order
        seasonal_df=seasonal_df[['Date', 'Min.', f'Range {stats_start_y}-{stats_end_y}', f'Average {stats_start_y}-{stats_end_y}', 'Last year', 'Current year']]

    
    # if frequency is monthly, calculate stas on a monthly basis
    elif freq=='month':

        # date range creation
        start_date = datetime(2023, 1, 1)
        end_date = datetime(2023, 12, 31)

        date_range = pd.DataFrame(pd.date_range(start=start_date, end=end_date, freq='1M'), columns=['Date'])
        date_range['Date']=date_range['Date'].dt.strftime('%b')
        
        # empty lists to store various stats
        mins=[]
        maxs=[]
        avgs=[]
        this_year_vals=[]
        
        # loop through dates and calculate stats
        for i in range(len(date_range)):

            temp=stats_df[stats_df['month']==date_range['Date'][i]]

            mn=min(temp['value'])
            mx=max(temp['value'])
            av=temp['value'].mean()

            mins.append(mn)
            maxs.append(mx)
            avgs.append(av)

        # obtain previous year's values
        last_year_df=pd.DataFrame(stats_df[stats_df['year']==last_year]['value'])
        last_year_df.columns=['Last year']
        last_year_df.reset_index(drop=True, inplace=True)
        
        # loop through dates and obtain current year values, if not data yet, add a blank
        for i in range(len(date_range)):

            temp=this_year_df[this_year_df['month']==date_range['Date'][i]]

            if (len(temp)!=0):

                add=temp['value'].iloc[0]

                this_year_vals.append(add)

            elif (len(temp)==0):

                this_year_vals.append('')

        # convert stats lists to data frames
        mins_df=pd.DataFrame(mins, columns=['Min.'])
        maxs_df=pd.DataFrame(maxs, columns=['Max.'])
        avgs_df=pd.DataFrame(avgs, columns=[f'Average {stats_start_y}-{stats_end_y}'])
        this_year_vals_df=pd.DataFrame(this_year_vals, columns=['Current year'])
        
        # compile data
        seasonal_df=pd.concat([date_range, mins_df, maxs_df, avgs_df, last_year_df, this_year_vals_df], axis=1)
        
        # calculate the range 
        seasonal_df[f'Range {stats_start_y}-{stats_end_y}']=seasonal_df['Max.']-seasonal_df['Min.']
        
        # compile in desired order
        seasonal_df=seasonal_df[['Date', 'Min.', f'Range {stats_start_y}-{stats_end_y}', f'Average {stats_start_y}-{stats_end_y}', 'Last year', 'Current year']]
        
        
    return seasonal_df

# Function to plot seasonal chart
def plot_seasonal(y_min, y_max, data, title):
    df=data

    colors = {
        'Min.': 'white',              
        list(df.columns)[2]: 'lightblue', 
        list(df.columns)[3]: 'blue', 
        'Last year': 'yellow',       
        'Current year': 'red'     
    }

    fig = px.area(df, x='Date', y=list(df.columns)[1:3], title=title, color_discrete_map=colors)

    # Add line charts for Average, Last year, and Current year
    for column in list(df.columns)[3:6]:
        fig.add_scatter(x=df['Date'], y=df[column], mode='lines', name=column, line=dict(color=colors[column]))

    # Set the y-axis range
    fig.update_yaxes(range=[y_min, y_max])

    # Show the plot
    fig.show()
    
# Function to plot and extract seasonal data
def complete_seasonal_voyages(start_y, start_m, start_d, origin, origin_excl, destination, destination_excl, prod, prod_excl, vessel_class, vessel_class_excl, status, freq, unit, operator, ma_period, plot, title, y_min, y_max):

    # Query voyages data
    daily_voyages_ts=voyages_time_series(start_y=start_y, start_m=start_m, start_d=start_d, 
                                         prod=prod, prod_excl=prod_excl, 
                                         vessel_class=vessel_class, vessel_class_excl=vessel_class_excl, 
                                         status=status,
                                         freq=freq, unit=unit, operator=operator, 
                                         origin=origin, origin_excl=origin_excl,
                                         destination=destination, destination_excl=destination_excl)


    if ma_period==None:
        data=seasonal_charts(data=daily_voyages_ts, freq=freq, start_y=start_y)

    else:
        # Calculate moving averages
        voyages_ts_x_day_ma=moving_average(data=daily_voyages_ts, period=ma_period, option='single')
        data=seasonal_charts(data=voyages_ts_x_day_ma, freq=freq, start_y=start_y)
        title=title+f' ({ma_period}-{freq} MA)'

    if plot:
        plot_seasonal(y_min=y_min, y_max=y_max, 
                      data=data, 
                      title=title)
        
    return data


# ## Post laden route ballast distribution
# Due to Vortexa's unique Voyages dataset, which has unique identifiers which are all linked to the next voyage as well as the previous voyage identifiers, we can gain insight into what tankers are doing after they discharge on a specified route. This is valuable for analysts who support freight traiders and who want to plan the best regional positioning of their fleets based on the latest changes in tanker behaviour. Additionally, commodity traders can use this to anticipate increased demand in a region.
# 
# 

# ### Worked example - TC2
# In this example, we visualise the behaviour of MR2 tankers after discharging CPP on TC2 (Europe-to-USAC).

# In[6]:


# Query and plot post ballast counts for MR2s trading TC2
tc2_post_ballast=post_ballast_distribution(origin=nwe, origin_excl=None, 
                                           destination=usac, destination_excl=None, 
                                           vessel_class='oil_mr2', 
                                           product=cpp, product_excl=lpg, 
                                           start_y=2021, start_m=1, start_d=1, 
                                           end_y=2024, end_m=5, end_d=31, 
                                           show_top_x=3, plot=True, option='proportions')


# ### Overlay with speed data
# Use voyages time series to aggregate ballast speeds towards region of interest.

# In[7]:


voyages_ts=voyages_time_series_with_split(start_y=2018, start_m=1, start_d=1, 
                                          end_y=2024, end_m=5, end_d=30, 
                                          origin=None, destination=gom, locs=None, 
                                          prod=cpp, prod_excl=lpg, 
                                          vessel_class='oil_mr2', vessel_class_excl=None, 
                                          status='ballast', freq='day', option='avg_speed', operator='avg', 
                                          title='MR2 ballast speeds towards Gulf of Mexico', 
                                          split=None, plot=True, plot_type='line', show_top_x=1000)


# ### Contextualise speed data
# Use seasonality to put speed data into context.

# In[8]:


seasonal_speed=complete_seasonal_voyages(start_y=2017, start_m=1, start_d=1, 
                                         origin=None, origin_excl=None, 
                                         destination=gom, destination_excl=None, 
                                         prod=cpp, prod_excl=lpg, 
                                         vessel_class='oil_mr2', vessel_class_excl=None, 
                                         status='ballast', freq='day', unit='avg_speed', operator='avg', 
                                         ma_period=5, plot=True, 
                                         title='Seasonal MR2 speed towards Gulf of Mexico', 
                                         y_min=10.3, y_max=13.5)


# In[ ]:


# In[ ]: