#!/usr/bin/env python
# coding: utf-8
"""Clean-petroleum-product floating-storage analysis built on the Vortexa SDK.

Reconstructed from a Jupyter-notebook export (the original file was collapsed
onto a few lines and not runnable). The script:

* pulls CargoTimeSeries data for clean petroleum products,
* merges per-product / per-region series into wide DataFrames, and
* plots global and Asian floating-storage levels plus Diesel/Gasoil imports.

Requires the third-party ``vortexasdk`` package and API credentials; all
data-fetching happens at module level, so importing this module performs
network I/O.
"""

import multiprocessing as mp

# Avoid matplotlib crashes with forked worker processes in notebook kernels:
# https://github.com/matplotlib/matplotlib/issues/15410#issuecomment-625027757
mp.set_start_method('forkserver')

import warnings
from datetime import datetime
from functools import reduce

import matplotlib.pyplot as plt
import pandas as pd

from vortexasdk import CargoTimeSeries, Products, Geographies
from vortexasdk.utils import convert_to_list

# Enable inline plots when running under IPython/Jupyter; the guard makes the
# script importable as plain Python as well (get_ipython only exists in IPython).
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

warnings.filterwarnings("ignore")
plt.rcParams['figure.figsize'] = (15, 10)
plt.rcParams.update({'font.size': 14})

# Analysis window and measurement unit ('b' = barrels; 'bpd' used for imports).
START_DATE = datetime(2019, 6, 10)
END_DATE = datetime(2020, 6, 10)
UNIT = 'b'


# ---------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------

def get_product_id_exact(product_name):
    """Return the unique Vortexa product id whose name equals *product_name*.

    Returns ``None`` when *product_name* is ``None``. Asserts that exactly one
    product matches the name exactly (search is fuzzy, so filter on equality).
    """
    if product_name is None:
        return None
    products = [p.id for p in Products().search(product_name).to_list()
                if p.name == product_name]
    assert len(products) == 1
    return products[0]


def get_geography_id_exact(geog_name):
    """Return the unique Vortexa geography id whose name equals *geog_name*.

    Returns ``None`` when *geog_name* is ``None``. Asserts that exactly one
    geography matches the name exactly.
    """
    if geog_name is None:
        return None
    geogs = [g.id for g in Geographies().search(geog_name).to_list()
             if g.name == geog_name]
    assert len(geogs) == 1
    return geogs[0]


def merge(data_frames):
    """Outer-join a list of date-indexed DataFrames into one wide DataFrame."""
    return reduce(
        lambda left, right: pd.merge(
            left, right, left_index=True, right_index=True, how="outer"
        ),
        data_frames,
    )


def plot_df(df, title=None, unit=UNIT):
    """Plot *df* with a grid, labelling the y-axis in thousands of *unit*."""
    df.plot(title=title, grid=True)
    plt.xlabel('date')
    plt.ylabel('k' + unit)


def prepare_dataset(df_fs, product_names, destination_names, storage_names,
                    filter_activity):
    """Tidy a raw CargoTimeSeries frame into one named, date-indexed column.

    Keeps only the ``key``/``value`` columns, rescales the value to thousands,
    names the column after the query parameters, and returns the frame indexed
    by a timezone-naive ``date`` column (naive so frames merge cleanly).
    """
    # .copy() so the assignments below modify an owned frame, not a view of
    # the caller's frame (avoids SettingWithCopyWarning / silent no-ops).
    df_fs = df_fs[['key', 'value']].copy()
    # Use kilo-units rather than raw units.
    df_fs['value'] = df_fs['value'] / 1000
    # Build a descriptive column name from whichever filters were supplied.
    col_name = str((destination_names or " ")) + \
        " " + str((storage_names) or " ") + \
        " " + str((product_names) or " ") + \
        ": " + filter_activity
    df_fs = df_fs.rename(columns={'key': 'date', 'value': col_name})
    # Remove the timezone from the timestamp so merged frames share an index.
    df_fs['date'] = pd.to_datetime(df_fs['date']).dt.tz_localize(None)
    return df_fs.set_index('date')


def fetch_timeseries(filter_activity,
                     product_names=None,
                     destination_names=None,
                     storage_names=None,
                     unit=UNIT,
                     frequency='day',
                     start_date=START_DATE,
                     end_date=END_DATE):
    """Fetch a CargoTimeSeries and return it as a tidy, date-indexed frame.

    Name arguments may be a single string or a list of strings; each is
    resolved to Vortexa ids by exact-name lookup before querying.
    """
    # Resolve human-readable names to Vortexa ids.
    product_ids = [get_product_id_exact(name)
                   for name in convert_to_list(product_names)]
    destination_ids = [get_geography_id_exact(name)
                       for name in convert_to_list(destination_names)]
    storage_ids = [get_geography_id_exact(name)
                   for name in convert_to_list(storage_names)]

    # Load the raw time series from the API.
    df = CargoTimeSeries().search(timeseries_frequency=frequency,
                                  timeseries_unit=unit,
                                  disable_geographic_exclusion_rules=True,
                                  filter_products=product_ids,
                                  filter_destinations=destination_ids,
                                  filter_storage_locations=storage_ids,
                                  filter_activity=filter_activity,
                                  filter_time_min=start_date,
                                  filter_time_max=end_date).to_df()

    # Rename columns, rescale, and set the date index.
    return prepare_dataset(df, product_names, destination_names,
                           storage_names, filter_activity)


# ---------------------------------------------------------------------------
# Commonly used constants
# ---------------------------------------------------------------------------

clean = "Clean Petroleum Products"
naphtha = "Naphtha"
diesel_gasoil = "Diesel/Gasoil"
gasoline_blending_components = "Gasoline/Blending Components"
jet_kero = "Jet/Kero"


# ---------------------------------------------------------------------------
# Analysis
# ---------------------------------------------------------------------------

# Load all global clean floating-storage cargos.
clean_fs = fetch_timeseries("storing_state", clean)
clean_fs.head()

plot_df(clean_fs, "Global Clean Floating Storage")

# Product split of the global floating-storage cargos.
data_frames = [
    fetch_timeseries("storing_state", gasoline_blending_components),
    fetch_timeseries("storing_state", diesel_gasoil),
    fetch_timeseries("storing_state", naphtha),
    fetch_timeseries("storing_state", jet_kero),
]
df_merged = merge(data_frames)
df_merged.head()

plot_df(df_merged)

# Asia-only floating storage, by product.
dfs_asia = [
    fetch_timeseries("storing_state", storage_names="Asia",
                     product_names=gasoline_blending_components),
    fetch_timeseries("storing_state", storage_names="Asia",
                     product_names=diesel_gasoil),
    fetch_timeseries("storing_state", storage_names="Asia",
                     product_names=naphtha),
    fetch_timeseries("storing_state", storage_names="Asia",
                     product_names=jet_kero),
]
df_asia = merge(dfs_asia)
df_asia.head()

plot_df(df_asia)

# How Diesel/Gasoil storage levels are split across Asian geographies.
dfs_diesel_gasoil_countries = [
    fetch_timeseries("storing_state", product_names=diesel_gasoil,
                     storage_names="South Korea"),
    fetch_timeseries("storing_state", product_names=diesel_gasoil,
                     storage_names="India"),
    fetch_timeseries("storing_state", product_names=diesel_gasoil,
                     storage_names="China"),
    fetch_timeseries("storing_state", product_names=diesel_gasoil,
                     storage_names=["Singapore", "Malaysia", "Indonesia"]),
]
df_diesel_gasoil_countries = merge(dfs_diesel_gasoil_countries)
df_diesel_gasoil_countries.head()

plot_df(df_diesel_gasoil_countries)

# Diesel/Gasoil Asian imports (monthly, in barrels per day).
end_date = datetime(2020, 5, 31)

dfs_imports = [
    fetch_timeseries("unloading_state", diesel_gasoil, unit='bpd',
                     frequency='month', end_date=end_date,
                     destination_names="Australia"),
    fetch_timeseries("unloading_state", diesel_gasoil, unit='bpd',
                     frequency='month', end_date=end_date,
                     destination_names="Indonesia"),
    fetch_timeseries("unloading_state", diesel_gasoil, unit='bpd',
                     frequency='month', end_date=end_date,
                     destination_names="Philippines"),
    fetch_timeseries("unloading_state", diesel_gasoil, unit='bpd',
                     frequency='month', end_date=end_date,
                     destination_names="Vietnam"),
]
df_imports = merge(dfs_imports)

plot_df(df_imports)