#!/usr/bin/env python
# coding: utf-8
"""Clean-petroleum-product floating-storage analysis built on the Vortexa SDK.

Reconstructed from a Jupyter-notebook export (the original file was collapsed
onto a few lines and not runnable). The script:

* pulls CargoTimeSeries data for clean petroleum products,
* merges per-product / per-region series into wide DataFrames, and
* plots global and Asian floating-storage levels plus Diesel/Gasoil imports.

Requires the third-party ``vortexasdk`` package and API credentials; all
data-fetching happens at module level, so importing this module performs
network I/O.
"""

import multiprocessing as mp

# Avoid matplotlib crashes with forked worker processes in notebook kernels:
# https://github.com/matplotlib/matplotlib/issues/15410#issuecomment-625027757
mp.set_start_method('forkserver')

import warnings
from datetime import datetime
from functools import reduce

import matplotlib.pyplot as plt
import pandas as pd

from vortexasdk import CargoTimeSeries, Products, Geographies
from vortexasdk.utils import convert_to_list

# Enable inline plots when running under IPython/Jupyter; the guard makes the
# script importable as plain Python as well (get_ipython only exists in IPython).
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

warnings.filterwarnings("ignore")
plt.rcParams['figure.figsize'] = (15, 10)
plt.rcParams.update({'font.size': 14})

# Analysis window and measurement unit ('b' = barrels; 'bpd' used for imports).
START_DATE = datetime(2019, 6, 10)
END_DATE = datetime(2020, 6, 10)
UNIT = 'b'


# ---------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------

def get_product_id_exact(product_name):
    """Return the unique Vortexa product id whose name equals *product_name*.

    Returns ``None`` when *product_name* is ``None``. Asserts that exactly one
    product matches the name exactly (search is fuzzy, so filter on equality).
    """
    if product_name is None:
        return None
    products = [p.id for p in Products().search(product_name).to_list()
                if p.name == product_name]
    assert len(products) == 1
    return products[0]


def get_geography_id_exact(geog_name):
    """Return the unique Vortexa geography id whose name equals *geog_name*.

    Returns ``None`` when *geog_name* is ``None``. Asserts that exactly one
    geography matches the name exactly.
    """
    if geog_name is None:
        return None
    geogs = [g.id for g in Geographies().search(geog_name).to_list()
             if g.name == geog_name]
    assert len(geogs) == 1
    return geogs[0]


def merge(data_frames):
    """Outer-join a list of date-indexed DataFrames into one wide DataFrame."""
    return reduce(
        lambda left, right: pd.merge(
            left, right, left_index=True, right_index=True, how="outer"
        ),
        data_frames,
    )


def plot_df(df, title=None, unit=UNIT):
    """Plot *df* with a grid, labelling the y-axis in thousands of *unit*."""
    df.plot(title=title, grid=True)
    plt.xlabel('date')
    plt.ylabel('k' + unit)


def prepare_dataset(df_fs, product_names, destination_names, storage_names,
                    filter_activity):
    """Tidy a raw CargoTimeSeries frame into one named, date-indexed column.

    Keeps only the ``key``/``value`` columns, rescales the value to thousands,
    names the column after the query parameters, and returns the frame indexed
    by a timezone-naive ``date`` column (naive so frames merge cleanly).
    """
    # .copy() so the assignments below modify an owned frame, not a view of
    # the caller's frame (avoids SettingWithCopyWarning / silent no-ops).
    df_fs = df_fs[['key', 'value']].copy()
    # Use kilo-units rather than raw units.
    df_fs['value'] = df_fs['value'] / 1000
    # Build a descriptive column name from whichever filters were supplied.
    col_name = str((destination_names or " ")) + \
        " " + str((storage_names) or " ") + \
        " " + str((product_names) or " ") + \
        ": " + filter_activity
    df_fs = df_fs.rename(columns={'key': 'date', 'value': col_name})
    # Remove the timezone from the timestamp so merged frames share an index.
    df_fs['date'] = pd.to_datetime(df_fs['date']).dt.tz_localize(None)
    return df_fs.set_index('date')


def fetch_timeseries(filter_activity,
                     product_names=None,
                     destination_names=None,
                     storage_names=None,
                     unit=UNIT,
                     frequency='day',
                     start_date=START_DATE,
                     end_date=END_DATE):
    """Fetch a CargoTimeSeries and return it as a tidy, date-indexed frame.

    Name arguments may be a single string or a list of strings; each is
    resolved to Vortexa ids by exact-name lookup before querying.
    """
    # Resolve human-readable names to Vortexa ids.
    product_ids = [get_product_id_exact(name)
                   for name in convert_to_list(product_names)]
    destination_ids = [get_geography_id_exact(name)
                       for name in convert_to_list(destination_names)]
    storage_ids = [get_geography_id_exact(name)
                   for name in convert_to_list(storage_names)]

    # Load the raw time series from the API.
    df = CargoTimeSeries().search(timeseries_frequency=frequency,
                                  timeseries_unit=unit,
                                  disable_geographic_exclusion_rules=True,
                                  filter_products=product_ids,
                                  filter_destinations=destination_ids,
                                  filter_storage_locations=storage_ids,
                                  filter_activity=filter_activity,
                                  filter_time_min=start_date,
                                  filter_time_max=end_date).to_df()

    # Rename columns, rescale, and set the date index.
    return prepare_dataset(df, product_names, destination_names,
                           storage_names, filter_activity)


# ---------------------------------------------------------------------------
# Commonly used constants
# ---------------------------------------------------------------------------

clean = "Clean Petroleum Products"
naphtha = "Naphtha"
diesel_gasoil = "Diesel/Gasoil"
gasoline_blending_components = "Gasoline/Blending Components"
jet_kero = "Jet/Kero"


# ---------------------------------------------------------------------------
# Analysis
# ---------------------------------------------------------------------------

# Load all global clean floating-storage cargos.
clean_fs = fetch_timeseries("storing_state", clean)
clean_fs.head()

plot_df(clean_fs, "Global Clean Floating Storage")

# Product split of the global floating-storage cargos.
data_frames = [
    fetch_timeseries("storing_state", gasoline_blending_components),
    fetch_timeseries("storing_state", diesel_gasoil),
    fetch_timeseries("storing_state", naphtha),
    fetch_timeseries("storing_state", jet_kero),
]
df_merged = merge(data_frames)
df_merged.head()

plot_df(df_merged)

# Asia-only floating storage, by product.
dfs_asia = [
    fetch_timeseries("storing_state", storage_names="Asia",
                     product_names=gasoline_blending_components),
    fetch_timeseries("storing_state", storage_names="Asia",
                     product_names=diesel_gasoil),
    fetch_timeseries("storing_state", storage_names="Asia",
                     product_names=naphtha),
    fetch_timeseries("storing_state", storage_names="Asia",
                     product_names=jet_kero),
]
df_asia = merge(dfs_asia)
df_asia.head()

plot_df(df_asia)

# How Diesel/Gasoil storage levels are split across Asian geographies.
dfs_diesel_gasoil_countries = [
    fetch_timeseries("storing_state", product_names=diesel_gasoil,
                     storage_names="South Korea"),
    fetch_timeseries("storing_state", product_names=diesel_gasoil,
                     storage_names="India"),
    fetch_timeseries("storing_state", product_names=diesel_gasoil,
                     storage_names="China"),
    fetch_timeseries("storing_state", product_names=diesel_gasoil,
                     storage_names=["Singapore", "Malaysia", "Indonesia"]),
]
df_diesel_gasoil_countries = merge(dfs_diesel_gasoil_countries)
df_diesel_gasoil_countries.head()

plot_df(df_diesel_gasoil_countries)

# Diesel/Gasoil Asian imports (monthly, in barrels per day).
end_date = datetime(2020, 5, 31)

dfs_imports = [
    fetch_timeseries("unloading_state", diesel_gasoil, unit='bpd',
                     frequency='month', end_date=end_date,
                     destination_names="Australia"),
    fetch_timeseries("unloading_state", diesel_gasoil, unit='bpd',
                     frequency='month', end_date=end_date,
                     destination_names="Indonesia"),
    fetch_timeseries("unloading_state", diesel_gasoil, unit='bpd',
                     frequency='month', end_date=end_date,
                     destination_names="Philippines"),
    fetch_timeseries("unloading_state", diesel_gasoil, unit='bpd',
                     frequency='month', end_date=end_date,
                     destination_names="Vietnam"),
]
df_imports = merge(dfs_imports)

plot_df(df_imports)