#!/usr/bin/env python
# coding: utf-8
# # 🌊 How to use Python 🐍 to turn the tide
# A webinar showcasing **Vortexa**'s **Python Software Development Kit (SDK)**, demonstrating the ins & outs of the SDK, and how using Python can expand your energy industry insights.
#
#
# In[1]:
import pandas as pd
# Widen pandas display limits so the wide SDK DataFrames render fully in a notebook.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows', 300)
from datetime import datetime
from dateutil.relativedelta import relativedelta
# Vortexa SDK endpoints used below: reference data (Products, Geographies)
# and cargo queries (CargoTimeSeries, CargoMovements).
from vortexasdk import CargoTimeSeries, Products, Geographies, CargoMovements
# [Vortexa Platform](https://analytics.vortexa.com/?lat=0&lng=0&page=Live&products=%5B%2254af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11%22%2C%22fc2e0746428ed6109045fc1195a4ee4ef2e339d55b7b6e3a0fb01ecf28efc4f7%22%5D&tabId=8d7ae816-7c38-40bf-a27e-370c542ff2d3&zoom=3)
#
#
# In[2]:
# Let's get some crude product categories.
# An empty search returns the full product reference data; columns="all"
# keeps every flattened attribute column.
products_df = Products().search().to_df(columns="all")
# In[3]:
products_df.head()
# In[4]:
products_df.shape
# In[ ]:
# Same row count as .shape[0] above — just another way to see the size.
len(products_df)
# In[5]:
# Let's focus on some columns.
# NOTE: dict order matters here — it fixes the column order of the trimmed frame.
product_new_labels = {
    "id": "id",
    "name": "Name",
    "parent.0.name": "Parent",
    "layer.0": "Layer",
    "parent.0.layer.0": "Parent_Layer",
    "hierarchy.0.label": "Level_0",
    "hierarchy.1.label": "Level_1",
    "meta.sulphur_min": "Average_Sulphur",
    "meta.api_min": "Average_API",
}
# Keep only the mapped columns, then give them their friendlier names.
products_df = products_df.loc[:, list(product_new_labels)].rename(columns=product_new_labels)
products_df.head()
# In[6]:
# Let's look at Crude
products_df.loc[products_df["Parent"] == "Crude"]
# We can see the 6 main categories of crude above.
#
# A crude is defined as either **Sweet** or **Sour**, depending on its percentage sulphur content (sulphur is an undesirable contaminant: there are sulphur limits on most finished products, so sulphur is removed during refining, but it can damage catalysts at refineries). **Sweet crude** has less than 0.5% sulphur content and is generally more valuable than sour grades.
#
# API refers to the density of the crude (i.e. how liquid it is at room temperature and how easy it is to pour). A higher API means a lighter crude: light crudes tend to have API > 34, medium is between 31-33, and heavy is < 30.
#
#
#
# Source: https://www.eia.gov/todayinenergy/detail.php?id=33012
# In[7]:
# Let's look at the Medium-Sour grades — show the first 20.
medium_sour_df = products_df.loc[products_df["Parent"] == "Medium-Sour"]
medium_sour_df.head(20)
# In[8]:
# Total number of Medium-Sour grades in the reference data.
len(medium_sour_df)
# We can see 20/99 **Medium-Sour** grades of **Crude**. But in general the following grades are well known examples of crudes by categories:
#
# * **Light-Sweet**: WTI (Benchmark US Grade) or Brent (North Sea)
#
# * **Light-Sour**: CPC Blend (Russia) or Murban from UAE
#
# * **Medium-Sweet**: Forcados (Nigeria) or Tupi/Lula (Brazil)
#
# * **Medium-Sour**: Middle-Eastern grades such as Arab Light (Saudi Arabia) or Basrah Light (Iraq); Also Russian Urals
#
# * **Heavy-Sweet**: DOBA (Chad's biggest flow) or Djeno (Republic of the Congo) - WAF grades
#
# * **Heavy-Sour**: Basrah Heavy (Iraq) or Maya (Mexico)
#
# # 1. A scenario: Crude Flow into A.R.A. (Amsterdam, Rotterdam, Antwerp)
# Suppose we care about specific product groups, we need to group products according to those groups.
# In[9]:
products_df.dtypes
# In[10]:
# Crude grades = rows at the "grade" layer whose level-1 hierarchy is "Crude".
is_crude_grade = products_df["Layer"].eq("grade") & products_df["Level_1"].eq("Crude")
crudes_df = products_df[is_crude_grade]
# In[11]:
crudes_df.head()
# In[12]:
crudes_df.shape
# Say, we care about the last 2 years or so:
# In[13]:
# NOTE(review): datetime.utcnow() returns a naive datetime and is deprecated
# in Python 3.12; datetime.now(timezone.utc) is the modern replacement —
# confirm the SDK accepts tz-aware datetimes before switching.
now = datetime.utcnow()
two_years_ago = now - relativedelta(months=24)
two_years_ago
# Now, "Crude" is both a category and a product itself. Let's fetch its *unique* `id`:
# In[14]:
# Grab the ID for Crude.
# exact_term_match=True avoids fuzzy matches; the assert guards against the
# search returning zero or multiple products named exactly "Crude".
crude_search = Products().search(term=['Crude'], exact_term_match=True)
assert len(crude_search) == 1
crude_ID = crude_search[0]['id']
crude_ID
# [Vortexa Platform](https://analytics.vortexa.com/?lat=0&lng=0&page=Live&products=%5B%2254af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11%22%2C%22fc2e0746428ed6109045fc1195a4ee4ef2e339d55b7b6e3a0fb01ecf28efc4f7%22%5D&tabId=8d7ae816-7c38-40bf-a27e-370c542ff2d3&zoom=3)
#
#
# Let's inspect some geographical A.R.A. (Amsterdam, Rotterdam, Antwerp) regions:
# In[15]:
# Grab the ID for ARA.
# Same pattern as the Crude lookup: exact match plus a uniqueness guard.
ara_search = Geographies().search("ARA Region",exact_term_match=True)
assert len(ara_search) == 1
ARA_ID = ara_search[0]['id']
ARA_ID
# We will use **ARA** region as it is the main refining hub for North West Europe and therefore a key crude importer.
#
# Let's get the cargo-movements we care about:
# In[16]:
# Create and run the query.
# filter_activity="unloading_start" keys the time window to when discharge
# began (per SDK semantics — confirm against SDK docs); the window is the
# last 24 months, restricted to crude cargoes discharging into ARA.
ARA_crude_imports_df = CargoMovements().search(
filter_activity="unloading_start",
filter_time_min=two_years_ago,
filter_time_max=now,
filter_products=crude_ID,
filter_destinations=ARA_ID).to_df(columns="all")
# In[17]:
ARA_crude_imports_df.head()
# In[18]:
ARA_crude_imports_df.shape
# In[19]:
# Map the SDK's flattened column names onto short, readable labels.
# NOTE: dict order matters — it fixes the column order of the trimmed frame.
new_labels = {
    "cargo_movement_id": "cargo_movement_id",
    "vessels.0.name": "vessel_name",
    "vessels.0.vessel_class": "vessel_class",
    "product.group.label": "product_group",
    "product.grade.label": "product_grade",
    "product.category.label": "product_category",
    "quantity": "quantity",
    "status": "status",
    "events.cargo_port_load_event.0.location.port.label": "loading_port",
    "events.cargo_port_load_event.0.end_timestamp": "loading_finish",
    "events.cargo_port_unload_event.0.location.port.label": "unloading_port",
    "events.cargo_port_unload_event.0.end_timestamp": "unloading_finish",
}
# In[20]:
# Keep only the mapped columns and apply the new names.
ARA_crude_imports_df = ARA_crude_imports_df.loc[:, list(new_labels)].rename(columns=new_labels)
ARA_crude_imports_df.head()
# In[21]:
# Count cargo movements per crude category.
crude_ARA_category = (
    ARA_crude_imports_df.groupby("product_category")
    .size()
    .reset_index(name="Number of CMs")
)
crude_ARA_category
# In[22]:
# Look up the product id for each category that actually appears in the imports.
seen_categories = products_df["Name"].isin(crude_ARA_category["product_category"].values)
crude_category_id_mapping = products_df.loc[seen_categories, ["id", "Name"]]
crude_category_id_mapping
# In[23]:
# The cargo unit for the time series
TS_UNIT = 'bpd'  # barrels per day
# The granularity of the time series
TS_FREQ = 'week'  # weekly imports
# Build one weekly import series per crude category, then join them side by side.
per_category_series = []
for _, category in crude_category_id_mapping.iterrows():
    weekly_df = (
        CargoTimeSeries()
        .search(
            filter_activity='unloading_start',
            filter_destinations=ARA_ID,
            filter_products=category["id"],
            timeseries_unit=TS_UNIT,
            timeseries_frequency=TS_FREQ,
            filter_time_min=two_years_ago,  # Set the date range; quantity in bpd
            filter_time_max=now,
        )
        .to_df()
        .rename(columns={"key": "date", "value": category["Name"]})
        .drop(columns=["count"])
        .set_index("date")
    )
    per_category_series.append(weekly_df)
# One column per category, aligned on the weekly date index.
time_series_imports_per_product_df = pd.concat(per_category_series, axis=1)
# In[24]:
time_series_imports_per_product_df.head()
# In[25]:
# Weeks with no imports for a category come back as NaN — treat them as zero flow.
time_series_imports_per_product_df = time_series_imports_per_product_df.fillna(0)
# In[26]:
time_series_imports_per_product_df.head()
# In[27]:
# Check the index is a date-time index for plotting time-series
type(time_series_imports_per_product_df.index)
# In[28]:
# FIX: the original `import matplotlib as plt` bound the top-level matplotlib
# package to the conventional pyplot alias — `plt` then would not expose the
# pyplot API. pandas plotting needs matplotlib installed either way; import
# the pyplot submodule under its standard alias.
import matplotlib.pyplot as plt
# Overlaid (stacked=False) area chart of weekly imports per crude category.
time_series_imports_per_product_df.plot.area(figsize=(35, 10), lw=3, stacked=False, grid=True, ylabel=TS_UNIT, title="ARA Crude Imports")
# This shows the crudes imported into **A.R.A.** by category, so we can see the key categories are **Medium-Sour** (this will be some Middle Eastern Grades and also Russian Urals), and Light-Sweet (mostly WTI from US but also some North Sea blends)
# ## 1.1. A scenario: Sulphur vs API Crude Flow into A.R.A.
# Now suppose we want a more granular analysis; one that focusses on grades on top of categories. Let's return to one of our previous datasets `ARA_crude_imports_df`. What do we have in there?
# In[29]:
ARA_crude_imports_df.head()
# Suppose we want to look into the `average-sulphur` and `average-api` quantities in these cargoes and try to understand the trends. To do that, we need the `sulphur` and `api` percentage values available in products.
# In[30]:
# Peek at the sulphur/API reference values for the six crude categories.
crude_categories = ["Medium-Sour", "Medium-Sweet", "Heavy-Sour", "Heavy-Sweet", "Light-Sour", "Light-Sweet"]
products_df.loc[products_df["Parent"].isin(crude_categories)].head()
# These need to be multiplied with the quantities in cargo-movements so that we can get idea of their aggregates over time (always in relation to ARA imports). Let's revisit products though and look at the crudes we are interested in:
# So, let's merge `products_df` with `ARA_crude_imports_df`:
# In[31]:
# Left-merge grade-level sulphur/API onto each cargo movement by grade name.
# NOTE(review): this also brings in a redundant "Name" column (equal to
# product_grade where matched) that is never dropped downstream.
ARA_crude_imports_df = ARA_crude_imports_df.merge(products_df[["Name", "Average_Sulphur", "Average_API"]], left_on="product_grade", right_on="Name", how="left")
ARA_crude_imports_df.head()
# Let's do some quality assurance:
# In[32]:
# Percentage of rows with no grade-level sulphur value.
100*ARA_crude_imports_df["Average_Sulphur"].isnull().sum()/len(ARA_crude_imports_df)
# In[33]:
# Percentage of rows with no grade-level API value.
100*ARA_crude_imports_df["Average_API"].isnull().sum()/len(ARA_crude_imports_df)
# This is partly attributed to missing `product_grade` values:
# In[34]:
print(f"{len(ARA_crude_imports_df[ARA_crude_imports_df['product_grade'].isnull()])}/{len(ARA_crude_imports_df)}")
# So let's remove those rows:
# In[35]:
# Keep only movements whose grade is known.
ARA_crude_imports_df = ARA_crude_imports_df[ARA_crude_imports_df['product_grade'].notnull()]
# In[36]:
# Should now print 0/<remaining rows>.
print(f"{len(ARA_crude_imports_df[ARA_crude_imports_df['product_grade'].isnull()])}/{len(ARA_crude_imports_df)}")
# In[37]:
ARA_crude_imports_df.head()
# Much better... so no nulls now?
# In[38]:
ARA_crude_imports_df["Average_Sulphur"].isnull().sum()
# In[39]:
ARA_crude_imports_df["Average_API"].isnull().sum()
# Sometimes, if the information is not available at the `grade` level, we can get it from the `category` instead:
# In[40]:
# Second left-merge, this time on product_category. suffixes=("", "_Category")
# keeps the grade-level columns unsuffixed and tags the category-level
# fallback columns with "_Category".
ARA_crude_imports_df = ARA_crude_imports_df.merge(products_df[["Name", "Average_Sulphur", "Average_API"]], left_on="product_category", right_on="Name", how="left", suffixes=("", "_Category"))
ARA_crude_imports_df.head()
# In[41]:
# combine_first: take the grade-level value where present, else the category-level one.
ARA_crude_imports_df["Average_Sulphur_Final"] = ARA_crude_imports_df["Average_Sulphur"].combine_first(ARA_crude_imports_df["Average_Sulphur_Category"])
# In[42]:
ARA_crude_imports_df["Average_API_Final"] = ARA_crude_imports_df["Average_API"].combine_first(ARA_crude_imports_df["Average_API_Category"])
# In[43]:
# Drop the intermediate columns and rename the "_Final" results back to the
# original Average_Sulphur / Average_API names.
ARA_crude_imports_df = ARA_crude_imports_df.drop(columns=["Average_Sulphur_Category", "Average_API_Category", "Average_Sulphur", "Average_API", "Name_Category"]).rename(columns={"Average_API_Final": "Average_API", "Average_Sulphur_Final": "Average_Sulphur"})
ARA_crude_imports_df.head()
# In[44]:
# Remaining nulls after the category fallback.
ARA_crude_imports_df["Average_API"].isnull().sum()
# In[45]:
ARA_crude_imports_df["Average_Sulphur"].isnull().sum()
# ## 1.2. On to plotting things again!
#
# To create the charts we care about, we will need some additional columns:
# - `Average_Sulphur_Quantity`
# - `Average_API_Quantity`
# - `unloading_finish_date` (in weeks), and
# - `unloading_finish_year`
#
#
# In[46]:
# Sulphur-weighted quantity (barrels * sulphur %); summed per week later to
# compute a quantity-weighted average sulphur.
ARA_crude_imports_df["Average_Sulphur_Quantity"] = ARA_crude_imports_df["quantity"] * ARA_crude_imports_df["Average_Sulphur"]
# In[47]:
# API-weighted quantity, same idea as above.
ARA_crude_imports_df["Average_API_Quantity"] = ARA_crude_imports_df["quantity"] * ARA_crude_imports_df["Average_API"]
# In[48]:
# Calendar date of the unloading finish, as a midnight timestamp.
ARA_crude_imports_df["unloading_finish_date"] = pd.to_datetime(ARA_crude_imports_df["unloading_finish"].dt.date)
# In[49]:
# Week bucket: start of the weekly period anchored to Monday ('W-Mon' periods
# end on Monday). FIX: use the vectorized .dt.start_time accessor instead of
# a per-element .apply(lambda x: x.start_time) — identical values, much
# faster on large frames.
ARA_crude_imports_df["unloading_finish_week"] = ARA_crude_imports_df["unloading_finish"].dt.to_period('W-Mon').dt.start_time
# In[50]:
# Year bucket (Period dtype) for annual grouping.
ARA_crude_imports_df["unloading_finish_year"] = ARA_crude_imports_df["unloading_finish"].dt.to_period('Y')
# In[51]:
ARA_crude_imports_df.head()
# ## 1.2.1. Sulphur (Let's Normalise the Quantities)
# So why do we need to normalise the quantities?
#
#
# In[52]:
# Weekly totals of quantity and sulphur-weighted quantity.
# NOTE(review): the "_daily_" in the name is historical — the groupby key is
# the weekly bucket.
sulphur_daily_totals_df = ARA_crude_imports_df.groupby(["unloading_finish_week"]).agg({"quantity": "sum", "Average_Sulphur_Quantity": "sum"}).rename(columns={"quantity": "total_quantity", "Average_Sulphur_Quantity": "Total_Average_Sulphur_Quantity"})
# In[53]:
sulphur_daily_totals_df.head()
# In[54]:
# Quantity-weighted average sulphur per week = sum(qty * sulphur) / sum(qty).
sulphur_daily_totals_df["average_sulphur"] = sulphur_daily_totals_df["Total_Average_Sulphur_Quantity"]/(sulphur_daily_totals_df["total_quantity"])
# In[55]:
sulphur_daily_totals_df.head()
# In[56]:
# 7-week rolling mean to smooth the series; the first 6 windows are NaN and
# are back-filled. FIX: fillna(method="bfill") is deprecated (removed in
# pandas 3.0) — use the equivalent .bfill().
sulphur_daily_totals_df["average_sulphur-rolling_mean"] = sulphur_daily_totals_df["average_sulphur"].rolling(7).mean().bfill()
sulphur_daily_totals_df.head()
# In[57]:
sulphur_daily_totals_df[["average_sulphur", "average_sulphur-rolling_mean"]].plot(figsize=(35, 10), lw=3, stacked=False, grid=True, ylabel="sulphur_percentage", title="Average Sulphur of Crude imports into A.R.A")
# ## 1.2.2. API (with Normalised Quantities)
# In[58]:
# Weekly totals of quantity and API-weighted quantity (mirrors the sulphur
# aggregation above; "_daily_" in the name is historical — the key is weekly).
api_daily_totals_df = ARA_crude_imports_df.groupby(["unloading_finish_week"]).agg({"quantity": "sum", "Average_API_Quantity": "sum"}).rename(columns={"quantity": "total_quantity", "Average_API_Quantity": "Total_Average_API_Quantity"})
# In[59]:
# Quantity-weighted average API per week.
api_daily_totals_df["average_API"] = api_daily_totals_df["Total_Average_API_Quantity"]/(api_daily_totals_df["total_quantity"])
# In[60]:
# FIX: fillna(method="bfill") is deprecated (removed in pandas 3.0) — use the
# equivalent .bfill() to back-fill the leading NaNs of the 7-week window.
api_daily_totals_df["average_API-rolling_mean"] = api_daily_totals_df["average_API"].rolling(7).mean().bfill()
# In[61]:
api_daily_totals_df[["average_API", "average_API-rolling_mean"]].plot(figsize=(35, 10), lw=3, grid=True, ylabel="API_percentage", title="Average API of Crude imports into A.R.A")
# # END of WEBINAR
# In[ ]: