#!/usr/bin/env python
# coding: utf-8

# # 🌊 How to use Python 🐍 to turn the tide

# A webinar showcasing **Vortexa**'s **Python Software Development Kit (SDK)**, demonstrating the ins & outs of the SDK, and how using Python can expand your energy industry insights.

# In[1]:

import pandas as pd

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows', 300)

from datetime import datetime
from dateutil.relativedelta import relativedelta

from vortexasdk import CargoTimeSeries, Products, Geographies, CargoMovements

# [Vortexa Platform](https://analytics.vortexa.com/?lat=0&lng=0&page=Live&products=%5B%2254af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11%22%2C%22fc2e0746428ed6109045fc1195a4ee4ef2e339d55b7b6e3a0fb01ecf28efc4f7%22%5D&tabId=8d7ae816-7c38-40bf-a27e-370c542ff2d3&zoom=3)

# # Crude Products

# In[2]:

# Let's get some crude product categories
products_df = Products().search().to_df(columns="all")

# In[3]:

products_df.head()

# In[4]:

products_df.shape

# In[5]:

# Let's focus on some columns
product_new_labels = {
    "id": "id",
    "name": "Name",
    "parent.0.name": "Parent",
    "layer.0": "Layer",
    "parent.0.layer.0": "Parent_Layer",
    "hierarchy.0.label": "Level_0",
    "hierarchy.1.label": "Level_1",
    "meta.sulphur_min": "Average_Sulphur",
    "meta.api_min": "Average_API",
}

products_df = products_df[list(product_new_labels.keys())].rename(product_new_labels, axis=1)
products_df.head()

# In[6]:

# Let's look at Crude
products_df[products_df['Parent'] == 'Crude']

# We can see the 6 main categories of crude above.
#
# A crude is defined as either **Sweet** or **Sour**, depending on its percentage sulphur content. Sulphur is an undesirable contaminant: most finished products carry sulphur limits, so it must be removed during refining, where it can also damage refinery catalysts. **Sweet crude** has less than 0.5% sulphur content and is generally more valuable than sour grades.
#
# API gravity is an inverse measure of a crude's density (how readily it flows at room temperature and how easy it is to pour). A higher API means a lighter crude: Light crudes tend to have API > 34, Medium sits between 31-33, and Heavy is < 30.

# Crude Products
#
# Source: https://www.eia.gov/todayinenergy/detail.php?id=33012

# In[7]:

# Let's look at Medium-Sour
products_df[products_df['Parent'] == 'Medium-Sour'][0:20]

# In[8]:

len(products_df[products_df['Parent'] == 'Medium-Sour'])

# We can see 20 of the 99 **Medium-Sour** grades of **Crude** above. More generally, the following grades are well-known examples of crudes by category:
#
# * **Light-Sweet**: WTI (benchmark US grade) or Brent (North Sea)
# * **Light-Sour**: CPC Blend (Kazakh crude exported via Russia) or Murban (UAE)
# * **Medium-Sweet**: Forcados (Nigeria) or Tupi/Lula (Brazil)
# * **Medium-Sour**: Middle Eastern grades such as Arab Light (Saudi Arabia) or Basrah Light (Iraq); also Russian Urals
# * **Heavy-Sweet**: DOBA (Chad's biggest flow) or Djeno (Republic of the Congo) - WAF grades
# * **Heavy-Sour**: Basrah Heavy (Iraq) or Maya (Mexico)

# # 1. A scenario: Crude Flow into A.R.A. (Amsterdam, Rotterdam, Antwerp)

# Suppose we care about specific product groups; we then need to group products accordingly.
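# For instance, the Sweet/Sour and Light/Medium/Heavy rules quoted above can be encoded directly. The helper below is an illustrative sketch of ours, not part of the SDK, and the thresholds are the approximate cut-offs quoted earlier:

# In[ ]:

def classify_crude(api: float, sulphur_pct: float) -> str:
    """Rough category label from API gravity and sulphur %.

    Illustrative only: the quoted bands leave small gaps (30-31, 33-34),
    which we fold into the nearest band here.
    """
    if api > 34:
        density = "Light"
    elif api >= 31:
        density = "Medium"
    else:
        density = "Heavy"
    sweetness = "Sweet" if sulphur_pct < 0.5 else "Sour"
    return f"{density}-{sweetness}"


classify_crude(api=39.6, sulphur_pct=0.24)  # WTI-like numbers -> 'Light-Sweet'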
# In[9]:

products_df.dtypes

# In[10]:

mask = ((products_df["Layer"] == "grade") & (products_df["Level_1"] == "Crude"))
crudes_df = products_df.loc[mask]

# In[11]:

crudes_df.head()

# In[12]:

crudes_df.shape

# Say we care about the last 2 years or so:

# In[13]:

now = datetime.utcnow()
two_years_ago = now - relativedelta(months=24)
two_years_ago

# Now, "Crude" is both a category and a product itself. Let's fetch its *unique* `id`:

# In[14]:

# Grab the ID for Crude
crude_search = Products().search(term=['Crude'], exact_term_match=True)
assert len(crude_search) == 1

crude_ID = crude_search[0]['id']
crude_ID

# [Vortexa Platform](https://analytics.vortexa.com/?lat=0&lng=0&page=Live&products=%5B%2254af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11%22%2C%22fc2e0746428ed6109045fc1195a4ee4ef2e339d55b7b6e3a0fb01ecf28efc4f7%22%5D&tabId=8d7ae816-7c38-40bf-a27e-370c542ff2d3&zoom=3)

# # ARA Geographies

# Let's inspect some geographical A.R.A. (Amsterdam, Rotterdam, Antwerp) regions:

# In[15]:

# Grab the ID for ARA
ara_search = Geographies().search("ARA Region", exact_term_match=True)
assert len(ara_search) == 1

ARA_ID = ara_search[0]['id']
ARA_ID

# We will use the **ARA** region as it is the main refining hub for North West Europe and therefore a key crude importer.
#
# Let's get the cargo movements we care about:

# In[16]:

# Create and run the query
ARA_crude_imports_df = CargoMovements().search(
    filter_activity="unloading_start",
    filter_time_min=two_years_ago,
    filter_time_max=now,
    filter_products=crude_ID,
    filter_destinations=ARA_ID,
).to_df(columns="all")

# In[17]:

ARA_crude_imports_df.head()

# In[18]:

ARA_crude_imports_df.shape

# In[19]:

# Relabel the columns
new_labels = {
    "cargo_movement_id": "cargo_movement_id",
    "vessels.0.name": "vessel_name",
    "vessels.0.vessel_class": "vessel_class",
    "product.group.label": "product_group",
    "product.grade.label": "product_grade",
    "product.category.label": "product_category",
    "quantity": "quantity",
    "status": "status",
    "events.cargo_port_load_event.0.location.port.label": "loading_port",
    "events.cargo_port_load_event.0.end_timestamp": "loading_finish",
    "events.cargo_port_unload_event.0.location.port.label": "unloading_port",
    "events.cargo_port_unload_event.0.end_timestamp": "unloading_finish",
}

# In[20]:

ARA_crude_imports_df = ARA_crude_imports_df[list(new_labels.keys())].rename(new_labels, axis=1)
ARA_crude_imports_df.head()

# In[21]:

# Count the cargo movements per category of Crude grade
crude_ARA_category = (
    ARA_crude_imports_df.groupby('product_category')
    .size()
    .to_frame()
    .rename(columns={0: "Number of CMs"})
    .reset_index()
)
crude_ARA_category

# In[22]:

crude_category_id_mapping = products_df.loc[products_df["Name"].isin(crude_ARA_category["product_category"].values)]
crude_category_id_mapping = crude_category_id_mapping[["id", "Name"]]
crude_category_id_mapping

# In[23]:

# The cargo unit for the time series (barrels per day)
TS_UNIT = 'bpd'

# The granularity of the time series
TS_FREQ = 'week'  # Look at weekly imports

time_series_imports_per_product = []

for idx, row in crude_category_id_mapping.iterrows():
    # Query the weekly ARA import volumes for this crude category
    ARA_crude_imports_tm_df = (
        CargoTimeSeries().search(
            filter_activity='unloading_start',
            filter_destinations=ARA_ID,
            filter_products=row["id"],
            timeseries_unit=TS_UNIT,  # quantity in bpd
            timeseries_frequency=TS_FREQ,
            filter_time_min=two_years_ago,  # Set the date range
            filter_time_max=now,
        )
        .to_df()
        .rename(columns={"key": "date", "value": row["Name"]})
        .drop(columns=["count"])
        .set_index("date")
    )

    time_series_imports_per_product.append(ARA_crude_imports_tm_df)

time_series_imports_per_product_df = pd.concat(time_series_imports_per_product, axis=1)
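# As an aside, a coarser view doesn't need a new query: the weekly `bpd` series can be resampled with plain pandas. A sketch, assuming the `date` index is a `DatetimeIndex` (checked a few cells below); alternatively, `CargoTimeSeries` could simply be re-queried with `timeseries_frequency='month'`:

# In[ ]:

monthly_imports_df = (
    time_series_imports_per_product_df
    .resample('MS')  # month-start buckets
    .mean()          # mean of the weekly bpd values falling in each month
)
monthly_imports_df.head()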
# In[24]:

time_series_imports_per_product_df.head()

# In[25]:

time_series_imports_per_product_df = time_series_imports_per_product_df.fillna(0)

# In[26]:

time_series_imports_per_product_df.head()

# In[27]:

# Check that the index is a date-time, for plotting time series
type(time_series_imports_per_product_df.index)

# In[28]:

import matplotlib.pyplot as plt

time_series_imports_per_product_df.plot.area(figsize=(35, 10), lw=3, stacked=False, grid=True, ylabel=TS_UNIT, title="ARA Crude Imports")

# This shows the crudes imported into **A.R.A.** by category. The key categories are **Medium-Sour** (some Middle Eastern grades plus Russian Urals) and **Light-Sweet** (mostly WTI from the US, but also some North Sea blends).

# ## 1.1. A scenario: Sulphur vs API Crude Flow into A.R.A.

# Now suppose we want a more granular analysis; one that focuses on grades on top of categories. Let's return to one of our previous datasets, `ARA_crude_imports_df`. What do we have in there?

# In[29]:

ARA_crude_imports_df.head()

# Suppose we want to look into the average sulphur and average API of these cargoes and try to understand the trends. To do that, we need the sulphur percentage and API gravity values available in products. Let's revisit the products and look at the crudes we are interested in:

# In[30]:

products_df[products_df["Parent"].isin(["Medium-Sour", "Medium-Sweet", "Heavy-Sour", "Heavy-Sweet", "Light-Sour", "Light-Sweet"])].head()

# These values need to be multiplied by the quantities in cargo movements so that we can get an idea of their aggregates over time (always in relation to ARA imports).
#
# So, let's merge `products_df` with `ARA_crude_imports_df`:

# In[31]:

ARA_crude_imports_df = ARA_crude_imports_df.merge(
    products_df[["Name", "Average_Sulphur", "Average_API"]],
    left_on="product_grade",
    right_on="Name",
    how="left",
)
ARA_crude_imports_df.head()

# Let's do some quality assurance:

# In[32]:

# Percentage of rows with a missing Average_Sulphur
100 * ARA_crude_imports_df["Average_Sulphur"].isnull().sum() / len(ARA_crude_imports_df)

# In[33]:

# Percentage of rows with a missing Average_API
100 * ARA_crude_imports_df["Average_API"].isnull().sum() / len(ARA_crude_imports_df)

# This is partly attributable to missing `product_grade` values:

# In[34]:

print(f"{len(ARA_crude_imports_df[ARA_crude_imports_df['product_grade'].isnull()])}/{len(ARA_crude_imports_df)}")

# So let's remove those rows:

# In[35]:

ARA_crude_imports_df = ARA_crude_imports_df[ARA_crude_imports_df['product_grade'].notnull()]

# In[36]:

print(f"{len(ARA_crude_imports_df[ARA_crude_imports_df['product_grade'].isnull()])}/{len(ARA_crude_imports_df)}")

# In[37]:

ARA_crude_imports_df.head()

# Much better... so no nulls now?

# In[38]:

ARA_crude_imports_df["Average_Sulphur"].isnull().sum()

# In[39]:

ARA_crude_imports_df["Average_API"].isnull().sum()
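# Checks like the two above recur throughout this kind of analysis. As an aside, they can be bundled into a small helper; `null_report` below is our own illustrative function, not part of the SDK or pandas:

# In[ ]:

def null_report(df: pd.DataFrame) -> pd.Series:
    """Percentage of null values per column, highest first (illustrative helper)."""
    return (100 * df.isnull().sum() / len(df)).sort_values(ascending=False)


null_report(ARA_crude_imports_df).head()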
# Sometimes, if the information is not available at the `grade` level, we can get it from the `category` instead:

# In[40]:

ARA_crude_imports_df = ARA_crude_imports_df.merge(
    products_df[["Name", "Average_Sulphur", "Average_API"]],
    left_on="product_category",
    right_on="Name",
    how="left",
    suffixes=("", "_Category"),
)
ARA_crude_imports_df.head()

# In[41]:

# Prefer the grade-level value; fall back to the category-level one
ARA_crude_imports_df["Average_Sulphur_Final"] = ARA_crude_imports_df["Average_Sulphur"].combine_first(ARA_crude_imports_df["Average_Sulphur_Category"])

# In[42]:

ARA_crude_imports_df["Average_API_Final"] = ARA_crude_imports_df["Average_API"].combine_first(ARA_crude_imports_df["Average_API_Category"])

# In[43]:

ARA_crude_imports_df = ARA_crude_imports_df.drop(
    columns=["Average_Sulphur_Category", "Average_API_Category", "Average_Sulphur", "Average_API", "Name_Category"]
).rename(columns={"Average_API_Final": "Average_API", "Average_Sulphur_Final": "Average_Sulphur"})
ARA_crude_imports_df.head()

# In[44]:

ARA_crude_imports_df["Average_API"].isnull().sum()

# In[45]:

ARA_crude_imports_df["Average_Sulphur"].isnull().sum()

# ## 1.2. On to plotting things again!
#
# To create the charts we care about, we will need some additional columns:
# - `Average_Sulphur_Quantity`
# - `Average_API_Quantity`
# - `unloading_finish_date`
# - `unloading_finish_week`, and
# - `unloading_finish_year`

# In[46]:

ARA_crude_imports_df["Average_Sulphur_Quantity"] = ARA_crude_imports_df["quantity"] * ARA_crude_imports_df["Average_Sulphur"]

# In[47]:

ARA_crude_imports_df["Average_API_Quantity"] = ARA_crude_imports_df["quantity"] * ARA_crude_imports_df["Average_API"]

# In[48]:

ARA_crude_imports_df["unloading_finish_date"] = pd.to_datetime(ARA_crude_imports_df["unloading_finish"].dt.date)

# In[49]:

# Snap each unloading timestamp to the start of its week
ARA_crude_imports_df["unloading_finish_week"] = ARA_crude_imports_df["unloading_finish"].dt.to_period('W-Mon').apply(lambda x: x.start_time)

# In[50]:

ARA_crude_imports_df["unloading_finish_year"] = ARA_crude_imports_df["unloading_finish"].dt.to_period('Y')

# In[51]:

ARA_crude_imports_df.head()

# ## 1.2.1. Sulphur (Let's Normalise the Quantities)

# So why do we need to normalise the quantities? Cargoes differ enormously in size, so a simple mean of the per-grade sulphur values would give a small parcel the same weight as a VLCC cargo. Instead, we weight each cargo's sulphur value by its quantity and divide by the total quantity unloaded that week.

# In[52]:

sulphur_weekly_totals_df = ARA_crude_imports_df.groupby(["unloading_finish_week"]).agg(
    {"quantity": "sum", "Average_Sulphur_Quantity": "sum"}
).rename(columns={"quantity": "total_quantity", "Average_Sulphur_Quantity": "Total_Average_Sulphur_Quantity"})

# In[53]:

sulphur_weekly_totals_df.head()

# In[54]:

# Quantity-weighted average sulphur per week
sulphur_weekly_totals_df["average_sulphur"] = sulphur_weekly_totals_df["Total_Average_Sulphur_Quantity"] / sulphur_weekly_totals_df["total_quantity"]

# In[55]:

sulphur_weekly_totals_df.head()

# In[56]:

# 7-week rolling mean, back-filling the first weeks of the window
sulphur_weekly_totals_df["average_sulphur-rolling_mean"] = sulphur_weekly_totals_df["average_sulphur"].rolling(7).mean().bfill()
sulphur_weekly_totals_df.head()

# In[57]:

sulphur_weekly_totals_df[["average_sulphur", "average_sulphur-rolling_mean"]].plot(figsize=(35, 10), lw=3, stacked=False, grid=True, ylabel="sulphur_percentage", title="Average Sulphur of Crude imports into A.R.A.")
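# To make the weighting concrete, here is the same calculation on two invented cargoes (toy numbers for illustration only, not Vortexa data). A 100 kbbl parcel of 2.0% sulphur crude barely moves the weighted average of a week dominated by a 2,000 kbbl cargo at 0.2%:

# In[ ]:

# Toy illustration of quantity weighting (invented numbers)
toy = pd.DataFrame({
    "quantity": [2000, 100],        # kbbl: one large cargo, one small parcel
    "Average_Sulphur": [0.2, 2.0],  # % sulphur of each grade
})

unweighted = toy["Average_Sulphur"].mean()
weighted = (toy["quantity"] * toy["Average_Sulphur"]).sum() / toy["quantity"].sum()

print(f"unweighted: {unweighted:.3f}%  weighted: {weighted:.3f}%")  # 1.100% vs 0.286%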
# ## 1.2.2. API (with Normalised Quantities)

# In[58]:

api_weekly_totals_df = ARA_crude_imports_df.groupby(["unloading_finish_week"]).agg(
    {"quantity": "sum", "Average_API_Quantity": "sum"}
).rename(columns={"quantity": "total_quantity", "Average_API_Quantity": "Total_Average_API_Quantity"})

# In[59]:

# Quantity-weighted average API per week
api_weekly_totals_df["average_API"] = api_weekly_totals_df["Total_Average_API_Quantity"] / api_weekly_totals_df["total_quantity"]

# In[60]:

# 7-week rolling mean, back-filling the first weeks of the window
api_weekly_totals_df["average_API-rolling_mean"] = api_weekly_totals_df["average_API"].rolling(7).mean().bfill()

# In[61]:

api_weekly_totals_df[["average_API", "average_API-rolling_mean"]].plot(figsize=(35, 10), lw=3, grid=True, ylabel="degrees_API", title="Average API of Crude imports into A.R.A.")

# # END of WEBINAR
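# ## Appendix: Sulphur vs API on one chart

# Since sulphur (%) and API (degrees) sit on very different scales, overlaying their rolling means needs twin y-axes. A plain matplotlib sketch, reusing the two weekly frames built above:

# In[ ]:

import matplotlib.pyplot as plt

fig, ax_sulphur = plt.subplots(figsize=(35, 10))
ax_api = ax_sulphur.twinx()  # second y-axis sharing the same x-axis

sulphur_weekly_totals_df["average_sulphur-rolling_mean"].plot(ax=ax_sulphur, lw=3, color="tab:red")
api_weekly_totals_df["average_API-rolling_mean"].plot(ax=ax_api, lw=3, color="tab:blue")

ax_sulphur.set_ylabel("sulphur_percentage", color="tab:red")
ax_api.set_ylabel("degrees_API", color="tab:blue")
ax_sulphur.grid(True)
ax_sulphur.set_title("Average Sulphur vs API of Crude imports into A.R.A.")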