This notebook outlines Vortexa SDK solutions relevant to financial institutions. The use cases below include the following:
The first step is to import the libraries required to query the data in question. To do this, run the below cell.
from vortexasdk import VoyagesTimeseries, VoyagesSearchEnriched, CargoTimeSeries, OnshoreInventoriesTimeseries, FreightPricingTimeseries, Vessels, Geographies, Products
from datetime import datetime
import pandas as pd
import numpy as np
import time
import plotly.express as px
In the Vortexa SDK, we cannot refer to products, vessels or geographies by name. Each product, vessel or geography in our data has its own unique ID. These IDs are used to refer to the products, vessels or geographies in our database when making queries.
In the code below, uncomment the geography search lines or the product search lines in order to run a search for the desired product or geography. You can change the search term to refine your search.
Once you find the id, you can copy it and assign it as an object. This way, you can refer to the product or geography by the name you have given to it. In the examples below, we assign names to geographies: US Gulf, United States and Europe, as well as to the products: crude, LPG and CPP.
# search for geography ids (remove hashtags to search)
# full_length_df = Geographies().search(term=["europe"]).to_df()
# print(full_length_df.to_string(index=False))
# Store geography ids
# NOTE: these are Vortexa geography IDs, found via the Geographies().search() call above
us_gulf='e0d68b7a4ac37c97e3387471644d8b5c2a4be16a50092676ec3bec08408a2ebb'
united_states='2d92cc08f22524dba59f6a7e340f132a9da0ce9573cca968eb8e3752ef17a963'
europe='f39d455f5d38907394d6da3a91da4e391f9a34bd6a17e826d6042761067e88f4'
# search for product ids (remove hashtags below to search)
# product_search = Products().search(term=['Crude']).to_df()
# print (product_search.to_string(index=False))
# Store product ids
# NOTE: Vortexa product IDs, found via the Products().search() call above
crude='54af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11'
cpp='b68cbb746f8b9098c50e2ba36bcad83001a53bd362e9031fb49085d02c36659c'
lpg='364ccbb996c944055b479810a8e74863267885dc1b01407cb0f00ab26dafe1e1'
The code below extracts onshore crude inventories per week, as well as floating storage volumes per week. These are then added together to provide a picture of total oil supply per week. This can be used in conjunction with crude oil price data to anticipate when price trends might change.
# Function
def weekly_crude_supply(start_y, start_m, start_d, unit):
    """Plot and return weekly global crude supply.

    Supply is defined as onshore crude inventories plus floating storage,
    both aggregated per week from the given start date up to today.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Start date of the query window (the window always ends today).
    unit : str
        Timeseries unit accepted by the SDK (e.g. 'b' for barrels).

    Returns
    -------
    pandas.DataFrame
        One row per week with crude inventories, floating storage and
        their sum ('Crude supply').
    """
    # Crude product ID (duplicated here so the function is self-contained)
    crude = '54af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11'
    today = datetime.today()
    # Pull onshore crude inventory data (already weekly, gaps forward-filled)
    inventories = OnshoreInventoriesTimeseries().search(
        time_min=datetime(start_y, start_m, start_d),
        time_max=today,
        crude_confidence=['confirmed', 'probable'],
        timeseries_frequency="week",
        timeseries_unit=unit,
        timeseries_unit_operator="fill").to_df()
    # Build a week label (e.g. 'W03-2021') used to merge with floating storage
    inventories["weeks"] = inventories['key'].dt.strftime('W%U-%Y')
    inventories["date"] = inventories['key'].dt.strftime('%d/%m/%Y')
    inventories = pd.concat([inventories['date'], inventories['weeks'], inventories['value']], axis=1)
    # 'Date' (the week label) is the merge key; 'date' keeps the calendar date
    inventories.columns = ['date', 'Date', 'Crude inventories']
    # Pull floating storage data (daily; averaged up to weekly below)
    floating_storage = CargoTimeSeries().search(
        filter_time_min=datetime(start_y, start_m, start_d),
        filter_time_max=today,
        filter_products=crude,
        filter_activity="storing_state",
        timeseries_frequency="day",
        timeseries_unit=unit).to_df()
    # Convert dates to week labels for aggregation
    floating_storage["weeks"] = floating_storage['key'].dt.strftime('W%U-%Y')
    # Average the daily floating-storage volumes within each week.
    # sort=False keeps the weeks in chronological (first-seen) order.
    weekly_avg = (pd.to_numeric(floating_storage["value"])
                  .groupby(floating_storage["weeks"], sort=False)
                  .mean())
    floating_storage = weekly_avg.rename_axis('Date').reset_index(name='Floating storage')
    # Merge floating storage and inventory data on the week label
    merged_df = pd.merge(inventories, floating_storage, on='Date', how='inner')
    merged_df['Crude supply'] = merged_df['Crude inventories'] + merged_df['Floating storage']
    # Plot data
    fig = px.line(
        merged_df,
        title="Global crude storage",
        x="Date",
        y="Crude supply",
        labels={
            "Date": "Date",
            "Crude supply": "bbl"
        },
    )
    fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()
    return merged_df
Run the cell below to see a plot of the data.
# Weekly crude supply since 1 Jan 2021, in barrels ('b')
supply=weekly_crude_supply(2021, 1, 1, "b")
The code below provides a way to view import and export data for all products/regions within the Vortexa database. This can be used to monitor the imports and exports of major oil producing/consuming nations.
# Import/export functions
def exports(origin, destination, product, start_y, start_m, start_d, unit, freq, title):
    """Query, plot and return export cargo volumes for a route/product.

    Exports are counted at the point loading ends ('loading_end'). The
    query window runs from the given start date up to now. Returns a
    DataFrame with 'Date' and 'Cargo volume' columns.
    """
    window_start = datetime(start_y, start_m, start_d)
    window_end = datetime.today()
    # Pull export data
    result = CargoTimeSeries().search(
        filter_time_min=window_start,
        filter_time_max=window_end,
        filter_origins=origin,
        filter_destinations=destination,
        filter_products=product,
        filter_activity="loading_end",
        timeseries_frequency=freq,
        timeseries_unit=unit).to_df()
    # Keep only the date/value pair under friendly column names
    result = result[['key', 'value']].rename(columns={'key': 'Date', 'value': 'Cargo volume'})
    # Plot data
    fig = px.bar(
        result,
        title=title,
        x="Date",
        y="Cargo volume",
        labels={
            "Date": "Date",
            "Cargo volume": unit
        },
    )
    fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()
    return result
def imports(origin, destination, product, start_y, start_m, start_d, unit, freq, title):
    """Query, plot and return import cargo volumes for a route/product.

    Imports are counted at the point unloading starts ('unloading_start').
    The query window runs from the given start date up to now. Returns a
    DataFrame with 'Date' and 'Cargo volume' columns.
    """
    window_start = datetime(start_y, start_m, start_d)
    window_end = datetime.today()
    # Pull import data
    result = CargoTimeSeries().search(
        filter_time_min=window_start,
        filter_time_max=window_end,
        filter_origins=origin,
        filter_destinations=destination,
        filter_products=product,
        filter_activity="unloading_start",
        timeseries_frequency=freq,
        timeseries_unit=unit).to_df()
    # Keep only the date/value pair under friendly column names
    result = result[['key', 'value']].rename(columns={'key': 'Date', 'value': 'Cargo volume'})
    # Plot data
    fig = px.bar(
        result,
        title=title,
        x="Date",
        y="Cargo volume",
        labels={
            "Date": "Date",
            "Cargo volume": unit
        },
    )
    fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()
    return result
Run the cell below to view examples for US imports, US exports and Europe imports for crude oil.
# Example monthly flows since 1 Jan 2021, in barrels per day ('bpd')
us_crude_imports=imports(None, united_states, crude, 2021, 1, 1, 'bpd', 'month', 'US crude imports')
us_crude_exports=exports(united_states, None, crude, 2021, 1, 1, 'bpd', 'month', 'US crude exports')
europe_crude_imports=imports(None, europe, crude, 2021, 1, 1, 'bpd', 'month', 'Europe crude imports')
The below code demonstrates how to pull freight rates for multiple Baltic Exchange trade routes. Freight rates can be a valuable indicator of market demand. In the example below, we plot the US Gulf MR freight rates, which can serve as an indicator for US Gulf diesel price movements.
# Define the rates you want to see
# Baltic Exchange route codes supported by the freight pricing endpoint
all_rates = ['TC1', 'TC2_37', 'TC5', 'TC6', 'TC7', 'TC8', 'TC9', 'TC10', 'TC11', 'TC12', 'TC14', 'TC15', 'TC16', 'TC17', 'TC18', 'TC19', 'TD1', 'TD2', 'TD3C', 'TD6', 'TD7', 'TD8', 'TD9', 'TD14', 'TD15', 'TD17', 'TD18', 'TD19', 'TD20', 'TD21', 'TD22', 'TD23', 'TD24', 'TD25', 'TD26', 'BLPG1', 'BLPG2', 'BLPG3']
# US Gulf MR routes used in the example below
usg_mr_rates=['TC14', 'TC18']
# Helper function to remove NAs so every remaining value lines up with its date
def match_dates(data):
    """Return a copy of *data* with incomplete rows removed.

    Empty strings are treated as missing values; any row containing at
    least one missing value is dropped. Unlike the previous in-place
    approach, the caller's DataFrame is left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose blank cells ('') or NA cells mark incomplete rows.

    Returns
    -------
    pandas.DataFrame
        The complete rows only, original index and columns preserved.
    """
    # Normalise blanks to pd.NA, then drop every row with any NA value
    return data.replace('', pd.NA).dropna()
# Function to query a given list of freight rates and remove NA values using the above helper function
def freight_rates(start_y, start_m, start_d, rates, unit, freq):
    """Query the given Baltic Exchange routes, plot them and return a tidy frame.

    Each route is queried exactly once (the previous version queried the
    first route twice: once for its dates and again for its values).

    Parameters
    ----------
    start_y, start_m, start_d : int
        Start date of the query window (the window ends today).
    rates : list[str]
        Baltic route codes, e.g. ['TC14', 'TC18'].
    unit : str
        breakdown_property accepted by the SDK (e.g. 'cost').
    freq : str
        breakdown_frequency accepted by the SDK (e.g. 'day').

    Returns
    -------
    pandas.DataFrame
        A 'Date' column plus one column per route, incomplete rows removed.
    """
    time_min = datetime(start_y, start_m, start_d)
    time_max = datetime.today()
    # Query each route once; the first response also supplies the date axis
    columns = {}
    for route in rates:
        df = FreightPricingTimeseries().search(
            time_min=time_min,
            time_max=time_max,
            routes=route,
            breakdown_property=unit,
            breakdown_frequency=freq).to_df()
        if 'Date' not in columns:
            columns['Date'] = df['key']
        columns[route] = df['value']
    final = pd.concat(columns.values(), axis=1)
    final.columns = ['Date'] + rates
    # Format dates
    final['Date'] = pd.to_datetime(final['Date'])
    final['Date'] = final['Date'].dt.strftime("%d-%m-%Y")
    # Remove NAs
    final = match_dates(final)
    # Plot data
    fig = px.line(
        final,
        x="Date",
        y=rates,
        labels={
            "Date": "Date",
            "value": "$/ton"
        },
    )
    fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()
    return final
Run the cell below to see a plot of the specified freight rates.
# Daily US Gulf MR rates (TC14, TC18) since 1 Jan 2022, in $/ton ('cost')
usg_rates=freight_rates(2022, 1, 1, usg_mr_rates, 'cost', 'day')
The code below demonstrates how to query and plot vessel speed data for various regions, products, vessel classes or vessel statuses. Ballast speeds in particular can serve as an indicator of how busy freight markets are. The example below shows the vessel speeds for MR tankers heading towards the US Gulf.
# Average speed function
def average_speed(start_y, start_m, start_d, origin, destination, vessels, prod, status, freq):
    """Query, plot and return average vessel speeds.

    Pulls the 'avg_speed' breakdown from the voyages endpoint for the
    given route, vessel class, product and voyage status, from the start
    date up to now. Returns a DataFrame with 'Date' and
    'Average speed (kn)' columns.
    """
    window_start = datetime(start_y, start_m, start_d)
    # Pull speeds data
    speeds = VoyagesTimeseries().search(
        time_min=window_start,
        time_max=datetime.today(),
        voyage_status=status,
        origins=origin,
        destinations=destination,
        vessels=vessels,
        latest_products=prod,
        breakdown_frequency=freq,
        breakdown_property="avg_speed",
        breakdown_unit_operator="avg").to_df()
    # Keep only the date/value pair under friendly column names
    speeds = speeds[['key', 'value']].rename(columns={'key': 'Date', 'value': 'Average speed (kn)'})
    # Plot data
    fig = px.line(
        speeds,
        x="Date",
        y='Average speed (kn)',
        labels={
            "Date": "Date",
            "value": "Average speed (kn)"
        },
    )
    fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()
    return speeds
Run the cell below to view a plot of the vessel speed data.
# Daily ballast speeds of MR ('handymax') CPP tankers heading to the US Gulf since 14 Aug 2022
ballast_speed_to_us_gulf=average_speed(2022, 8, 14, None, us_gulf, 'handymax', cpp, 'ballast', 'day')
The code below captures the next ballast voyage of each vessel operating on a specified route. The output consists of counts & percentages of ballast voyages per shipping region, as well as a dataframe of the ballast voyages that make up the counts and percentages. This can be used to anticipate changes in fleet supply and behaviour.
# Post voyage ballast distribution retrieval
def monthly_post_route_ballast_distribution(origin, origin_excl, destination, destination_excl, vessel_class, product, product_excl, start_y, start_m, start_d, end_y, end_m, end_d, end):
    """Capture the next ballast voyage of each vessel on a specified route.

    Pulls the laden voyages matching the filters, follows each vessel's
    next_voyage_id, keeps only the ballast legs, and tallies their
    destination shipping regions per month. All three outputs are also
    written to CSV files on the Desktop (edit the paths below as needed).

    Parameters
    ----------
    origin, origin_excl, destination, destination_excl :
        Geography IDs to include/exclude for the laden leg.
    vessel_class :
        Vessel class filter passed to the SDK.
    product, product_excl :
        Product IDs to include/exclude.
    start_y, start_m, start_d, end_y, end_m, end_d : int
        Laden-voyage query window.
    end :
        Cut-off: laden voyages ending after this timestamp are dropped.

    Returns
    -------
    (raw_counts, props, df) : tuple of pandas.DataFrame
        Monthly destination counts, the same counts as row-wise
        percentages, and the underlying ballast voyages.
    """
    # Pull the laden voyages which occurred in the required timeframe
    route = VoyagesSearchEnriched().search(
        origins=origin,
        origins_excluded=origin_excl,
        destinations=destination,
        destinations_excluded=destination_excl,
        time_min=datetime(start_y, start_m, start_d),
        time_max=datetime(end_y, end_m, end_d, 23, 59, 59),
        vessels=vessel_class,
        products=product,
        products_excluded=product_excl
    )
    # Convert to dataframe and sort by voyage end time
    route = pd.DataFrame(route)
    route["end_timestamp"] = pd.to_datetime(route["end_timestamp"])
    route.sort_values(by='end_timestamp', ascending=True, inplace=True)
    # Remove null end_timestamps
    route.drop(route[route['end_timestamp'].isna()].index, inplace=True)
    # Remove voyages that end past the specified end date
    route = route[route['end_timestamp'] <= end]
    route = route.dropna(subset=['next_voyage_id'])
    # Get the next voyage IDs
    next_voyage_id_list = route["next_voyage_id"].unique().tolist()
    # Get voyages corresponding to the next voyage IDs
    post_route = VoyagesSearchEnriched().search(
        voyage_id=next_voyage_id_list,
        columns="all")
    # Convert to dataframe and sort by start date (= discharge date of the laden leg)
    df = post_route.to_df()
    df["START DATE"] = pd.to_datetime(df["START DATE"])
    df.sort_values(by='START DATE', ascending=True, inplace=True)
    # Relabel blank destinations as Undetermined
    df['DESTINATION SHIPPING REGION'] = df['DESTINATION SHIPPING REGION'].replace([''], 'Undetermined')
    # Remove laden results
    df = df.loc[df["VOYAGE STATUS"] == 'Ballast']
    # Rank destinations by how often they are declared (most common first)
    ranked = (df["DESTINATION SHIPPING REGION"]
              .value_counts()
              .rename_axis('Destination')
              .reset_index(name='Count'))
    dests = ranked["Destination"].tolist()
    # Convert ballast start dates to month labels for counting purposes
    df["months"] = df['START DATE'].dt.strftime('%m-%Y')
    # Get a unique list of the dates in month/year format (chronological order)
    dates = df["months"].unique().tolist()
    # Count how often each ballast destination is declared in each month
    raw_counts = pd.DataFrame({'Date': dates})
    for dest in dests:
        raw_counts[dest] = [
            ((df['DESTINATION SHIPPING REGION'] == dest) & (df['months'] == month)).sum()
            for month in dates
        ]
    # Convert the counts into row-wise percentages (share of that month's voyages).
    # Note: renamed from `sum` to avoid shadowing the builtin.
    counts_arr = raw_counts.drop(columns='Date').to_numpy(dtype=float)
    row_totals = counts_arr.sum(axis=1, keepdims=True)  # >= 1: each month has at least one voyage
    props = pd.DataFrame(counts_arr / row_totals, columns=dests)
    # Add in the date as the first column
    props.insert(0, 'Date', dates)
    # Check that voyages are in fact all ballast
    final_check = df["VOYAGE STATUS"].unique()
    print("All voyages are", final_check)
    # Change names of the output files here (filepath may need to be edited in Windows)
    raw_counts.to_csv("~/Desktop/Monthly ballast distribution counts.csv", index=False)
    props.to_csv("~/Desktop/Monthly ballast distribution percentages.csv", index=False)
    df.to_csv("~/Desktop/Monthly ballast dist voyages.csv", index=False)
    return raw_counts, props, df