import vortexasdk as v
import pandas as pd
import plotly.express as px
from datetime import datetime
import dateutil.relativedelta
import os
import requests
import json
# search for geography ids (remove hashtags to search)
# full_length_df = v.Geographies().search(term=["Northeast As"]).to_df()
# print(full_length_df.to_string(index=False))
# Hard-coded Vortexa geography IDs, resolved once via the search above.
# Names suggest: MEG (Middle East Gulf), Europe, Russia, Brazil,
# GoM (Gulf of Mexico), NEA (Northeast Asia) — verify via Geographies()
# search if reused, since the IDs themselves are opaque.
meg='0427e0f9d52b38c1a98b68c59b8fd80cb1c508e44882e96611e48ef5b140d927'
europe='f39d455f5d38907394d6da3a91da4e391f9a34bd6a17e826d6042761067e88f4'
russia='b996521be9c996db3560ca234a56286ac38f798d34be229437555fab4f12a6a5'
brazil='6ac49669e238276d51719480fddfee2e18cf265f43678ed995071d363ee943f5'
gom='37c8c4eeb730d1cd41f90ca6bf95c923222b0734b1b0336a475acce821f87ebd'
nea='5600764fc92f8cce630f3994e7af4d900945d54b29f6e4a07fde0c5e17f7d738'
# search for product ids (remove hashtags to search)
# product_search = v.Products().search(term=['diesel']).to_df()
# print (product_search.to_string(index=False))
# Hard-coded Vortexa product IDs; cpp presumably "clean petroleum products",
# diesel_go presumably "diesel / gasoil" — confirm via Products() search.
crude='54af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11'
cpp='b68cbb746f8b9098c50e2ba36bcad83001a53bd362e9031fb49085d02c36659c'
lpg='364ccbb996c944055b479810a8e74863267885dc1b01407cb0f00ab26dafe1e1'
diesel_go='deda35eb9ca56b54e74f0ff370423f9a8c61cf6a3796fcb18eaeeb32a8c290bb'
# Helper function for pulling vessel position history
# API key for the raw Signals endpoints; raises KeyError at import time
# if VORTEXA_API_KEY is not set in the environment.
api_key = os.environ['VORTEXA_API_KEY']
def extract_vessel_positions(list_of_vessels, start_timestamp, end_timestamp, interval):
    """Pull raw AIS signals for a batch of vessels from the Vortexa Signals API.

    Queries three endpoints over the same time window and vessel list:
    vessel positions, vessel draughts, and declared destinations.

    Parameters
    ----------
    list_of_vessels : list
        Vessel IDs to query (sent as the ``vessel_id`` parameter).
    start_timestamp, end_timestamp : datetime
        Bounds of the query window; serialised with ``isoformat()``.
    interval : str
        Sampling interval accepted by the API, e.g. ``'1d'``.

    Returns
    -------
    tuple of dict
        ``(positions, draughts, declared_destinations)`` — the parsed JSON
        payload of each endpoint, in that order.

    Raises
    ------
    requests.HTTPError
        If any endpoint returns a non-2xx status (previously an error page
        would be handed to ``.json()``, producing an opaque decode error).
    """
    base = "https://api.vortexa.com/v6/signals"
    endpoints = (
        base + "/vessel-positions",
        base + "/vessel-draughts",
        base + "/vessel-declared-destination",
    )
    params = {
        'apikey': api_key,
        'time_min': start_timestamp.isoformat(),
        'time_max': end_timestamp.isoformat(),
        'interval': interval,
        'vessel_id': list_of_vessels,
    }
    payloads = []
    for url in endpoints:
        # timeout bounds each call so a hung connection cannot stall the
        # whole batched pull; raise_for_status fails fast on HTTP errors.
        response = requests.get(url=url, params=params, timeout=60)
        response.raise_for_status()
        payloads.append(response.json())
    return tuple(payloads)
# Function to group vessels into a specified group size and query each group
def vessel_position_history(group, vessel_class, list_of_vessels, start_timestamp, end_timestamp, interval):
    """Pull position/draught/destination signals for many vessels, in batches.

    Parameters
    ----------
    group : int
        Number of vessels to send per API call.
    vessel_class : str or None
        If given, the vessel class to resolve to IDs via the SDK;
        otherwise ``list_of_vessels`` is used as-is.
    list_of_vessels : list
        Vessel IDs to query when ``vessel_class`` is None.
    start_timestamp, end_timestamp : datetime
        Query window passed through to ``extract_vessel_positions``.
    interval : str
        Signal sampling interval, e.g. ``'1d'``.

    Returns
    -------
    pandas.DataFrame
        One row per (vessel, timestamp) with columns lat, lon, vessel_id,
        timestamp, heading, speed, draught, declared_destination,
        declared_eta. Missing draught/destination values are filled with ''.
    """
    if vessel_class is not None:
        # Resolve the class name to the full list of vessel IDs.
        vessels_df = v.Vessels().search(vessel_classes=vessel_class).to_df()
        vessel_list = list(vessels_df['id'].unique())
    else:
        vessel_list = list_of_vessels
    total = len(vessel_list)
    # Collect per-batch frames and concatenate once at the end: repeated
    # pd.concat inside the loop re-copies all prior rows (accidental O(n^2)).
    frames = []
    for start_idx in range(0, total, group):
        batch = vessel_list[start_idx:start_idx + group]
        # query signals data for the current batch of vessels
        data = extract_vessel_positions(list_of_vessels=batch,
                                        start_timestamp=start_timestamp,
                                        end_timestamp=end_timestamp,
                                        interval=interval)
        positions = pd.DataFrame(data[0]['data'])
        draughts = pd.DataFrame(data[1]['data'])
        destinations = pd.DataFrame(data[2]['data'])
        # Normalise timestamps and derive calendar dates used as join keys.
        positions['timestamp'] = pd.to_datetime(positions['timestamp'])
        draughts['start_timestamp'] = pd.to_datetime(draughts['start_timestamp'])
        destinations['timestamp'] = pd.to_datetime(destinations['timestamp'])
        positions['date'] = positions['timestamp'].dt.date
        draughts['start_date'] = draughts['start_timestamp'].dt.date
        destinations['date'] = destinations['timestamp'].dt.date
        # Keep one declared destination per vessel per day before joining.
        destinations = destinations.drop_duplicates(subset=['vessel_id', 'date'])
        merged = pd.merge(positions, draughts,
                          left_on=['vessel_id', 'date'],
                          right_on=['vessel_id', 'start_date'], how='left')
        merged = pd.merge(merged, destinations, on=['vessel_id', 'date'], how='left')
        merged = merged.fillna('')
        # '_x' suffixes come from the positions side of the draughts merge.
        batch_df = merged[['lat_x', 'lon_x', 'vessel_id', 'timestamp_x',
                           'heading', 'speed', 'value',
                           'declared_destination', 'declared_eta']].copy()
        batch_df.rename(columns={'lat_x': 'lat', 'lon_x': 'lon',
                                 'timestamp_x': 'timestamp', 'value': 'draught'},
                        inplace=True)
        frames.append(batch_df)
        # progress reporting (same messages as before)
        if ((start_idx + group) < total):
            print((start_idx + group), 'vessels done out of', total, 'at:', datetime.today())
        else:
            print(total, 'vessels done out of', total, 'at:', datetime.today())
    store = pd.concat(frames) if frames else pd.DataFrame()
    store.reset_index(drop=True, inplace=True)
    return store
def merge_voyages_and_signals(start_y, start_m, start_d, origin, origin_excl, destination, destination_excl, prod, prod_excl, vessels, vessels_excl, unit, status, group, interval):
    """Join Vortexa voyage records onto per-timestamp AIS signal rows.

    Pulls all voyages matching the given filters between the explicit start
    date and "now", pulls position/draught/destination signals for every
    vessel appearing in those voyages (in batches of ``group``), and returns
    one row per (vessel, signal timestamp) annotated with the voyage that
    was active at that time.

    Parameters mirror the ``VoyagesSearchEnriched().search()`` filters
    (origin/destination/product/vessel include+exclude lists, unit, status),
    plus:
        start_y, start_m, start_d -- year/month/day of the window start
        group    -- batch size for the signals pull (vessels per API call)
        interval -- signal sampling interval, e.g. '1d'

    Returns a DataFrame sorted by timestamp, deduplicated so overlapping
    voyages keep the one with the latest START DATE.
    """
    # Set dates
    start=datetime(start_y, start_m, start_d)
    end=datetime.today()
    # query vortexa vessels
    vessels_df = v.Vessels().search().to_df() #Creating dataframe of all Vortexa vessels
    print("starting voyages pull at:", datetime.today())
    # query voyages
    voyages=v.VoyagesSearchEnriched().search(
        time_min=start,
        time_max=end,
        voyage_status=status,
        latest_products=prod,
        latest_products_excluded=prod_excl,
        origins=origin,
        origins_excluded=origin_excl,
        destinations=destination,
        destinations_excluded=destination_excl,
        vessels=vessels,
        vessels_excluded=vessels_excl,
        unit=unit,
        columns=['vessel_name', 'imo', 'voyage_id', 'dwt', 'quantity',
                 'latest_product', 'latest_product_group',
                 'latest_product_category','latest_product_grade',
                 'first_origin_port', 'first_origin_country',
                 'first_origin_shipping_region', 'final_destination_port',
                 'final_destination_country', 'final_destination_shipping_region',
                 'start_date', 'end_date', 'voyage_status']).to_df()
    print("finished voyages pull at:", datetime.today())
    # rename columns
    vessels_df.rename(columns={'imo' : 'IMO'}, inplace=True) #Renaming IMO column to match header of voyages column
    # convert IMOs to numeric values so both join keys share a dtype
    voyages['IMO'] = pd.to_numeric(voyages['IMO'])
    vessels_df["IMO"]= pd.to_numeric(vessels_df['IMO'])
    imo_list = list(voyages['IMO'].unique()) #creating list of unique voyages
    vessels_df_2 = vessels_df[vessels_df['IMO'].isin(imo_list)] #Vessel info for each vessel that appears in the voyages query
    vessel_ids = list(vessels_df_2['id'].unique()) # Obtain list of IDs for vessels in question
    print(len(vessel_ids), 'vessels to query')
    # merge voyages and vessel info (need to do this to get vessel ID in voyages output)
    merged=pd.merge(voyages, vessels_df_2[['id', 'IMO']], on='IMO', how='left')
    print("starting signals pull at:", datetime.today())
    # query signals
    positions=vessel_position_history(group=group, vessel_class=None,
                                      list_of_vessels=vessel_ids,
                                      start_timestamp=start,
                                      end_timestamp=end,
                                      interval=interval)
    print("finished signals pull at:", datetime.today())
    # convert voyage and signals dates to datetime values (UTC-aware so the
    # range comparison below does not mix naive and aware timestamps)
    merged['START DATE']=pd.to_datetime(merged['START DATE'], utc=True)
    merged['END DATE']=pd.to_datetime(merged['END DATE'], utc=True)
    positions['date']=pd.to_datetime(positions['timestamp'])
    # rename columns
    merged.rename(columns={'id':'vessel_id'}, inplace=True)
    # NOTE(review): the Signals endpoints return 16-char vessel IDs while the
    # SDK returns the long form, so truncation is required for the join —
    # confirm this remains true if the API version changes.
    merged['vessel_id']=merged['vessel_id'].str[0:16] #Preparing first 16 characters from long ID in order to match short IDs in crude_postions dataframe
    # merge on vessel_id
    merged_df = pd.merge(positions, merged, on='vessel_id', how='left')
    # filter rows where timestamp falls within the start and end date of voyages
    filtered_df = merged_df[(merged_df['date'] >= merged_df['START DATE']) & (merged_df['date'] <= merged_df['END DATE'])]
    # select relevant columns
    final_columns = ['lat', 'lon', 'vessel_id', 'timestamp', 'heading', 'speed',
                     'draught', 'declared_destination','declared_eta',
                     'VESSEL NAME', 'IMO', 'VOYAGE ID', 'DWT (t)', 'QUANTITY (t)', 'LATEST PRODUCT',
                     'LATEST PRODUCT GROUP', 'LATEST PRODUCT CATEGORY', 'LATEST PRODUCT GRADE',
                     'FIRST ORIGIN PORT', 'FIRST ORIGIN COUNTRY', 'FIRST ORIGIN SHIPPING REGION',
                     'FINAL DESTINATION PORT', 'FINAL DESTINATION COUNTRY',
                     'FINAL DESTINATION SHIPPING REGION', 'START DATE', 'END DATE', 'VOYAGE STATUS']
    final = filtered_df[final_columns]
    # ensure numeric columns are of data type numeric (copy first to avoid
    # SettingWithCopy warnings on the filtered view)
    final=final.copy()
    final['QUANTITY (t)']=pd.to_numeric(final['QUANTITY (t)'])
    final['lat']=pd.to_numeric(final['lat'])
    final['lon']=pd.to_numeric(final['lon'])
    final['heading']=pd.to_numeric(final['heading'])
    final['speed']=pd.to_numeric(final['speed'])
    # cabotage = voyage that starts and ends in the same country
    final['is_cabotage'] = final['FIRST ORIGIN COUNTRY'] == final['FINAL DESTINATION COUNTRY']
    # constant count column used as the density weight when plotting
    final['no. of vessels']=1
    final.reset_index(drop=True, inplace=True)
    # Identify columns for duplicate identification: a signal row can match
    # several voyages when their date ranges overlap
    duplicate_cols = ['lat', 'lon', 'vessel_id', 'timestamp', 'heading', 'speed', 'declared_eta', 'declared_destination']
    # Group by the columns and keep the row with the highest start_date
    # (i.e. attribute the signal to the most recent overlapping voyage)
    df_unique = final.loc[final.groupby(duplicate_cols)['START DATE'].idxmax()]
    df_unique['timestamp'] = pd.to_datetime(df_unique['timestamp'])
    df_unique.sort_values(by='timestamp', ascending=True, inplace=True)
    # Reset the index for better readability
    df_unique = df_unique.reset_index(drop=True)
    return df_unique
# --- Example run: aframax fleet carrying crude since May 2024 ---
# NOTE(review): vessels_list is built here but never passed to the call
# below — the search passes the class string 'oil_aframax_lr2' instead.
# Either the two lines above the call are dead code or the call was meant
# to use vessels=vessels_list; confirm intent before removing.
vessels=v.Vessels().search(vessel_classes='oil_aframax').to_df()
vessels_list=list(vessels['id'])[0:100]
# Daily-interval signals, pulled in batches of 100 vessels.
test=merge_voyages_and_signals(start_y=2024, start_m=5, start_d=1,
                               origin=None, origin_excl=None,
                               destination=None, destination_excl=None,
                               prod=crude, prod_excl=None, unit='t',
                               vessels='oil_aframax_lr2', vessels_excl=None,
                               status=None, group=100, interval='1d')
starting voyages pull at: 2024-06-25 14:11:29.286650 finished voyages pull at: 2024-06-25 14:11:37.311063 793 vessels to query starting signals pull at: 2024-06-25 14:11:37.331231 100 vessels done out of 793 at: 2024-06-25 14:11:42.591528 200 vessels done out of 793 at: 2024-06-25 14:11:51.326785 300 vessels done out of 793 at: 2024-06-25 14:11:56.869270 400 vessels done out of 793 at: 2024-06-25 14:12:01.709078 500 vessels done out of 793 at: 2024-06-25 14:12:09.123651 600 vessels done out of 793 at: 2024-06-25 14:12:14.678199 700 vessels done out of 793 at: 2024-06-25 14:12:20.646894 793 vessels done out of 793 at: 2024-06-25 14:12:25.551174 finished signals pull at: 2024-06-25 14:12:25.555364
test
lat | lon | vessel_id | timestamp | heading | speed | draught | declared_destination | declared_eta | VESSEL NAME | ... | FIRST ORIGIN COUNTRY | FIRST ORIGIN SHIPPING REGION | FINAL DESTINATION PORT | FINAL DESTINATION COUNTRY | FINAL DESTINATION SHIPPING REGION | START DATE | END DATE | VOYAGE STATUS | is_cabotage | no. of vessels | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 57.006513 | 11.712312 | c218232be16e8a5b | 2024-05-01 00:00:00+00:00 | 161.7 | 10.9 | EETLL | 2024-05-05T03:00:00.000Z | MARCH | ... | India | India West Coast | Primorsk (Koivisto) [RU] | Russia | Baltic | 2024-02-13 00:00:00+00:00 | 2024-05-14 00:00:00+00:00 | Ballast | False | 1 | |
1 | 4.993667 | 104.059839 | 183c2df9eeb4b7b3 | 2024-05-01 00:00:00+00:00 | 152.9 | 10.3 | PACIFIC JEWELS | ... | Thailand | Southeast Asia (SEA) | Cossack, WA [AU] | Oceania | 2024-04-26 00:00:00+00:00 | 2024-05-14 00:00:00+00:00 | Ballast | False | 1 | ||||
2 | -6.247512 | 108.461041 | 2b9f8d4aead350ed | 2024-05-01 00:00:00+00:00 | 64.4 | 0.2 | DF MYSTRAS | ... | Gabon | West Africa (WAf) | Balongan [ID] | Indonesia | Southeast Asia (SEA) | 2024-03-23 00:00:00+00:00 | 2024-05-03 00:00:00+00:00 | Laden | False | 1 | |||
3 | 19.765696 | -83.432494 | 1d1bc37238bf7c21 | 2024-05-01 00:00:00+00:00 | 132.0 | 12.0 | PACHG | 2024-05-04T11:00:00.000Z | TORM HERMIA | ... | United States | Gulf of Mexico (GoM) | Chiriqui Grande [PA] | Panama | Caribbean | 2024-04-26 00:00:00+00:00 | 2024-05-05 00:00:00+00:00 | Laden | False | 1 | |
4 | 36.497196 | 14.523806 | 2ea187736a936e2a | 2024-05-01 00:00:00+00:00 | 110.5 | 11.0 | FR FOS > TR CKZ | 2024-05-04T22:00:00.000Z | SAFEEN ELONA | ... | France | Mediterranean (Med) | Novorossiysk [RU] | Russia | Black Sea and Caspian | 2024-04-29 00:00:00+00:00 | 2024-05-10 00:00:00+00:00 | Ballast | False | 1 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
35469 | 29.834449 | 123.188475 | dedf013f49aff7ad | 2024-06-25 00:00:00+00:00 | 10.4 | 0.1 | KOZMINO | 2024-06-29T14:00:00.000Z | HUIHAI PACIFIC | ... | China | Northeast Asia (NEA) | Kozmino [RU] | Russia | Russia Far East | 2024-06-23 00:00:00+00:00 | 2024-07-01 00:00:00+00:00 | Ballast | False | 1 | |
35470 | 59.183101 | 2.802509 | d95b68b92c36d55f | 2024-06-25 00:00:00+00:00 | 16.9 | 0.0 | BALDER(NOR) | 2024-06-27T04:00:00.000Z | SCOTT SPIRIT | ... | Norway | Northwest Europe (NWE) | Liverpool [GB] | United Kingdom | Northwest Europe (NWE) | 2024-06-15 00:00:00+00:00 | 2024-06-28 00:00:00+00:00 | Laden | False | 1 | |
35471 | -26.694409 | 153.257438 | 258c6e78fd9784c2 | 2024-06-25 00:00:00+00:00 | 145.1 | 0.0 | AU BNE | 2024-06-21T12:00:00.000Z | ESTEEM COWBOY | ... | United States | Gulf of Mexico (GoM) | Brisbane Qld [AU] | Oceania | 2024-04-28 00:00:00+00:00 | 2024-06-28 00:00:00+00:00 | Laden | False | 1 | ||
35472 | 59.913154 | 28.633176 | 3d0f10239fc55a1d | 2024-06-25 00:00:00+00:00 | 280.0 | 0.0 | RUULU | 2024-06-12T03:00:00.000Z | ROBON | ... | Turkey | Mediterranean (Med) | Primorsk (Koivisto) [RU] | Russia | Baltic | 2024-05-28 00:00:00+00:00 | 2024-06-25 00:00:00+00:00 | Ballast | False | 1 | |
35473 | 34.301610 | 31.089419 | fbd923996d68d310 | 2024-06-25 00:00:00+00:00 | 117.0 | 6.3 | LIMASSOL | 2024-06-26T23:00:00.000Z | FOS DA VINCI | ... | Turkey | Mediterranean (Med) | Sidi Kerir Terminal [EG] | Egypt | Mediterranean (Med) | 2024-05-05 00:00:00+00:00 | 2024-07-01 00:00:00+00:00 | Ballast | False | 1 |
35474 rows × 29 columns
# Render the merged voyage+signal rows as an animated vessel-density map,
# one frame per timestamp, weighted by the constant 'no. of vessels' column.
df1 = test
map_options = dict(
    lon='lon',
    lat='lat',
    z='no. of vessels',
    radius=10,
    center=dict(lon=25, lat=9),
    zoom=1.1,
    mapbox_style='carto-darkmatter',
    color_continuous_scale='dense',
    animation_frame='timestamp',
    labels={"date": "Date"},
)
fig = px.density_mapbox(df1, **map_options)
fig.update_layout(
    title='Aframaxes since May 2024',
    title_font=dict(size=18, color='black'),
    title_x=0.5,
)
fig.show()