#!/usr/bin/env python # coding: utf-8 # # Module 6 # # ## Video 29: Working with Aggregated Cargo Movements Data # **Python for the Energy Industry** # # In this lesson, we will be working with the data from the previous lesson. We will practice visualising this data. # # [Cargo Movements documentation](https://vortechsa.github.io/python-sdk/endpoints/cargo_movements/) # # To start we follow the steps to get our Cargo Movements DataFrame: # In[1]: # initial imports import pandas as pd import numpy as np from datetime import datetime from dateutil.relativedelta import relativedelta import vortexasdk as v # datetimes to access last 7 weeks of data now = datetime.utcnow() seven_weeks_ago = now - relativedelta(weeks=7) # In[2]: # Find US ID us = [g.id for g in v.Geographies().search('united states').to_list() if 'country' in g.layer] assert len(us) == 1 # Find crude ID crude = [p.id for p in v.Products().search('crude').to_list() if p.name=='Crude'] assert len(crude) == 1 # Columns to pull out, and shortened names required_columns = ["vessels.0.name","vessels.0.vessel_class","product.group.label","product.category.label","quantity", "status","events.cargo_port_load_event.0.location.port.label","events.cargo_port_load_event.0.end_timestamp", "events.cargo_port_unload_event.0.location.port.label","events.cargo_port_unload_event.0.location.country.label", "events.cargo_port_unload_event.0.end_timestamp"] new_labels = ["vessel_name","vessel_class","product_group","product_category","quantity","status", "loading_port","loading_finish","unloading_port","unloading_country","unloading_finish"] relabel = dict(zip(required_columns,new_labels)) cms = v.CargoMovements().search( filter_activity = 'loading_end', filter_origins = us, exclude_destinations = us, filter_products = crude, filter_time_min = seven_weeks_ago, filter_time_max = now, cm_unit = 'b' ).to_df(columns=required_columns).rename(relabel,axis=1) cms['loading_week'] = cms['loading_finish'].dt.isocalendar().week # Let's start by making a bar chart of weekly exports: # In[3]: weekly_quantity = cms.groupby('loading_week').sum() ax = weekly_quantity.plot.bar(y='quantity',legend=False,figsize=(8,6)) ax.set_xlabel('Week') ax.set_ylabel('US exports (bbl)') # By assigning the plot to the variable `ax` we can make some further tweaks, like setting the x and y axis labels. # # What if we wanted to represent the breakdown by product category? These can be plotted with bars side-by-side: # In[4]: quantity_by_category = cms.groupby(by = ['loading_week','product_category']).sum().reset_index() quantity_by_category = quantity_by_category.pivot(index = 'loading_week',columns = 'product_category',values = 'quantity') quantity_by_category = quantity_by_category.fillna(0) ax = quantity_by_category.plot.bar(figsize=(8,6)) ax.set_xlabel('Week') ax.set_ylabel('US exports (bbl)') # As there are many products with zero exports, this leaves a lot of holes in the plot. A better way to represent this is 'stacked': # In[5]: ax = quantity_by_category.plot.bar(stacked=True,figsize=(8,6)) ax.set_xlabel('Week') ax.set_ylabel('US exports (bbl)') # What about visualising the share of exports to destination countries? A pie chart would be suitable for this. # In[6]: quantity_by_destination = cms.groupby('unloading_country').sum()[['quantity']] quantity_by_destination.sort_values(by='quantity',ascending = False, inplace=True) top_destination_countries = quantity_by_destination.head(10) rest = pd.DataFrame(index = ['Other'], columns = ['quantity']) rest.loc['Other'] = quantity_by_destination[10:].sum().values top_destination_countries = pd.concat([top_destination_countries, rest]).astype(int) top_destination_countries['%'] = round(top_destination_countries['quantity']*100 / top_destination_countries['quantity'].sum(),2) top_destination_countries.plot.pie(y='%',figsize=(6,6),legend=False,autopct='%.0f') # Another type of plot we can make is the histogram, which shows the distribution of values in a column. Here's the distribution of quantity: # In[7]: cms.plot.hist(y='quantity',bins=30) # Do we think this distribution is different for different products? We can test by pivoting. # In[8]: cms_product = cms.pivot(columns = 'product_category',values = 'quantity')[['Heavy-Sour','Light-Sweet','Medium-Sour']] cms_product.plot.hist(bins=30) # ### Exercise # # Instead of US crude exports, pick a different dataset to examine. Say, Saudi Arabian exports, or Chinese imports. Follow the steps of the last 2 lessons to aggregate and visualise different aspects of this data. # In[ ]: