#!/usr/bin/env python # coding: utf-8 # # Module 7 # # ## Video 33: Analysing Imports/Exports Data # **Python for the Energy Industry** # # We will now extend the work of the previous lesson towards and example of analysing data on imports and exports. # # # [Cargo Time Series documentation.](https://vortechsa.github.io/python-sdk/endpoints/cargo_timeseries/) # # In[1]: # initial imports import pandas as pd import numpy as np from datetime import datetime from dateutil.relativedelta import relativedelta import vortexasdk as v # The cargo unit for the time series (barrels) TS_UNIT = 'b' # The granularity of the time series TS_FREQ = 'day' # datetimes to access last 7 weeks of data now = datetime.utcnow() seven_weeks_ago = now - relativedelta(weeks=7) # Let's grab the Chinese imports data, as we did in the previous lesson: # In[2]: # Find China ID china = [g.id for g in v.Geographies().search('china').to_list() if 'country' in g.layer] assert len(china) == 1 search_result = v.CargoTimeSeries().search( timeseries_frequency=TS_FREQ, timeseries_unit=TS_UNIT, filter_destinations=china, filter_time_min=seven_weeks_ago, filter_time_max=now, filter_activity="unloading_state", ) imports_df = search_result.to_df().rename(columns={'key':'date','value':'total'})[['date','total']] # In[3]: imports_df.head() # Now we'll break this data up by the exporting shipping region. First, we get the names and IDs of each shipping region: # In[4]: shipping_regions = [g for g in v.Geographies().search() if 'shipping_region' in g['layer']] shipping_region_names = [sr['name'] for sr in shipping_regions] shipping_region_IDs = [sr['id'] for sr in shipping_regions] # zip into a dictionary shipping_region_dict = dict(zip(shipping_region_names,shipping_region_IDs)) print(len(shipping_region_dict)) # We can now loop over each shipping region, and add the time series data for exported by that region on to our DataFrame. # In[5]: for shipping_region in shipping_region_dict: search_result = v.CargoTimeSeries().search( timeseries_frequency=TS_FREQ, timeseries_unit=TS_UNIT, filter_destinations=china, filter_origins=shipping_region_dict[shipping_region], filter_time_min=seven_weeks_ago, filter_time_max=now, filter_activity="unloading_state", ) df_sr = search_result.to_df() imports_df[shipping_region] = df_sr['value'] # Let's take a peek at the top exporting regions among Chinese imports: # In[6]: imports_df.sum().sort_values(ascending=False).head(10) # In[7]: top10 = imports_df.sum().sort_values(ascending=False).head(10) ax = imports_df.plot(x='date',y=top10.index.values,figsize=(8,6)) ax.set_ylabel('Imports to China (barrels)') # ### Exercise # # Try applying this analysis to US imports. # In[ ]: