#!/usr/bin/env python
# coding: utf-8

# Notebook export: reads the RHI beneficiaries CSV, converts the application
# date and payment amount columns to proper dtypes, shortens the column
# names, and then inspects the resulting frame.

# In[18]:

import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display

# `get_ipython` is only defined inside an IPython/Jupyter session.
get_ipython().run_line_magic('matplotlib', 'inline')
plt.rcParams['figure.figsize'] = (20.0, 10.0)

rhi_csv = 'tabula-RHI-beneficiaries-non-domestic-individuals-companies.csv'


# In[30]:

# Raw CSV headers, including their embedded carriage returns
# (presumably left over from the table extraction -- confirm against the file).
_RAW_DATE_COL = 'Date of\rApplication'
_RAW_CASH_COL = 'Amount of payments\rmade to 28 February\r2017 (£)*'

df = pd.read_csv(rhi_csv)

# Application dates are parsed as day/month/year.
df['Date'] = pd.to_datetime(df[_RAW_DATE_COL], format='%d/%m/%Y')

# Strip every '£' and ',' from the amounts so the values can be cast to float.
cash_text = df[_RAW_CASH_COL].replace('[£,]', '', regex=True)
df['Cash'] = cash_text.astype(float)

# The raw columns are superseded by 'Date' and 'Cash'.
df = df.drop(columns=[_RAW_DATE_COL, _RAW_CASH_COL])

df = df.rename(
    columns={
        'Business or\rInstallation\rLocation': 'Postcode',
        'Installation\rCapacity\r(kWth)': 'Capacity (kWth)',
        'Technology Type': 'Type',
        'Name': 'Company',
    }
)

display(df.head())
display(df.dtypes)


# In[36]:

# Rows with no postcode (shown as cell output when run in a notebook).
df[df.Postcode.isnull()]


# In[37]:

# Sum of the 'Cash' column (shown as cell output when run in a notebook).
df.Cash.sum()


# In[ ]: