This file covers German power plants. It downloads the power plant list from the BNetzA (German Federal Network Agency) and augments it with further information.
import urllib.request
import csv
import pandas as pd
import numpy as np
import posixpath
import urllib.parse
import datetime
import re
import os.path
import yaml # http://pyyaml.org/, pip install pyyaml, conda install pyyaml
import json
import subprocess
from bokeh.charts import Scatter, show
from bokeh.io import output_notebook
# Route bokeh plot output into the notebook instead of separate HTML files.
output_notebook()
# IPython magic: render matplotlib figures inline (requires a Jupyter/IPython kernel).
%matplotlib inline
import logging
# Named logger used throughout this notebook for translation/consistency warnings.
logger = logging.getLogger('notebook')
logger.setLevel('INFO')
# Re-format the root logger's existing handler so log lines carry a timestamp.
nb_root_logger = logging.getLogger()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                              datefmt='%d %b %Y %H:%M:%S')
# NOTE(review): assumes the root logger already has at least one handler
# (true inside Jupyter/IPython); this raises IndexError in a plain interpreter.
nb_root_logger.handlers[0].setFormatter(formatter)
# Create download and output folders if they do not exist.
# os.makedirs(..., exist_ok=True) replaces the exists()-then-makedirs()
# pattern: it is shorter and race-free (no failure if the directory is
# created between the check and the call).
os.makedirs('downloads/', exist_ok=True)
os.makedirs('output/', exist_ok=True)
os.makedirs('output/datapackage_powerplants_germany', exist_ok=True)
# BNetzA Power plant list
# Source: German Federal Network Agency (Bundesnetzagentur) CSV download.
url_bnetza = 'http://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/Versorgungssicherheit/Erzeugungskapazitaeten/Kraftwerksliste/Kraftwerksliste_CSV.csv?__blob=publicationFile&v=5'
# UBA Power plant list
# Source: German Environment Agency (Umweltbundesamt), plants >= 100 MW electrical, status 09/2015.
url_uba = 'http://www.umweltbundesamt.de/sites/default/files/medien/376/dokumente/kraftwerke_in_deutschland_ab_100_megawatt_elektrischer_leistung_2015_09.xls'
This section defines helper functions that are used multiple times within this script.
def downloadandcache(url):
    """Download *url* into the local ``downloads/`` folder and return the filepath.

    The cached filename is prefixed with the current date, so each file is
    downloaded at most once per day; on subsequent calls the same day the
    cached copy is reused.

    Parameters
    ----------
    url : str
        URL of the file to fetch.

    Returns
    -------
    str
        Path of the (possibly freshly downloaded) cached file, relative to
        the working directory.
    """
    path = urllib.parse.urlsplit(url).path
    filename = posixpath.basename(path)
    now = datetime.datetime.now()
    # Unpadded month/day kept for backward compatibility with filenames of
    # previously cached files (e.g. '2016-1-5-foo.csv').
    datestring = str(now.year) + "-" + str(now.month) + "-" + str(now.day)
    filepath = "downloads/" + datestring + "-" + filename
    # Download only if today's cached copy does not exist yet.
    if not os.path.exists(filepath):
        print("Downloading file", filename)
        urllib.request.urlretrieve(url, filepath)
    else:
        print("Using local file from", filepath)
    # Return the same (relative) path in both branches; the original code
    # prefixed './' only in the cached branch, yielding inconsistent paths.
    return filepath
This section downloads the BNetzA power plant list and converts it to a pandas data frame
# Download (or reuse today's cached copy of) the BNetzA CSV and load it.
bnetza_data_filepath=(downloadandcache(url_bnetza))
#print(bnetza_data_filepath)
# The CSV uses German number formatting and Windows encoding; the first
# 9 rows are header/metadata and are skipped.
plantlist=pd.read_csv(bnetza_data_filepath,
                      skiprows=9,
                      sep=';', # CSV field separator, default is ','
                      thousands='.', # Thousands separator, default is ','
                      decimal=',', # Decimal separator, default is '.')
                      encoding='cp1252')
plantlist.head()
# Download (or reuse today's cached copy of) the UBA Excel list.
# The first 9 rows are header material and are skipped.
uba_data_filepath=(downloadandcache(url_uba))
plantlist_uba=pd.read_excel(uba_data_filepath,
                            skiprows=9
                            )
plantlist_uba.head()
A dictionary mapping the original column names to the new column names is created. This dictionary is used to translate the column names.
Original Name | Translation |
---|---|
Kraftwerksnummer Bundesnetzagentur | id |
Unternehmen | company |
Kraftwerksname | name |
PLZ\n(Standort Kraftwerk) | postcode |
Ort\n(Standort Kraftwerk) | city |
Straße und Hausnummer (Standort Kraftwerk) | street |
Bundesland | state |
Blockname | block |
Aufnahme der kommerziellen Stromerzeugung der derzeit in Betrieb befindlichen Erzeugungseinheit\n(Jahr) | commissioned |
Kraftwerksstatus \n(in Betrieb/\nvorläufig stillgelegt/\nsaisonale Konservierung\nReservekraftwerk/\nSonderfall) | status |
Energieträger | fuel_basis |
Spezifizierung "Mehrere Energieträger" und "Sonstige Energieträger" - Hauptbrennstoff | fuel_multiple1 |
Spezifizierung "Mehrere Energieträger" - Zusatz- / Ersatzbrennstoffe | fuel_multiple2 |
Auswertung\nEnergieträger (Zuordnung zu einem Hauptenergieträger bei Mehreren Energieträgern) | fuel |
Vergütungsfähig nach EEG\n(ja/nein) | eeg |
Wärmeauskopplung (KWK)\n(ja/nein) | chp |
Netto-Nennleistung (elektrische Wirkleistung) in MW | capacity |
Bezeichnung Verknüpfungspunkt (Schaltanlage) mit dem Stromnetz der Allgemeinen Versorgung gemäß Netzbetreiber | network_node |
Netz- oder Umspannebene des Anschlusses in kV | voltage |
Name Stromnetzbetreiber | network_operator |
# Maps the original (German) BNetzA column headers to short English
# identifiers. Keys contain literal '\n' because the raw CSV headers do.
# NOTE(review): the trailing 'uba_*' entries duplicate dict_uba_columns
# defined further below and have no effect on the BNetzA rename; they
# appear to be leftovers.
dict_columns = { 'Kraftwerksnummer Bundesnetzagentur':'id',
    'Unternehmen':'company',
    'Kraftwerksname':'name',
    'PLZ\n(Standort Kraftwerk)':'postcode',
    'Ort\n(Standort Kraftwerk)':'city',
    'Straße und Hausnummer (Standort Kraftwerk)':'street',
    'Bundesland':'state',
    'Blockname':'block',
    'Aufnahme der kommerziellen Stromerzeugung der derzeit in Betrieb befindlichen Erzeugungseinheit\n(Jahr)':'commissioned',
    'Kraftwerksstatus \n(in Betrieb/\nvorläufig stillgelegt/\nsaisonale Konservierung\nReservekraftwerk/\nSonderfall)':'status',
    'Energieträger':'fuel_basis',
    'Spezifizierung "Mehrere Energieträger" und "Sonstige Energieträger" - Hauptbrennstoff':'fuel_multiple1',
    'Spezifizierung "Mehrere Energieträger" - Zusatz- / Ersatzbrennstoffe':'fuel_multiple2',
    'Auswertung\nEnergieträger (Zuordnung zu einem Hauptenergieträger bei Mehreren Energieträgern)':'fuel',
    'Vergütungsfähig nach EEG\n(ja/nein)':'eeg',
    'Wärmeauskopplung (KWK)\n(ja/nein)':'chp',
    'Netto-Nennleistung (elektrische Wirkleistung) in MW':'capacity',
    'Bezeichnung Verknüpfungspunkt (Schaltanlage) mit dem Stromnetz der Allgemeinen Versorgung gemäß Netzbetreiber':'network_node',
    'Netz- oder Umspannebene des Anschlusses in kV':'voltage',
    'Name Stromnetzbetreiber':'network_operator',
    'Kraftwerksname / Standort': 'uba_name',
    'Betreiber ': 'uba_company',
    'Standort-PLZ': 'uba_postcode',
    'Kraftwerksstandort': 'uba_city',
    'Elektrische Bruttoleistung (MW)': 'uba_capacity',
    'Fernwärme-leistung (MW)': 'uba_chp_capacity',
    'Inbetriebnahme (ggf. Ertüchtigung)': 'uba_commissioned',
    'Anlagenart': 'uba_technology',
    'Primärenergieträger': 'uba_fuel',
    }
# Translate the BNetzA column headers to the English identifiers.
plantlist.rename(columns=dict_columns, inplace=True)
# Check if all columns have been translated; anything not in the
# dictionary's values was missed (or the source format changed).
for columnname in plantlist.columns:
    if columnname not in dict_columns.values():
        # Lazy %s formatting also copes with non-string column labels.
        logger.error("Untranslated column: %s", columnname)
# Maps German fuel names (as they appear in the BNetzA 'fuel' column) to
# English identifiers. Embedded '\n' and trailing spaces in the keys
# mirror the raw CSV values exactly.
dict_fuels = {'Steinkohle':'coal',
    'Erdgas':'gas',
    'Braunkohle':'lignite',
    'Kernenergie':'uranium',
    'Pumpspeicher':'pumped_storage',
    'Biomasse':'biomass',
    'Mineralölprodukte':'oil',
    'Laufwasser':'run_of_river',
    'Sonstige Energieträger\n(nicht erneuerbar) ':'other_non_renewable',
    'Abfall':'waste',
    'Speicherwasser (ohne Pumpspeicher)':'reservoir',
    'Unbekannter Energieträger\n(nicht erneuerbar)':'unknown_non_renewable',
    'Mehrere Energieträger\n(nicht erneuerbar)':'multiple_non_renewable',
    'Deponiegas':'gas_landfill',
    'Windenergie (Onshore-Anlage)':'wind_onshore',
    'Windenergie (Offshore-Anlage)':'wind_offshore',
    'Solare Strahlungsenergie':'solar',
    'Klärgas':'gas_sewage',
    'Geothermie':'geothermal',
    'Grubengas':'gas_mine'
    }
# Translate the fuel names to English identifiers.
plantlist["fuel"].replace(dict_fuels, inplace=True)
plantlist["fuel"].unique()
# Check if all fuels have been translated
for fuelname in plantlist["fuel"].unique():
    if fuelname not in dict_fuels.values():
        # %s formatting avoids a TypeError if a non-string (e.g. NaN)
        # slips through, which string concatenation would raise.
        logger.error("Untranslated fuel: %s", fuelname)
# Maps German plant-status strings to English identifiers. The two 2015
# entries differ only in capitalization ('Stillgelegt'/'stillgelegt')
# because both spellings occur in the source data.
dict_plantstatus = {
    'in Betrieb':'operating',
    'vorläufig stillgelegt':'shutdown_temporal',
    'Sonderfall':'special_case',
    'saisonale Konservierung':'seasonal_conservation',
    'Reservekraftwerk':'reserve',
    'Endgültig Stillgelegt 2011':'shutdown_2011',
    'Endgültig Stillgelegt 2012':'shutdown_2012',
    'Endgültig Stillgelegt 2013':'shutdown_2013',
    'Endgültig Stillgelegt 2014':'shutdown_2014',
    'Endgültig Stillgelegt 2015':'shutdown_2015',
    'Endgültig stillgelegt 2015':'shutdown_2015',
    }
# Translate the plant status values to English identifiers.
plantlist["status"].replace(dict_plantstatus, inplace=True)
plantlist["status"].unique()
# Check if all plant statuses have been translated
# (the original comment wrongly said "fuels").
for statusname in plantlist["status"].unique():
    if statusname not in dict_plantstatus.values():
        logger.error("Untranslated plant status: %s", statusname)
# Maps German yes/no values (both capitalizations occur) to English.
# Used for the CHP and EEG flag columns below.
dict_yesno = {
    'Nein':'no',
    'nein':'no',
    'Ja':'yes',
    'ja':'yes',
    }
# Translate the CHP (combined heat and power) flag.
plantlist["chp"].replace(dict_yesno, inplace=True)
plantlist["chp"].unique()
# Check if all CHP values have been translated; NaN means 'not specified'
# and is deliberately tolerated.
for chpname in plantlist["chp"].unique():
    # 'and' instead of '&': these are plain booleans, not arrays.
    if chpname not in dict_yesno.values() and str(chpname) != "nan":
        logger.error("Untranslated chp capability: %s", chpname)
# Translate the EEG (renewable energy act remuneration) flag.
plantlist["eeg"].replace(dict_yesno, inplace=True)
plantlist["eeg"].unique()
# Check if all EEG values have been translated; NaN is tolerated.
for eegname in plantlist["eeg"].unique():
    # 'and' instead of '&': these are plain booleans, not arrays.
    if eegname not in dict_yesno.values() and str(eegname) != "nan":
        logger.error("Untranslated EEG type: %s", eegname)
A separate dictionary, analogous to the BNetzA one above, is used to translate the UBA column names.
# Maps the original (German) UBA column headers to 'uba_'-prefixed English
# identifiers, so UBA columns are distinguishable after merging into the
# BNetzA-based plantlist. Note the trailing space in 'Betreiber ' — it is
# present in the raw Excel header.
dict_uba_columns = {
    'Kraftwerksname / Standort': 'uba_name',
    'Betreiber ': 'uba_company',
    'Standort-PLZ': 'uba_postcode',
    'Kraftwerksstandort': 'uba_city',
    'Elektrische Bruttoleistung (MW)': 'uba_capacity',
    'Fernwärme-leistung (MW)': 'uba_chp_capacity',
    'Inbetriebnahme (ggf. Ertüchtigung)': 'uba_commissioned',
    'Anlagenart': 'uba_technology',
    'Primärenergieträger': 'uba_fuel',
    'Bundesland':'uba_state',
    }
# Translate the UBA column headers to the 'uba_'-prefixed identifiers.
plantlist_uba.rename(columns=dict_uba_columns, inplace=True)
# Check if all columns have been translated
for columnname in plantlist_uba.columns:
    if columnname not in dict_uba_columns.values():
        logger.error("Untranslated column: %s", columnname)
#Prepare for matching
# Build a synthetic key '<name>_<fuel>' used to join the UBA list against
# the hand-maintained matching list below.
plantlist_uba['uba_id_string'] = plantlist_uba['uba_name'] + '_' + plantlist_uba['uba_fuel']
# Translate UBA Technologies here
# Set Index to Kraftwerksnummer_Bundesnetzagentur
# Keep the BNetzA id both as a column and as the index.
plantlist['bnetza_id'] = plantlist['id']
plantlist = plantlist.set_index('id')
# Add comment column
# Initialized to a single space so later string concatenation has a
# string (not NaN) to start from.
plantlist['comment'] = ' '
plantlist.head()
# UBA List import here
# Hand-maintained list mapping each BNetzA id to a UBA plant name/fuel pair.
matchinglist=pd.read_csv('inputs/matching_bnetza_uba.csv',
                         skiprows=0,
                         sep=';', # CSV field separator, default is ','
                         thousands='.', # Thousands separator, default is ','
                         decimal=',', # Decimal separator, default is '.')
                         encoding='cp1252')
# Same synthetic '<name>_<fuel>' key as built for plantlist_uba above.
matchinglist['uba_id_string'] = matchinglist['uba_match_name'] + '_' + matchinglist['uba_match_fuel']
matchinglist.head()
#Check for cases:
#1-1 One BNetzA ID to one UBA-ID
# Rows of the matching list whose UBA key AND BNetzA id are both unique,
# i.e. a clean one-to-one match between the two lists.
match1t1 = matchinglist[(matchinglist.duplicated(subset=['uba_id_string'], keep=False) == False) & (matchinglist.duplicated(subset=['ID BNetzA'], keep=False)== False)]
# Attach the UBA data for each matched key.
match1t1 = pd.merge(match1t1, plantlist_uba, left_on='uba_id_string', right_on='uba_id_string', how='left')
match1t1 = match1t1.set_index('ID BNetzA')
match1t1.head()
#Add comment
match1t1['merge_comment'] = 'List matching type: Single UBA power plant assigned to single BNetzA power plant'
#Match multiple BNetza IDs to One UBA ID
# Matching structure:
# bnetza_id     uba_id
# 1             1
# 2             1
# 3             1
# 4             2
# 5             2
# Get relevant entries from the matchinglist and merge the corresponding UBA Data to the list.
# A UBA key appearing several times + a unique BNetzA id selects the n-to-1 case.
matchnt1= matchinglist[(matchinglist.duplicated(subset=['uba_id_string'], keep=False) == True) & (matchinglist.duplicated(subset=['ID BNetzA'], keep=False)== False)]
matchnt1 = pd.merge(matchnt1, plantlist_uba, left_on='uba_id_string', right_on='uba_id_string', how='left')
matchnt1 = matchnt1.set_index('ID BNetzA')
# Import BNetzA Capacities and CHP criterion into matchnt1 dataframe
plantlist_capacities = pd.DataFrame(plantlist[['capacity','chp']])
plantlist_capacities=plantlist_capacities.rename(columns = {'capacity':'capacity_bnetza'})
plantlist_capacities=plantlist_capacities.rename(columns = {'chp':'chp_bnetza'})
#print(plantlist_capacities)
matchnt1 = pd.merge(matchnt1, plantlist_capacities, left_index=True, right_index=True, how='left')
# Get sum of BNetzA Capacitites for each UBA Index and merge into matchnt1 dataframe
plantlist_uba_capacitysum = pd.DataFrame(matchnt1.groupby('uba_id_string').sum()['capacity_bnetza'])
plantlist_uba_capacitysum=plantlist_uba_capacitysum.rename(columns ={'capacity_bnetza':'capacity_bnetza_aggregate'})
# NOTE(review): matchnt1 is indexed by 'ID BNetzA' while
# plantlist_uba_capacitysum is indexed by 'uba_id_string'; merging on
# left_index/right_index only aligns if those values coincide — verify
# against the commented alternative on the next line.
matchnt1 = pd.merge(matchnt1, plantlist_uba_capacitysum, left_index=True,right_index=True, how='left')
#matchnt1 = pd.merge(matchnt1, plantlist_uba_capacitysum, left_index=True, right_on='uba_id_string', how='left')
# Scale UBA Capacities based BNEtza Data: distribute the single UBA
# capacity over the n BNetzA blocks proportionally to their capacities.
matchnt1['uba_capacity_scaled'] = matchnt1['uba_capacity'] * matchnt1['capacity_bnetza']/matchnt1['capacity_bnetza_aggregate']
#determine sum of capacities with chp capability and add to matchnt1
plantlist_uba_chp_capacities = matchnt1[(matchnt1['chp_bnetza'] == 'yes')]
plantlist_uba_chp_capacitysum = pd.DataFrame(plantlist_uba_chp_capacities.groupby('uba_id_string').sum()[['capacity_bnetza']])
plantlist_uba_chp_capacitysum = plantlist_uba_chp_capacitysum.rename(columns = {'capacity_bnetza':'capacity_bnetza_with_chp'})
plantlist_uba_chp_capacitysum
# NOTE(review): same index-alignment concern as the merge above.
matchnt1 = pd.merge(matchnt1, plantlist_uba_chp_capacitysum, left_index=True, right_index=True, how='left')
# Distribute the UBA district-heating capacity only over the CHP-capable blocks.
matchnt1['uba_chp_capacity_scaled'] = matchnt1['uba_chp_capacity'] * matchnt1['capacity_bnetza']/matchnt1['capacity_bnetza_with_chp']
# Change column names for merge later on: keep the originals under *_original
# and expose the scaled values under the canonical names.
matchnt1['uba_chp_capacity_original'] = matchnt1['uba_chp_capacity']
matchnt1['uba_chp_capacity'] = matchnt1['uba_chp_capacity_scaled']
matchnt1['uba_capacity_original'] = matchnt1['uba_capacity']
matchnt1['uba_capacity'] = matchnt1['uba_capacity_scaled']
#Add comment
matchnt1['merge_comment'] = 'List matching type: UBA capacity distributed proportionally to multiple BNetzA power plants'
# Drop columns not needed anymore
colsToDrop = ['capacity_bnetza', 'chp_bnetza','capacity_bnetza_with_chp', 'capacity_bnetza_aggregate', 'uba_chp_capacity_scaled', 'uba_capacity_scaled']
matchnt1 = matchnt1.drop(colsToDrop, axis=1)
matchnt1.head()
#1-n Case here
#The resulting DataFrame should be called "match1tn"
# Matching structure:
# bnetza_id     uba_id
# 1             1
# 1             2
# 1             3
# 2             4
# 2             5
# Get relevant entries from the matchinglist and merge the corresponding UBA Data to the list.
# A BNetzA id appearing several times + a unique UBA key selects the 1-to-n case.
match1tn= matchinglist[(matchinglist.duplicated(subset=['ID BNetzA'], keep=False) == True) & (matchinglist.duplicated(subset=['uba_id_string'], keep=False)== False)]
match1tn = pd.merge(match1tn, plantlist_uba, left_on='uba_id_string', right_on='uba_id_string', how='left')
match1tn = match1tn.set_index('ID BNetzA')
match1tn.head()
# Import BNetzA Capacities and CHP criterion into match1tn dataframe
plantlist_capacities = pd.DataFrame(plantlist[['capacity','chp']])
plantlist_capacities=plantlist_capacities.rename(columns = {'capacity':'capacity_bnetza'})
plantlist_capacities=plantlist_capacities.rename(columns = {'chp':'chp_bnetza'})
#plantlist_capacities.head()
#print(plantlist_capacities)
match1tn = pd.merge(match1tn, plantlist_capacities, left_index=True, right_index=True, how='left')
# Restore the index name lost by the merge.
match1tn.index.names=['ID BNetzA']
match1tn.head()
# Get sum of UBA Capacitites for each BNetzA Index and merge into match1tn dataframe
plantlist_bnetza_capacitysum = pd.DataFrame(match1tn.groupby(match1tn.index).sum()['uba_capacity'])
plantlist_bnetza_capacitysum=plantlist_bnetza_capacitysum.rename(columns ={'uba_capacity':'uba_capacity_aggregate'})
#print(plantlist_uba_capacitysum)
match1tn = pd.merge(match1tn, plantlist_bnetza_capacitysum, left_index=True, right_index=True, how='left')
# Get sum of UBA CHP Capacities for each BNetzA Index and merge into match1tn dataframe
plantlist_bnetza_chp_capacitysum = pd.DataFrame(match1tn.groupby(match1tn.index).sum()['uba_chp_capacity'])
plantlist_bnetza_chp_capacitysum=plantlist_bnetza_chp_capacitysum.rename(columns ={'uba_chp_capacity':'uba_chp_capacity_aggregate'})
match1tn = pd.merge(match1tn, plantlist_bnetza_chp_capacitysum, left_index=True, right_index=True, how='left')
# Get UBA Technology for each BNetzA Index and merge into match1tn dataframe
## Option 1: Take all technologies and merge them
#match1tn['uba_technology_aggregate'] = pd.DataFrame(match1tn.groupby(match1tn.index).transform(lambda x: ', '.join(x))['uba_technology'])
## Option 2 (currently preferred): Take technology with highest occurence
# value_counts().index[0] is the most frequent technology within the group.
match1tn['uba_technology_aggregate'] = pd.DataFrame(match1tn.groupby(match1tn.index)['uba_technology'].agg(lambda x:x.value_counts().index[0]))
# Get UBA Plant name: concatenate all member plant names.
match1tn['uba_name_aggregate'] = pd.DataFrame(match1tn.groupby(match1tn.index).transform(lambda x: ', '.join(x))['uba_name'])
# Get UBA company name: take the most frequent company within the group.
match1tn['uba_company_aggregate'] = pd.DataFrame(match1tn.groupby(match1tn.index)['uba_company'].agg(lambda x:x.value_counts().index[0]))
# Change column names for merge later on: originals become *_original and
# the aggregates take over the canonical names.
match1tn = match1tn.rename(columns={'uba_chp_capacity':'uba_chp_capacity_original','uba_capacity':'uba_capacity_original',
                                    'uba_chp_capacity_aggregate':'uba_chp_capacity','uba_capacity_aggregate':'uba_capacity'})
#match1tn['uba_chp_capacity_original'] = match1tn['uba_chp_capacity_aggregate']
#match1tn['uba_capacity_original'] = match1tn['uba_capacity_aggregate']
#Add comment
match1tn['merge_comment'] = 'List matching type: Multiple UBA capacities aggregated to single BNetzA power plant'
# Drop columns not needed anymore
colsToDrop = ['capacity_bnetza', 'chp_bnetza']
match1tn = match1tn.drop(colsToDrop, axis=1)
# Drop duplicate rows and keep first entry: after aggregation all rows of a
# BNetzA id carry the same aggregate values, so one representative suffices.
match1tn = match1tn.reset_index().drop_duplicates(subset='ID BNetzA', keep='first').set_index('ID BNetzA')
match1tn.head()
# Merge the UBA DataFrames
#Merge first two dataframes
plantlist_uba_for_merge = match1t1.append(matchnt1)
#Add third dataframe
plantlist_uba_for_merge = plantlist_uba_for_merge.append(match1tn)
#Merge plantlist_uba_for_merge into the plantlist
plantlist = pd.merge(plantlist, plantlist_uba_for_merge, left_index=True, right_index=True, how='left')
# Add Merge comment to comment column
# NOTE(review): for unmatched plants merge_comment is NaN, which turns the
# whole concatenated comment into NaN; the 'nan' remnants are stripped
# later in the efficiency section.
plantlist['comment'] = plantlist['comment'] + plantlist['merge_comment']
plantlist = plantlist.drop('merge_comment', axis=1)
#plantlist_uba_for_merge
#plantlist.head()
#plantlist.columns
# Spot-check a known plant id after the merge.
plantlist.loc[['BNA0073']]
# Delete unwanted fuels: solar and wind plants are excluded from this list.
# ~isin(...) keeps exactly the same rows as the original chain of '!='
# comparisons (rows with NaN fuel are kept in both variants).
plantlist = plantlist[~plantlist.fuel.isin(['solar', 'wind_onshore', 'wind_offshore'])]
# Delete placeholder rows that aggregate many small (< 10 MW) plants.
plantlist = plantlist[~plantlist.company.isin(['EEG-Anlagen < 10 MW', 'Nicht-EEG-Anlagen < 10 MW'])]
# Add columns with empty data.
# np.nan instead of the string 'NaN' keeps the column NaN-aware; the column
# is fully overwritten by the regex extraction below either way.
plantlist['shutdown'] = np.nan
#plantlist['retrofit']= 'NaN'
# Fill shutdown data column based on Plant status
#plantlist['shutdown'] = np.where(plantlist['status'] == 'shutdown_2010', 2010, plantlist['shutdown'])
#plantlist['shutdown'] = np.where(plantlist['status'] == 'shutdown_2011', 2011, plantlist['shutdown'])
#plantlist['shutdown'] = np.where(plantlist['status'] == 'shutdown_2012', 2012, plantlist['shutdown'])
#plantlist['shutdown'] = np.where(plantlist['status'] == 'shutdown_2013', 2013, plantlist['shutdown'])
#plantlist['shutdown'] = np.where(plantlist['status'] == 'shutdown_2014', 2014, plantlist['shutdown'])
#plantlist['shutdown'] = np.where(plantlist['status'] == 'shutdown_2015', 2015, plantlist['shutdown'])
#plantlist['shutdown'] = np.where(plantlist['status'] == 'shutdown_2016', 2016, plantlist['shutdown'])
# Extract the 4-digit year from statuses like 'shutdown_2013'.
# NOTE(review): regex patterns in this cell are not raw strings; '\w'/'\d'
# work but raise invalid-escape warnings on newer Pythons.
plantlist['shutdown'] = pd.to_numeric(plantlist['status'].str.extract('[\w].+(\d\d\d\d)'),errors='coerce')
# Chained assignment (df[col][mask] = value) is used throughout this cell;
# on newer pandas this should be .loc — kept as-is here.
plantlist['status'][plantlist['shutdown'] > 0] = 'shutdown'
# Fill retrofit data column
# Identify restrofit dates in UBA list: a year in parentheses marks a retrofit.
plantlist['retrofit'] = pd.to_numeric(plantlist['uba_commissioned'].str.extract('[(.+](\d\d\d\d)'),errors='coerce')
# Split multiple commissioning dates as listed in UBA
plantlist['uba_commissioned_1'] = pd.to_numeric(plantlist['uba_commissioned'].str.extract('(\d\d\d\d)'),errors='coerce')
plantlist['uba_commissioned_1'][plantlist['uba_commissioned_1'].isnull()] = pd.to_numeric(plantlist['uba_commissioned'].str.extract('(\d\d\d\d).+[\w]'),errors='coerce')
plantlist['uba_commissioned_2'] = pd.to_numeric(plantlist['uba_commissioned'].str.extract('[\w].+(\d\d\d\d).+[\w]'),errors='coerce')
plantlist['uba_commissioned_3'] = pd.to_numeric(plantlist['uba_commissioned'].str.extract('[\w].+(\d\d\d\d)'),errors='coerce')
# Blank out entries that merely repeat the retrofit year.
# NOTE(review): assigning '' into numeric columns coerces them to object dtype.
plantlist['uba_commissioned_1'][plantlist['retrofit']==plantlist['uba_commissioned_1']] = ''
plantlist['uba_commissioned_2'][plantlist['retrofit']==plantlist['uba_commissioned_2']] = ''
plantlist['uba_commissioned_3'][plantlist['retrofit']==plantlist['uba_commissioned_3']] = ''
# Split multiple commissioning dates as listed in BNetzA
plantlist['commissioned_1'] = pd.to_numeric(plantlist['commissioned'].str.extract('(\d\d\d\d)'),errors='coerce')
plantlist['commissioned_1'][plantlist['commissioned_1'].isnull()] = pd.to_numeric(plantlist['commissioned'].str.extract('(\d\d\d\d).+[\w]'),errors='coerce')
plantlist['commissioned_2'] = pd.to_numeric(plantlist['commissioned'].str.extract('[\w].+(\d\d\d\d).+[\w]'),errors='coerce')
plantlist['commissioned_3'] = pd.to_numeric(plantlist['commissioned'].str.extract('[\w].+(\d\d\d\d)'),errors='coerce')
# Show plantlist: inspect all plants now classified as shut down.
plantlist[plantlist['status']=='shutdown']
#plantlist.to_excel('power_plants_germany_tech.xlsx', sheet_name='output')
# Numeric shadow columns for plotting/regression.
plantlist['capacity_float'] = pd.to_numeric(plantlist['capacity'],errors='coerce')
# Latest of the (up to four) commissioning year candidates.
plantlist['commissioned_float'] = pd.to_numeric(plantlist[['commissioned','commissioned_1','commissioned_2','commissioned_3']].max(axis=1),errors='coerce')
plantlist['retrofit_float'] = pd.to_numeric(plantlist['retrofit'],errors='coerce')
plantlist.head()
# Split uba_technology information into technology (GT, CC,...) and type (HKW, IKW, ...)
# Both new columns start as copies of the raw UBA technology string and are
# then mapped through separate dictionaries.
plantlist['technology'] = plantlist['uba_technology']
plantlist['type'] = plantlist['uba_technology']
# Translate technologies
# German UBA technology codes -> generic technology identifiers
# (GT = gas turbine, CC = combined cycle, ST = steam turbine,
# ROR = run of river, PSP = pumped storage, SPP = storage power plant).
# Codes mapping to '' carry only type (CHP/IPP) information, no technology.
dict_technology = {
    'GT': 'GT',
    'GuD': 'CC',
    'DKW': 'ST',
    'LWK': 'ROR',
    'PSW': 'PSP',
    'DWR': 'ST', #Pressurized water reactor
    'G/AK': 'GT', #GT with heat recovery
    'SWR': 'ST', #boiling water reactor
    'SWK': 'SPP', #storage power plant
    'SSA': '', #bus bar
    'HKW (DT)': 'ST',
    'HKW / GuD': 'CC',
    'GuD / HKW': 'CC',
    'IKW / GuD': 'CC',
    'IKW /GuD': 'CC',
    'HKW / SSA': '',
    'IKW / SSA': '',
    'HKW': '',
    'IKW': '',
    'IKW / HKW': ''
    }
# Translate the technology codes; NaN (no UBA match) is tolerated.
plantlist["technology"].replace(dict_technology, inplace=True)
plantlist["technology"].unique()
# Check if all technologies have been translated
for technology in plantlist["technology"].unique():
    # 'and' instead of '&': these are plain booleans, not arrays.
    if technology not in dict_technology.values() and str(technology) != "nan":
        logger.error("Untranslated technology: %s", technology)
# Translate types
# German UBA plant-category codes -> plant type (CHP = combined heat and
# power, IPP = industrial power plant). Codes mapping to '' carry only
# technology information, no type.
dict_type = {
    'HKW': 'CHP', #thermal power plant,
    'HKW (DT)': 'CHP',
    'IKW': 'IPP', #industrial power plant
    'HKW / GuD': 'CHP',
    'GuD / HKW': 'CHP',
    'IKW / GuD': 'IPP',
    'IKW /GuD': 'IPP',
    'IKW / SSA': 'IPP',
    'HKW / SSA': 'CHP',
    'IKW / HKW': 'CHP',
    'GT': '',
    'GuD': '',
    'DKW': '',
    'LWK': '',
    'PSW': '',
    'DWR': '', #Pressurized water reactor
    'G/AK': 'CHP', #GT with heat recovery
    'SWR': '', #boiling water reactor
    'SWK': '', #storage power plant
    'SSA': '', #
    }
# Translate the plant type codes; NaN (no UBA match) is tolerated.
plantlist["type"].replace(dict_type, inplace=True)
plantlist["type"].unique()
# Check if all types have been translated.
# Loop variable renamed from 'type', which shadowed the builtin type().
for plant_type in plantlist["type"].unique():
    if plant_type not in dict_type.values() and str(plant_type) != "nan":
        logger.error("Untranslated type: %s", plant_type)
#plantlist.head()
# Set technology based on fuels
# Each rule only fills rows whose technology is still ''/NaN, so earlier
# (more specific) rules take precedence over later ones.
# NOTE(review): chained assignment (df[col][mask] = value) is used for all
# rules in this cell; on newer pandas this should be df.loc[mask, col].
plantlist['technology'][(plantlist['fuel']=='uranium')&((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'ST'
plantlist['technology'][(plantlist['fuel']=='lignite')&((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'ST'
plantlist['technology'][(plantlist['fuel']=='coal')&((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'ST'
plantlist['technology'][(plantlist['fuel']=='run_of_river')&((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'ROR'
plantlist['technology'][(plantlist['fuel']=='pumped_storage')&((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'PSP'
plantlist['technology'][(plantlist['fuel']=='reservoir')&((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'RES'
# Set technology based on name and block information combined with fuels (e.g. combined-cycle, gas turbine)
## Define technology CC as combination of GT and DT
plantlist['technology'][((plantlist['name'].str.contains("GT"))|(plantlist['block'].str.contains("GT")))&
                        ((plantlist['name'].str.contains("DT"))|(plantlist['block'].str.contains("DT")))&
#                       ((plantlist['fuel']=='gas')|(plantlist['fuel']=='oil'))&
                        ((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'CC'
## Define technology CC if specified as GuD
# NOTE(review): the second 'GUD' check below inspects 'name' twice;
# 'block' was probably intended — verify before changing.
plantlist['technology'][((plantlist['name'].str.contains("GuD"))|(plantlist['block'].str.contains("GuD"))|
                        (plantlist['name'].str.contains("GUD"))|(plantlist['name'].str.contains("GUD")))&
#                       ((plantlist['fuel']=='gas')|(plantlist['fuel']=='oil'))&
                        ((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'CC'
## Define technology GT
plantlist['technology'][((plantlist['name'].str.contains("GT"))|(plantlist['block'].str.contains("GT"))|
                        (plantlist['name'].str.contains("Gasturbine"))|(plantlist['block'].str.contains("Gasturbine")))&
#                       ((plantlist['fuel']=='gas')|(plantlist['fuel']=='oil'))&
                        ((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'GT'
## Define technology ST
plantlist['technology'][((plantlist['name'].str.contains("DT"))|(plantlist['block'].str.contains("DT"))|
                        (plantlist['name'].str.contains("Dampfturbine"))|(plantlist['block'].str.contains("Dampfturbine"))|
                        (plantlist['name'].str.contains("Dampfkraftwerk"))|(plantlist['block'].str.contains("Dampfkraftwerk"))|
                        (plantlist['name'].str.contains("DKW"))|(plantlist['block'].str.contains("DKW")))&
#                       ((plantlist['fuel']=='gas')|(plantlist['fuel']=='oil'))&
                        ((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'ST'
## Define technology CB (combustion engine)
plantlist['technology'][((plantlist['name'].str.contains("motor"))|(plantlist['block'].str.contains("motor"))|
                        (plantlist['name'].str.contains("Motor"))|(plantlist['block'].str.contains("Motor")))&
#                       ((plantlist['fuel']=='gas')|(plantlist['fuel']=='oil'))&
                        ((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'CB'
# Set technology ST for all technologies which could not be identified
plantlist['technology'][((plantlist['technology']=='')|(plantlist['technology'].isnull()))] = 'ST'
#plantlist.to_excel('power_plants_germany_tech.xlsx', sheet_name='output')
#plantlist.to_excel('power_plants_germany_tech.xlsx', sheet_name='output')
# Efficiencies
# Hand-researched net efficiencies per BNetzA id, with a source string.
data_efficiencies_bnetza=pd.read_csv('inputs/input_efficiency_de.csv',
                                     sep=';', # CSV field separator, default is ','
                                     decimal='.', # Decimal separator, default is '.')
                                     encoding='cp1252')
data_efficiencies_bnetza = data_efficiencies_bnetza.set_index('id')
data_efficiencies_bnetza['efficiency_net'] = pd.to_numeric(data_efficiencies_bnetza['efficiency_net'],errors='coerce')
data_efficiencies_bnetza['efficiency_source'] = 'Efficiency source: ' + data_efficiencies_bnetza['efficiency_source']
# Keep only rows with a usable numeric efficiency.
data_efficiencies_bnetza = data_efficiencies_bnetza.dropna(subset=['efficiency_net'])
data_efficiencies_bnetza
plantlist = pd.merge(plantlist, data_efficiencies_bnetza, left_index=True, right_index=True, how='left')
#Add comment: append the efficiency source to the running comment column,
# then strip the 'nan' remnants produced by missing pieces.
plantlist['comment_interim'] = plantlist.apply(lambda x:'%s ; %s' % (x['comment'],x['efficiency_source']),axis=1)
plantlist['comment_interim'] = plantlist['comment_interim'].str.replace('nan ;', '')
plantlist['comment_interim'] = plantlist['comment_interim'].str.replace('; nan', '')
plantlist['comment_interim'] = plantlist['comment_interim'].str.replace('nan', '')
plantlist = plantlist.drop('comment', axis=1)
plantlist = plantlist.rename(columns = {'comment_interim':'comment'})
plantlist.head()
plantlist_for_efficiency_analysis = plantlist
plantlist_for_efficiency_analysis = plantlist_for_efficiency_analysis.dropna(subset=['efficiency_net'])
fuel_for_plot = ['lignite', 'coal', 'oil', 'gas']
plantlist_for_efficiency_analysis = plantlist_for_efficiency_analysis[plantlist_for_efficiency_analysis.fuel.isin(fuel_for_plot)]
plot_efficiency_type = Scatter(plantlist_for_efficiency_analysis,
notebook=True,
x='commissioned_float',
y='efficiency_net',
color='fuel',
title='Efficiency vs commissioning year',
xlabel='Year',
ylabel='Efficiency',
legend="top_left",
height=700,
width=1000,
)
show(plot_efficiency_type)
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
# Fit, per fuel, a linear model of net efficiency on commissioning year,
# CHP flag and UBA technology; keep the fitted models in a dict.
olslist = {}
for fuelnames in plantlist["fuel"].unique():
    plantlist_for_efficiency_analysis = plantlist[(plantlist.fuel==fuelnames) & (plantlist.efficiency_net.notnull()==True)]
    # Require at least 4 observations for a meaningful fit.
    if len(plantlist_for_efficiency_analysis.index)>=4:
        efficiencyestimate = ols("efficiency_net ~ commissioned_float + chp +uba_technology ", plantlist_for_efficiency_analysis).fit()
        #Other possible inputs: + chp + capacity_float + status + np.square(commissioned_float) + np.log(commissioned_float)
        #efficiencyestimate.summary()
        olslist[fuelnames]=efficiencyestimate
        #print(efficiencyestimate.params)
        print(efficiencyestimate.summary())
        #estimatedict = efficiencyestimate.params.to_dict()
        #plantlist['efficiency_estimated'] = estimatedict['Intercept']
        #for key in estimatedict:
        #    if key != 'Intercept':
        #        plantlist['efficiency_estimated'][plantlist.fuel==fuelnames] += estimatedict[key] * plantlist[key][plantlist.fuel==fuelnames]
        # Plot the fit against the data for this fuel.
        fig, ax = plt.subplots()
        fig = sm.graphics.plot_fit(efficiencyestimate, 'commissioned_float', ax=ax)
        plt.ylabel("Efficiency")
        plt.xlabel("Commissioned")
        plt.title(fuelnames)
        #plt.plot(plantlist['commissioned_float'][plantlist.fuel==fuelnames], plantlist['efficiency_net'][plantlist.fuel==fuelnames], 'ro')
        #plt.plot(plantlist['commissioned_float'][plantlist.fuel==fuelnames], plantlist['efficiency_estimated'][plantlist.fuel==fuelnames], 'bo')
        plt.legend(['Data', 'Fitted model'], loc=2)
        #plt.axis([1900, 2020, 0.15, 1])
        plt.show()
#Todo
Jonas Egerer, Clemens Gerbaulet, Richard Ihlenburg, Friedrich Kunz, Benjamin Reinhard, Christian von Hirschhausen, Alexander Weber, Jens Weibezahn (2014): Electricity Sector Data for Policy-Relevant Modeling: Data Documentation and Applications to the German and European Electricity Markets. DIW Data Documentation 72, Berlin, Germany.
# Literature-based efficiency estimates: intercept + slope per (fuel,
# technology), applied to the commissioning (or, if later, retrofit) year.
data_efficiencies_literature=pd.read_csv('inputs/input_efficiency_literature_by_fuel_technology.csv',
                                         sep=',', # CSV field separator, default is ','
                                         decimal='.', # Decimal separator, default is '.')
                                         encoding='utf8')
data_efficiencies_literature['technology'] = data_efficiencies_literature['technology'].str.upper()
data_efficiencies_literature = data_efficiencies_literature.set_index(['fuel','technology'])
data_efficiencies_literature
plantlist = plantlist.join(data_efficiencies_literature,on=['fuel','technology'])
# Linear model: efficiency = intercept + slope * max(commissioned, retrofit).
plantlist['efficiency_literature'] = plantlist['efficiency_intercept']+plantlist['efficiency_slope']*plantlist[['commissioned_float','retrofit_float']].max(axis=1)
plantlist
# Hand-researched plant coordinates per BNetzA id.
data_plant_locations=pd.read_csv('inputs/input_plant_locations_de.csv',
                                 sep=';', # CSV field separator, default is ','
                                 decimal='.', # Decimal separator, default is '.')
                                 encoding='cp1252')
data_plant_locations = data_plant_locations.set_index('id')
#data_plant_locations['googlemapslink'] = 'http://maps.google.com/maps?q=loc:' + data_plant_locations['lat'].map(str) + ',' + data_plant_locations['lon'].map(str) + '&t=h'
# Coerce coordinates to numeric; malformed entries become NaN.
data_plant_locations['lat'] = pd.to_numeric(data_plant_locations['lat'], errors='coerce')
data_plant_locations['lon'] = pd.to_numeric(data_plant_locations['lon'], errors='coerce')
plantlist = pd.merge(plantlist, data_plant_locations, left_index=True, right_index=True, how='left')
plantlist.head()
This section tests the results in various ways to find errors
# Sanity check: total capacity per fuel, plotted as a bar chart.
pivot_fuel_capacity = pd.pivot_table(
    plantlist,
    values='capacity',
    index='fuel',
    aggfunc=[np.sum]
)
# ascending=False instead of the truthy-but-obscure ascending=0.
pivot_fuel_capacity.sort_values(by='sum', inplace=True, ascending=False)
#pivot_fuel_capacity
pivot_fuel_capacity_plot = pivot_fuel_capacity.plot(kind='bar', legend=False, figsize=(12, 6))
pivot_fuel_capacity_plot.set_ylabel("MW")
pivot_fuel_capacity_plot
# Sanity check: capacity per fuel, broken down by plant status.
pivot_status_capacity = pd.pivot_table(
    plantlist,
    values='capacity',
    columns='status',
    index='fuel',
    aggfunc=np.sum
)
# ascending=False instead of the truthy-but-obscure ascending=0.
pivot_status_capacity.sort_values(by='operating', inplace=True, ascending=False)
#pivot_status_capacity
pivot_status_capacity_plot = pivot_status_capacity.plot(kind='barh', stacked=True, legend=True, figsize=(12, 6))
pivot_status_capacity_plot.set_xlabel("MW")
pivot_status_capacity_plot
# Capacity by commissioning year, stacked by fuel, to spot implausible vintages.
# Optionally restrict here, e.g. plantlist[plantlist.fuel == 'coal'].
plantlist_filtered = plantlist
pivot_age_capacity = pd.pivot_table(
    plantlist_filtered,
    values='capacity',
    columns='fuel',
    index='commissioned',
    aggfunc=np.sum,
    dropna=True
)
#pivot_age_capacity
pivot_age_capacity_plot = pivot_age_capacity.plot(
    kind='bar', stacked=True, legend=True, figsize=(17, 10))
pivot_age_capacity_plot.set_ylabel("MW")
pivot_age_capacity_plot
This chart is suitable for spotting outliers in commissioning years and block sizes. In theory, there should be no unexpected values; e.g., all commissioning years should be greater than 1900, and block sizes above 2,000 MW are also unlikely.
# Scatter of block size over commissioning year. The plot needs numeric axes,
# so both columns are coerced on a deep copy of the plant list.
plantlist_for_plot = plantlist.copy(deep=True)
for column in ('capacity', 'commissioned'):
    plantlist_for_plot[column + '_float'] = pd.to_numeric(
        plantlist_for_plot[column], errors='coerce')
plot_blocksize_year = Scatter(
    plantlist_for_plot,
    notebook=True,
    x='commissioned_float',
    y='capacity_float',
    color='fuel',
    title='Block-Size vs Year of Commissioning',
    xlabel='Year',
    ylabel='MW',
    legend="top_left",
    height=500,
    width=700,
)
show(plot_blocksize_year)
List all entries with zero capacity.
# Entries reported with zero capacity.
plantlist[plantlist.capacity == 0]
# Plants with commissioning dates at or before 1900 (implausible).
plantlist[plantlist['commissioned_float'] <= 1900]
# Plants with invalid (non-numeric) commissioning dates.
plantlist[plantlist['commissioned_float'].isnull()]
# Entries whose BNetzA and UBA postcodes diverge (when a UBA postcode exists).
# The redundant `== True` on the boolean mask has been dropped.
plantlist[plantlist['uba_postcode'].notnull()
          & (pd.to_numeric(plantlist.postcode, errors='coerce')
             != pd.to_numeric(plantlist.uba_postcode, errors='coerce'))]
# Ratio of BNetzA to UBA capacity per block; values far from 1 flag mismatches.
capacitycomparison = pd.DataFrame(plantlist.capacity / plantlist.uba_capacity)
for column, values in (('Name', plantlist.name),
                       ('Block', plantlist.block),
                       ('BnetzaCapacity', plantlist.capacity),
                       ('UBACapacity', plantlist.uba_capacity)):
    capacitycomparison[column] = values
capacitycomparison.dropna(inplace=True)
# Column 0 holds the capacity ratio computed above.
capacitycomparison.sort_values(by=0)
# Difference between BNetzA and UBA commissioning years per block.
commissioningcomparison = pd.DataFrame(plantlist.commissioned)
commissioningcomparison['UBACommissioned'] = plantlist.uba_commissioned
for column in ('commissioned', 'UBACommissioned'):
    commissioningcomparison[column + '_float'] = pd.to_numeric(
        commissioningcomparison[column], errors='coerce')
commissioningcomparison['Faktor'] = (
    commissioningcomparison.commissioned_float
    - commissioningcomparison.UBACommissioned_float)
commissioningcomparison['Name'] = plantlist.name
commissioningcomparison['Block'] = plantlist.block
# Keep only rows where both sources report a commissioning year
# (equivalent to the two consecutive dropna calls of the original).
commissioningcomparison.dropna(subset=['commissioned', 'UBACommissioned'],
                               inplace=True)
commissioningcomparison = commissioningcomparison.sort_values(by='Faktor')
commissioningcomparison
# Merge uba_name_aggregate and uba_name: where no aggregate name exists,
# fall back to the plain UBA name. The original used chained indexing
# (plantlist[col][mask] = ...), which assigns through a temporary object and
# triggers pandas' SettingWithCopy warning; fillna assigns reliably.
plantlist['uba_name_aggregate'] = (
    plantlist['uba_name_aggregate'].fillna(plantlist['uba_name']))
# Drop raw/intermediate columns that are not part of the published output.
cols_to_drop = [
    'bnetza_id', 'capacity', 'uba_name',
    'uba_capacity_original', 'uba_chp_capacity_original',
    'uba_city', 'uba_commissioned', 'uba_company', 'uba_company_aggregate',
    'uba_fuel', 'uba_postcode', 'uba_state',
    'uba_technology', 'uba_technology_aggregate',
    'retrofit',
    'uba_commissioned_1', 'uba_commissioned_2', 'uba_commissioned_3',
    'commissioned_1', 'commissioned_2', 'commissioned_3',
    'fuel_basis', 'fuel_multiple1', 'fuel_multiple2',
    'efficiency_gross', 'efficiency_intercept', 'efficiency_slope',
    'efficiency_source', 'source_type', 'date', 'location_checked',
]
plantlist = plantlist.drop(cols_to_drop, axis=1)
# Rename columns: the cleaned *_float values become the canonical columns,
# the raw commissioning string is kept as *_original, and UBA-sourced
# columns get a '_uba' suffix.
column_renames = {
    'commissioned': 'commissioned_original',
    'commissioned_float': 'commissioned',
    'retrofit_float': 'retrofit',
    'capacity_float': 'capacity',
    'uba_capacity': 'capacity_uba',
    'uba_chp_capacity': 'chp_capacity_uba',
    'efficiency_net': 'efficiency_data',
    'efficiency_literature': 'efficiency_estimate',
    'uba_name_aggregate': 'name_uba',
}
plantlist = plantlist.rename(columns=column_renames)
# Bring the output columns into a fixed, documented order.
columns_sorted = [
    'company', 'name', 'postcode', 'city', 'street', 'state', 'block',
    'commissioned_original', 'commissioned', 'retrofit', 'shutdown',
    'status', 'fuel', 'technology', 'type', 'eeg', 'chp',
    'capacity', 'capacity_uba', 'chp_capacity_uba',
    'efficiency_data', 'efficiency_estimate',
    'network_node', 'voltage', 'network_operator',
    'name_uba', 'lat', 'lon', 'comment',
]
plantlist = plantlist.reindex(columns=columns_sorted)
plantlist.head()
We document the data package's metadata in the JSON format proposed by the Open Knowledge Foundation. See the Frictionless Data project by OKFN (http://data.okfn.org/) and the Data Package specifications (http://dataprotocols.org/data-packages/) for more details.
In order to keep the notebook readable, we first formulate the metadata in the human-readable YAML format using a multi-line string. We then parse the string into a Python dictionary and save that to disk as a JSON file.
# Here we define meta data of the resulting data package.
# The meta data follows the specification at:
# http://dataprotocols.org/data-packages/
# Two fixes versus the previous version:
#  * the schema field 'uba_name_aggregate' is renamed to 'name_uba', matching
#    the actual column name of the output CSV after the column renaming above;
#  * yaml.safe_load replaces yaml.load, which is unsafe/deprecated without an
#    explicit Loader (safe_load suffices for this static document).
metadata = """
name: opsd-power-plants-germany
title: List of power plants in Germany.
description: This dataset contains an augmented and corrected power plant list based on the power plant list provided by the BNetzA.
version: 0.0.1
keywords: [power plants,germany]
resources:
  - path: power_plants_germany.csv
    format: csv
    mediatype: text/csv
    schema:  # Schema according to: http://dataprotocols.org/json-table-schema/
      fields:
        - name: id
          description: Power plant ID based on the ID provided in the BNetzA-list.
          type: string
        - name: company
          description: Company name
          type: string
        - name: name
          description: Power plant name
          type: string
          format: default
        - name: postcode
          description: Postcode
          type: string
          format: default
        - name: city
          description: City
          type: string
          format: default
        - name: street
          description: Street
          type: string
          format: default
        - name: state
          description: State
          type: string
          format: default
        - name: block
          description: Power plant block
          type: string
          format: default
        - name: commissioned_original
          description: Year of commissioning (raw data)
          type: string
          format: default
        - name: commissioned
          description: Year of commissioning
          type: integer
          format: default
        - name: retrofit
          description: Year of modernization according to UBA data
          type: integer
          format: default
        - name: shutdown
          description: Year of decommissioning
          type: integer
          format: default
        - name: status
          description: Power plant status
          type: string
          format: default
        - name: fuel
          description: Used fuel or energy source
          type: string
          format: default
        - name: technology
          description: Power plant technology or sort
          type: string
          format: default
        - name: type
          description: Purpose of the produced power
          type: string
          format: default
        - name: eeg
          description: Status of being entitled to a renumeration
          type: boolean
          format: default
        - name: chp
          description: Status of being able to supply heat
          type: boolean
          format: default
        - name: capacity
          description: Power capacity
          type: number
          format: default
        - name: capacity_uba
          description: Power capacity according to UBA data
          type: number
          format: default
        - name: chp_capacity_uba
          description: Heat capacity according to UBA data
          type: number
          format: default
        - name: efficiency_data
          description: Proportion between power output and input
          type: number
          format: default
        - name: efficiency_estimate
          description: Estimated proportion between power output and input
          type: number
          format: default
        - name: network_node
          description: Connection point to the electricity grid
          type: string
          format: default
        - name: voltage
          description: Grid or transformation level of the network node
          type: string
          format: default
        - name: network_operator
          description: Network operator of the grid or transformation level
          type: string
          format: default
        - name: name_uba
          description: Power plant name according to UBA data
          type: string
          format: default
        - name: lat
          description: Precise geographic coordinates - latitude
          type: number
          format: default
        - name: lon
          description: Precise geographic coordinates - longitude
          type: number
          format: default
        - name: comment
          description: Further comments
          type: string
          format: default
licenses:
  - url: http://example.com/license/url/here
    name: License Name Here
    version: 1.0
    id: license-id-from-open
sources:
  - name: BNetzA Kraftwerksliste,
    web: http://www.bundesnetzagentur.de/DE/Sachgebiete/ElektrizitaetundGas/Unternehmen_Institutionen/Versorgungssicherheit/Erzeugungskapazitaeten/Kraftwerksliste/kraftwerksliste-node.html
  - name: Umweltbundesamt Datenbank Kraftwerke in Deutschland,
    web: http://www.umweltbundesamt.de/dokument/datenbank-kraftwerke-in-deutschland
maintainers:
  - name: OPSD-Team,
    email: OPSD-Team-email,
    web: http://open-power-system-data.org/
views:
  # You can put hints here which kind of graphs or maps make sense to display your data. This makes the
  # Data Package Viewer at http://data.okfn.org/tools/view automatically display visualizations of your data.
  # See http://data.okfn.org/doc/data-package#views for more details.
# extend your datapackage.json with attributes that are not
# part of the data package spec
# you can add your own attributes to a datapackage.json, too
openpowersystemdata-enable-listing: True  # This is just an example we don't actually make use of yet.
"""
# safe_load is sufficient here: the document is a static literal and uses
# no Python-specific YAML tags.
metadata = yaml.safe_load(metadata)
datapackage_json = json.dumps(metadata, indent=4, separators=(',', ': '))
Write the outputs
output_path = 'output/datapackage_powerplants_germany/'
output_path2 = 'output/datapackage_powerplants_germany'
# Write the plant list as CSV ...
plantlist.to_csv(os.path.join(output_path, 'power_plants_germany.csv'),
                 encoding='utf-8')
# ... and as an Excel workbook.
plantlist.to_excel(os.path.join(output_path, 'power_plants_germany.xlsx'),
                   sheet_name='output')
# Write the data package metadata alongside the data.
with open(os.path.join(output_path, 'datapackage.json'), 'w') as f:
    f.write(datapackage_json)
#Set this string to this notebook's filename!
nb_filename = 'Power_Plants_DE.ipynb'
# Save a copy of the notebook to markdown, to serve as the package README file
subprocess.call(['ipython', 'nbconvert', '--to', 'markdown', nb_filename])
path_readme = os.path.join(output_path2, 'README.md')
# os.replace overwrites an existing README atomically; this replaces the
# previous remove-then-rename sequence whose blanket `except Exception: pass`
# could hide real errors (e.g. a directory in the way).
os.replace(nb_filename.replace('.ipynb', '.md'), path_readme)