Part 1 of the script (Download and process Notebook) has downloaded and merged the original data. This Notebook subsequently checks, validates the list of renewable power plants and creates CSV/XLSX/SQLite files. It also generates a daily time series of cumulated installed capacities by energy source.
(Before running this script make sure you ran Part 1, so that the renewables.pickle files for each country exist in the same folder as the scripts)
# Metadata describing this release of the data package.
settings = {
    'version': '2020-08-25',
    'changes': 'Updated all countries with new data available (DE, FR, PL, CH, DK, UK), added data for CZ and SE.'
}
# The reference date of the data equals the version date of the release.
settings['referenceDate'] = settings['version']
import json
import logging
import os
import urllib.parse
import re
import zipfile
import gc
import pandas as pd
import numpy as np
import requests
import sqlalchemy
import yaml
import hashlib
import os
import fiona
import cartopy.io.shapereader as shpreader
import shapely.geometry as sgeom
from shapely.prepared import prep
from shapely.ops import unary_union
import fake_useragent
import datetime
import xlsxwriter
from IPython.display import Markdown
%matplotlib inline
# Let pandas show up to 40 columns per dataframe (the default is 20).
pd.options.display.max_columns = 40
# Ensure the input and output folder structure exists.
for folder in [os.path.join('input', 'original_data'),
               'output',
               os.path.join('output', 'renewable_power_plants')]:
    os.makedirs(folder, exist_ok=True)
# The versioned folder which will receive all output files of this run.
package_path = os.path.join('output', 'renewable_power_plants', settings['version'])
os.makedirs(package_path, exist_ok=True)
# Overview of the data sources used by Part 1.
source_df = pd.read_csv(os.path.join('input', 'sources.csv'))
source_df
First, select the countries you want to validate. Fill in the array countries
with their codes.
Run the following cell to see which countries are available in this version.
# Show which countries are available in this version (the 'EU' aggregate is excluded).
set(source_df['country'].unique().tolist()) - set(['EU'])
# Fill in this array with the codes of the countries you want to validate.
# E.g. countries = ['CH', 'CZ', 'DE', 'DK', 'FR', 'PL', 'SE', 'UK']
countries = ['CH', 'CZ', 'DE', 'DK', 'FR', 'PL', 'SE', 'UK']
#countries = ['FR']
Now, load the data on the selected countries.
# Country code sets used further below (the 'EU' aggregate is not a country).
all_countries = set(source_df['country'].unique().tolist()) - set(['EU'])
all_countries_non_DE = all_countries - set(['DE'])
all_countries_including_dirty = all_countries | set(['DE_dirty', 'FR_dirty'])
# Read the per-country pickles produced by script Part 1 (download_and_process).
dfs = {}
for country in countries:
    print('Loading', country)
    pickle_path = os.path.join('intermediate', country + '_renewables.pickle')
    if not os.path.exists(pickle_path):
        print('\tThe file', pickle_path, 'does not exist.')
    else:
        dfs[country] = pd.read_pickle(pickle_path)
        print('\tDone!')
# Calling garbage collector may speed up the process on some machines
gc.collect()
The coastline shapefile is needed to check if the geocoordinates of the land powerplants point to a land location, and conversely, if the geocoordinates of the onshore facilities point to a location not on land.
# Download the GSHHS coastline archive (used later for the land/sea check).
coastline_url = 'https://www.ngdc.noaa.gov/mgg/shorelines/data/gshhg/latest/gshhg-shp-2.3.7.zip'
user_agent = fake_useragent.UserAgent()
directory_path = os.path.join('input', 'maps', 'coastline')
os.makedirs(directory_path, exist_ok=True)
filepath = os.path.join(directory_path, 'gshhg-shp-2.3.7.zip')
# Download the archive only if it is not present yet.
if not os.path.exists(filepath):
    session = requests.session()
    print(coastline_url)
    print('Downloading...')
    headers = {'User-Agent': user_agent.random}
    r = session.get(coastline_url, headers=headers, stream=True)
    # The server may omit content-length; in that case int(None) would crash,
    # so fall back to downloading without a progress percentage.
    total_size = r.headers.get('content-length')
    total_size = int(total_size) if total_size is not None else None
    chunk_size = 4096
    with open(filepath, 'wb') as file:
        downloaded = 0
        for chunk in r.iter_content(chunk_size):
            file.write(chunk)
            # Count the actual bytes received (the last chunk may be short).
            downloaded += len(chunk)
            if total_size:
                print('\rProgress: {:.2f}%'.format(100 * downloaded / float(total_size)), end='')
    print(' Done.')
    # Use a context manager so the archive handle is always closed.
    with zipfile.ZipFile(filepath, 'r') as zip_ref:
        zip_ref.extractall(directory_path)
else:
    print('The file is already there:', filepath)
# Path of the full-resolution level-1 (land) shapefile inside the archive.
coastline_shapefile_path = os.path.join('input', 'maps', 'coastline', 'GSHHS_shp', 'f', 'GSHHS_f_L1.shp')
print("Shapefile path: ", coastline_shapefile_path)
This section checks the DataFrame for a set of pre-defined criteria and adds markers to the entries in an additional column. The marked data will be included in the output files, but flagged, so that they can be easily filtered out. For creating the validation plots and the time series, suspect data is skipped.
Each marker will be represented by its code, the country code it applies to, short and long descriptions, as well as a Python function that returns a boolean mask whose True values denote the marked rows.
We proceed by defining the markers for Germany, France and the United Kingdom, and storing them in a dictionary. We also define a routine for extracting country-specific markers from it.
# Dictionary collecting every validation marker, keyed by its code (e.g. 'R_1').
validation_markers = {}

def get_markers(validation_markers, country):
    """Return the subset of `validation_markers` defined for `country`.

    Each returned entry keeps the marker's short/long explanations and its
    masking function, but drops the 'Country' field.
    """
    markers = {}
    for code, marker in validation_markers.items():
        if marker['Country'] != country:
            continue
        markers[code] = {
            'Short explanation': marker['Short explanation'],
            'Long explanation': marker['Long explanation'],
            'function': marker['function'],
        }
    return markers
# --- Germany ---
# R_1: BNetzA entries commissioned on or before the cutoff date are most
# likely also represented in the TSO data and would be counted twice.
key = 'R_1'
# NOTE: the original assigned the string '2017-12-31' first and immediately
# overwrote it with the Timestamp below; the dead assignment is removed.
cutoff_date_bnetza = pd.Timestamp(2017, 12, 31)
R1_DE_marker_function = lambda df: (df['commissioning_date'] <= cutoff_date_bnetza) & \
                                   (df['data_source'].isin(['BNetzA', 'BNetzA_PV', 'BNetzA_PV_historic']))
validation_markers[key] = {
    "Country" : "DE",
    "function" : R1_DE_marker_function,
    "Short explanation": "data_source = BNetzA and commissioning_date < " + str(cutoff_date_bnetza.date()),
    "Long explanation": "This powerplant is probably also represented by an entry from the TSO data and should therefore be filtered out."
}
# R_2: notifications other than a commissioning refer to plants that already
# have an earlier entry.
key = 'R_2'
R2_DE_marker_function = lambda df: (df['notification_reason'] != 'Inbetriebnahme') & \
                                   (df['data_source'] == 'BNetzA')
validation_markers[key] = {
    "Country" : "DE",
    "function" : R2_DE_marker_function,
    "Short explanation": "notification_reason other than commissioning (Inbetriebnahme)",
    "Long explanation": "This powerplant is probably represented by an earlier entry already (possibly also from the TSO data) and should therefore be filtered out."
}
# R_3: the commissioning date is missing.
key = 'R_3'
R3_DE_marker_function = lambda df: df['commissioning_date'].isnull()
validation_markers[key] = {
    "Country" : "DE",
    "function" : R3_DE_marker_function,
    "Short explanation": "commissioning_date not specified",
    "Long explanation": ""
}
# R_4: no positive electrical capacity is given.
key = 'R_4'
R4_DE_marker_function = lambda df: df['electrical_capacity'] <= 0.0
validation_markers[key] = {
    "Country" : "DE",
    "function" : R4_DE_marker_function,
    "Short explanation": "electrical_capacity not specified",
    "Long explanation": ""
}
# R_5: decommissioned from one grid, probably re-commissioned under another
# grid operator. Just the entry which is not double should be kept, thus the
# other one is marked.
key = 'R_5'
R5_DE_marker_function = lambda df: df['grid_decommissioning_date'].notnull()
validation_markers[key] = {
    "Country" : "DE",
    "function" : R5_DE_marker_function,
    "Short explanation": "decommissioned from the grid",
    "Long explanation": "This powerplant is probably commissioned again to the grid of another grid operator and therefore this doubled entry should be filtered out."
}
# R_6: the plant is completely decommissioned.
key = 'R_6'
R6_DE_marker_function = lambda df: df['decommissioning_date'].notnull()
validation_markers[key] = {
    "Country" : "DE",
    "function" : R6_DE_marker_function,
    "Short explanation": "decommissioned",
    "Long explanation": "This powerplant is completely decommissioned."
}
# Note that we skip R7 here as R7 is used for French overseas power plants
# below (we never change meanings of R markers, so R7 stays reserved for that).
# R_8: duplicated EEG ids. This depends on BNetzA items being last in the
# list, because we want to keep the TSO items.
key = 'R_8'
R8_DE_marker_function = lambda df: (df.duplicated(['eeg_id'], keep='first')) & \
                                   (df['eeg_id'].notnull())
validation_markers[key] = {
    "Country" : "DE",
    "function" : R8_DE_marker_function,
    "Short explanation": "duplicate_eeg_id",
    "Long explanation": "This power plant is twice in the data (e.g. through BNetzA and TSOs)."
}
# --- France ---
# R_7: coordinates outside the European mainland (overseas territories).
key = 'R_7'
R7_FR_marker_function = lambda df: (df['lat'] < 41) | (df['lon'] < -6) | (df['lon'] > 10)
validation_markers[key] = {
    "Country" : "FR",
    "function" : R7_FR_marker_function,
    "Short explanation": "not connected to the European grid",
    "Long explanation": "This powerplant is located in regions belonging to France but not located in Europe (e.g. Guadeloupe)."
}
# R_10: ODRE plants that were never commissioned or are disconnected.
key = 'R_10'
R10_FR_marker_function = lambda df: ((df['data_source'] == 'OPEN DATA RESEAUX ENERGIES') &
                                     (
                                         (df['commissioning_date'].isnull()) |
                                         (df['disconnection_date'].notnull())
                                     )
                                    )
validation_markers[key] = {
    "Country" : "FR",
    "function" : R10_FR_marker_function,
    "Short explanation": "inactive",
    "Long explanation": "This powerplant is inactive: not commissioned or is disconnected from the grid."
}
# Build a point-in-polygon tester from the GSHHS coastline shapefile so that
# we can detect plants whose coordinates lie in the sea.
geoms = fiona.open(coastline_shapefile_path)
land_geom = sgeom.MultiPolygon([sgeom.shape(geom['geometry']) for geom in geoms])
land = prep(land_geom)

def not_on_land_but_should_be(powerplant_data):
    """Return True when the plant's coordinates are at sea although the
    facility is not an offshore (wind/marine) one."""
    longitude = powerplant_data['lon']
    latitude = powerplant_data['lat']
    # Without coordinates we cannot decide, so the row is not marked.
    if pd.isnull(longitude) or pd.isnull(latitude):
        return False
    if land.contains(sgeom.Point(longitude, latitude)):
        return False
    # Offshore facilities are allowed to be located off the coastline.
    offshore_ok = ('Offshore' in [powerplant_data['region'], powerplant_data['municipality']]
                   or powerplant_data['energy_source_level_2'] in ['Wind', 'Marine'])
    return not offshore_ok

# R_9: UK plants whose coordinates are at sea but which are not offshore farms.
key = 'R_9'
R9_UK_marker_function = lambda df: df.apply(not_on_land_but_should_be, axis=1)
validation_markers[key] = {
    "Country" : "UK",
    "function" : R9_UK_marker_function,
    "Short explanation": "Not on land, but should be.",
    "Long explanation": "The geocoordinates of this powerplant indicate that it is not on the UK mainland, but the facility is not an offshore wind farm."
}
# Apply every country-specific marker: marked rows get the marker code
# appended (pipe-separated) to their 'comment' column.
for country in countries:
    print(country)
    markers = get_markers(validation_markers, country)
    if not markers:
        print('\tNo markers for this country.')
        continue
    # Start from an empty comment column.
    dfs[country]['comment'] = ""
    for key in markers:
        print('\t', key)
        mask = markers[key]['function'](dfs[country])
        dfs[country].loc[mask, 'comment'] += (key + '|')
        # Free the (potentially large) mask right away.
        del mask
    print('\tDone!')
print('Done!')
For each country, show the total capacity aggregated by (1) the comment and data source and (2) the comment and energy level 2.
# For every country that has a comment column, show the capacity totals
# grouped by (comment, data_source) and by (comment, energy_source_level_2).
for country in countries:
    if 'comment' not in dfs[country].columns:
        continue
    print(country)
    for second_key in ('data_source', 'energy_source_level_2'):
        summary = (dfs[country]
                   .groupby(['comment', second_key])['electrical_capacity']
                   .sum()
                   .to_frame())
        display(summary)
Here, we define the order of the columns for each country.
In order to be consistent, we adopt the following approach. We define the following groups of columns:
Each column that appears in any dataset is classified as belonging to one and only one group. For each group, we define the inner order of the columns that belong to it.
We will enforce the following rules for all the countries: (1) if columns A and B are in the same group, then A appears before B if and only if it precedes B in the group's inner order, and vice versa; (2) if column A belongs to group i and column B to group j, then A appears before B if and only if i < j, and vice versa. This means that all energy columns present in the data appear before any source column present in the set, that all location columns in the dataset appear before any temporal column, and so on. The columns in the same group follow the group's inner order. If we were to order the union of column names of all the datasets according to these rules, we would get an array of columns that we'll call the default order.
Note 1: The datasets of different countries have different columns. For example, not all location columns are present in all the datasets because not all sources describe their plants with the same level of precision.
Note 2: All the datasets have all the columns in the energy and source columns.
# Group 1: what kind of energy the facility produces and its capacity.
energy_columns = [
    'electrical_capacity', 'energy_source_level_1', 'energy_source_level_2',
    'energy_source_level_3', 'technology'
]
# Group 2: where the data came from.
source_columns = [
    'data_source'
]
# Group 3: the plant's location (NUTS codes, coordinates, municipality, ...).
location_columns = [
    'nuts_1_region', 'nuts_2_region', 'nuts_3_region',
    'lon', 'lat', 'municipality', 'municipality_code',
    'postcode', 'address', 'region', 'region_code',
    'municipality_group', 'municipality_group_code',
    'departement', 'departement_code', 'county', 'locality',
    'country', 'district', 'canton', 'federal_state'
]
# Group 4: significant dates, such as the commissioning date.
temporal_columns = [
    'commissioning_date', 'decommissioning_date', 'connection_date',
    'disconnection_date', 'contract_period_end'
]
# Group 5: information about the plant's owner.
owner_columns = [
    'owner', 'company'
]
# Group 6: the plant's name and its id in the original source data.
name_and_id_columns = [
    'site_name', 'IRIS_code', 'URE_id', 'eeg_id', 'EIC_code',
    'uk_beis_id', 'se_vindbrukskollen_id'
]
# Group 7: technical aspects of the plant.
technical_columns = [
    'solar_mounting_type', 'chp', 'hub_height', 'rotor_diameter',
    'model', 'capacity_individual_turbine', 'gsrn_id', 'number_of_turbines',
    'voltage_level', 'number_of_installations'
]
# Group 8: everything that does not fit any of the groups above.
other_columns = [
    'tariff', 'project_name', 'dso', 'dso_id', 'tso', 'operator',
    'manufacturer', 'production', 'as_of_year', 'comment', 'geographical_resolution'
]
# The groups, in the order their columns may appear in a country's dataframe:
# e.g. every energy column precedes every source column, and so on.
ordered_groups = [
    energy_columns, source_columns, location_columns, temporal_columns,
    owner_columns, technical_columns, name_and_id_columns, other_columns
]
# Flatten the ordered groups into the default column order.
default_order = [column for group in ordered_groups for column in group]
# Uncomment the following line to show the default order
#default_order
Let us now apply those rules and define the order for each country.
# Text for a markdown table displaying the column order of every country ...
markdown_text = '| Country | Order of Columns |\n|:---|:---|\n'
# ... and a dictionary holding the per-country column order.
column_lists = {}
for country in dfs:
    # Every country gets all energy columns, even if some will stay empty.
    available = set(dfs[country].columns) | set(energy_columns)
    # The country's order is the default order restricted to its columns.
    ordered = [column for column in default_order if column in available]
    column_lists[country] = ordered
    markdown_text += '| {} | {} |\n'.format(country, ', '.join(ordered))
# Display the order of columns for each country
markdown = Markdown(markdown_text)
display(markdown)
Finally, let's sort the columns according to the orders we defined above.
# Reorder each country's columns according to the order defined above.
for country in column_lists:
    if country in countries and country in dfs:
        print('Harmonizing the order for', country)
        for column in column_lists[country]:
            if column not in dfs[country].columns:
                print(country, 'has no column named:', column, '. It will be empty. Check if that\'s ok.')
        # Use reindex instead of .loc: since pandas 1.0, .loc raises a
        # KeyError for missing labels, while the intent (see the warning
        # above) is that missing columns are created empty.
        dfs[country] = dfs[country].reindex(columns=column_lists[country])
        print('\tDone!')
Here, we clean the columns to make sure that integer columns really contain integers, to enforce the format for dates (YYYY-MM-DD), to control the precision for decimal columns, and to make all strings one-line and without trailing white spaces.
# Cleaning specification: for each cleaning type, the country -> columns
# mapping to which that cleaning is applied in the next cell.
cleaning_specs = {
    # Decimal columns: precision is limited to 14 decimal places.
    'decimal' : {
        'DE': ['electrical_capacity','lat','lon'],
        'DK': ['electrical_capacity','lat','lon'],
        'CH': ['electrical_capacity','lat','lon'],
        'FR': ['electrical_capacity','lat','lon'],
        'PL': ['electrical_capacity'],
        'UK': ['electrical_capacity', 'lat', 'lon'],
        'SE': ['electrical_capacity', 'lat', 'lon'],
        'CZ': ['electrical_capacity', 'lat', 'lon']
    },
    # Integer columns: coerced to numeric and formatted without a decimal part.
    'integer': {
        'DE': ['municipality_code'],
        'UK': ['uk_beis_id', 'number_of_turbines'],
        'FR': ['IRIS_code', 'departement_code', 'municipality_code', 'municipality_group_code',
               'region_code', 'as_of_year'],
        'PL': ['URE_id', 'as_of_year'],
        'DK': ['gsrn_id']
    },
    # Date columns: reduced to the date part (YYYY-MM-DD).
    'date': {
        'DE': ['commissioning_date', 'decommissioning_date'],
        'DK': ['commissioning_date'],
        'FR': ['commissioning_date', 'connection_date', 'disconnection_date'],
        'CH': ['commissioning_date'],
        'UK': ['commissioning_date'],
        'SE': ['commissioning_date']
    },
    # String columns: CR/LF characters are removed so values stay on one line.
    'one-line string': {
        'DE' : ['federal_state', 'municipality', 'address'],
        'DK' : ['municipality', 'address', 'manufacturer', 'model'],
        'FR' : ['municipality', 'EIC_code', 'site_name', 'departement', 'municipality_group', 'region'],
        'PL' : ['district', 'region'],
        'CH' : ['municipality', 'project_name', 'canton', 'address', 'company'],
        'UK' : ['address', 'municipality', 'site_name', 'region'],
        'SE' : ['municipality', 'county', 'manufacturer', 'se_vindbrukskollen_id', 'site_name'],
        'CZ' : ['site_name', 'owner', 'region', 'municipality', 'locality']
    }
}
def to_1_line(string):
    """Collapse a string onto a single line by removing CR and LF characters.

    Non-string values (including NaN) are passed through unchanged.
    """
    if pd.isnull(string) or not isinstance(string, str):
        return string
    without_cr = string.replace('\r', '')
    return without_cr.replace('\n', '')
# Apply the cleaning specs defined above, country by country and field by field.
for cleaning_type, cleaning_spec in cleaning_specs.items():
    for country, fields in cleaning_spec.items():
        # Skip countries that were not selected or could not be loaded.
        if country not in countries or country not in dfs:
            continue
        for field in fields:
            print('Cleaning ' + country + '.' + field +' to ' + cleaning_type + '.')
            if cleaning_type == 'decimal':
                # Limit the precision to 14 decimal places.
                dfs[country][field] = dfs[country][field].map(lambda x: round(x, 14))
            elif cleaning_type == 'integer':
                # Coerce to numeric first (invalid values become NaN), then
                # format without a decimal part. NaN becomes the string 'nan',
                # which is replaced by '' in a later cell.
                dfs[country][field] = pd.to_numeric(dfs[country][field], errors='coerce')
                dfs[country][field] = dfs[country][field].map(lambda x: '%.0f' % x)
            elif cleaning_type == 'date':
                # Keep only the date part (YYYY-MM-DD) of timestamp values.
                dfs[country][field] = dfs[country][field].apply(lambda x: x.date())
            elif cleaning_type == 'one-line string':
                # Strip CR/LF so every value stays on a single line.
                dfs[country][field] = dfs[country][field].apply(lambda x: to_1_line(x))
print('Done!')
Now, let us sort the plants in each dataset by the commissioning date, or the most precise location column if the source does not provide the date.
# Column used for sorting each country's plant list: the commissioning date
# where available, otherwise the most precise location column.
sort_by = {
    'DE': 'commissioning_date',
    'DK': 'commissioning_date',
    'CH': 'commissioning_date',
    'FR': 'commissioning_date',
    'PL': 'district',
    'UK': 'commissioning_date',
    'SE': 'commissioning_date'
}
# FIX: the loop variable used to shadow the dict name `sort_by`, so the final
# `del sort_by` deleted a leftover string instead of the mapping.
for country, sort_column in sort_by.items():
    if country not in countries or country not in dfs:
        continue
    print('Sorting', country)
    # Reorder the rows by the sort column, then renumber the index.
    dfs[country] = dfs[country].iloc[dfs[country][sort_column].sort_values().index]
    dfs[country].reset_index(drop=True, inplace=True)
    print('\tDone!')
print('Done!')
del sort_by
This step may take some time.
# Replace 'nan' strings (from the integer cleaning) and real NaNs with ''.
for country in dfs:
    print(country)
    frame = dfs[country]
    frame.replace('nan', '', inplace=True)
    frame.fillna('', inplace=True)
    print('\tDone!')
print('Done!')
We separate all plants in the FR and DE datasets which have a validation marker in the comments column into a separate DataFrame and eventually also in a separate CSV file, so the main country files only contain "clean" plants, i.e. those without any special comment. This is useful since all our comments denote that most people would probably not like to include them in their calculations.
The dirty power plants in the UK are not separated from the clean.
# Keys under which the outvalidated ("dirty") subsets are stored in dfs.
dirty_keys = {
    'DE' : 'DE_dirty',
    'FR' : 'FR_dirty'
}
# Only split countries that were actually selected for validation.
dirty_countries = [key for key in dirty_keys if key in countries]
for country in dirty_countries:
    # Rows carrying at least one validation marker in the comment column.
    idx_dirty = dfs[country][dfs[country].comment.str.len() > 1].index
    dirty_key = dirty_keys[country]
    # Move the marked rows into a separate "dirty" dataframe.
    dfs[dirty_key] = dfs[country].loc[idx_dirty]
    dfs[dirty_key].drop_duplicates(inplace=True)
    dfs[dirty_key].reset_index(drop=True, inplace=True)
    # Keep only the clean rows in the main dataframe; its comment column is
    # now empty everywhere, so drop it.
    dfs[country] = dfs[country].drop(idx_dirty, axis='index')
    dfs[country] = dfs[country].drop('comment', axis='columns')
    dfs[country].reset_index(drop=True, inplace=True)
    del idx_dirty
gc.collect()
# Define the column order for the outvalidated plants in Germany and France.
# FIX: guard against countries that were not selected -- the original code
# accessed column_lists['DE'] and column_lists['FR'] unconditionally and
# raised a KeyError when e.g. only FR was being validated.
for country, dirty_key in dirty_keys.items():
    if country not in column_lists:
        continue
    # The dirty datasets keep the comment column ...
    column_lists[dirty_key] = list(column_lists[country])
    # ... while the clean datasets drop it.
    column_lists[country] = [column for column in column_lists[country] if column != 'comment']
# Drop exact duplicates from every dataset and report how many were removed.
for country in dfs:
    print(country)
    n_before = len(dfs[country])
    dfs[country].drop_duplicates(inplace=True)
    dfs[country].reset_index(drop=True, inplace=True)
    n_after = len(dfs[country])
    print('\tDone! Dropped {} out of {} plants.'.format(n_before - n_after, n_before))
This section creates a daily and yearly time series of the cumulated installed capacity by energy source for the United Kingdom, Germany, Denmark, Switzerland, France and Sweden (if selected at the beginning of the notebook). Three time series will be created for the UK: one for the whole country (GB-UKM), one for Northern Ireland (GB-NIR), and one for the Great Britain (GB-GBN). The data will be a part of the output and will be compared in a plot for validation in the next section.
# Per-country daily cumulative capacity time series, filled below.
daily_timeseries = {}

def to_new_level(row):
    """Map a plant row to a standardized lower-case energy-type label.

    Wind plants are split further by technology (e.g. 'wind_onshore');
    every other source just uses its level-2 name.
    """
    if row['energy_source_level_2'] == 'Wind':
        return (row['energy_source_level_2'] + '_' + row['technology']).lower()
    return row['energy_source_level_2'].lower()

def min_date(series):
    """Return the earliest plain datetime.date value in the series."""
    # type(...) == datetime.date deliberately skips strings/NaN placeholders.
    mask = series.apply(lambda x: type(x) == datetime.date)
    return series[mask].min()

def max_date(series):
    """Return the latest plain datetime.date value in the series."""
    mask = series.apply(lambda x: type(x) == datetime.date)
    return series[mask].max()

def to_daily_timeseries(df, start_date, end_date):
    """Build a daily cumulative installed-capacity time series per energy type.

    Rows without a commissioning date are skipped. Returns a dataframe with a
    'day' column covering [start_date, end_date] and one capacity column per
    standardized energy type.
    """
    # Filter out rows with missing or empty commissioning dates.
    invalid_date_mask = df['commissioning_date'].apply(
        lambda x: pd.isna(x) or (isinstance(x, str) and len(x) == 0))
    # FIX: work on an explicit copy so the caller's frame is never mutated
    # and no SettingWithCopyWarning is triggered below.
    df = df.loc[~invalid_date_mask, :].copy()
    # Combine energy levels into standardized labels.
    df['energy_type'] = df[['energy_source_level_2', 'energy_source_level_3', 'technology']].apply(
        to_new_level, axis=1)
    # One row per day in the requested range.
    timeseries = pd.DataFrame(index=pd.date_range(start=start_date, end=end_date, freq='D'))
    for energy_type in df['energy_type'].unique():
        # Capacity added per commissioning date for this energy type.
        capacity = (df.loc[df['energy_type'] == energy_type,
                           ['commissioning_date', 'electrical_capacity']]
                    .set_index('commissioning_date')['electrical_capacity'])
        capacity.index = pd.DatetimeIndex(capacity.index)
        # Daily totals, then a running sum = cumulated installed capacity.
        cumulated = capacity.resample('D').sum().cumsum()
        # Align to the full range: carry the last value forward, and use 0
        # before the first commissioning date.
        # FIX: .ffill() replaces the deprecated fillna(method='ffill').
        timeseries[energy_type] = cumulated.reindex(timeseries.index).ffill().fillna(value=0)
    # Turn the date index into a regular 'day' column.
    timeseries.reset_index(inplace=True)
    timeseries.rename(columns={'index': 'day'}, inplace=True)
    return timeseries
# Only countries whose data carries a commissioning date can get a timeseries.
eligible_for_timeseries = [country for country in countries if 'commissioning_date' in dfs[country].columns]
possible_start_dates = [min_date(dfs[country]['commissioning_date']) for country in eligible_for_timeseries]
possible_end_dates = [max_date(dfs[country]['commissioning_date']) for country in eligible_for_timeseries]
if len(possible_start_dates) == 0:
    # FIX: the original printed this warning and then crashed on min([])
    # anyway; now the timeseries step is skipped entirely.
    print('We cannot create timeseries from this data. Please, skip the cells which deal with timeseries.')
else:
    # One common range for all countries so the series can be concatenated.
    start_date = min(possible_start_dates)
    end_date = max(possible_end_dates)
    for country in eligible_for_timeseries:
        print("Timeseries for", country)
        daily_timeseries[country] = to_daily_timeseries(dfs[country], start_date, end_date)
    print('Done!')
# Split the UK into Northern Ireland and Great Britain and build a separate
# timeseries for each part, renaming the whole-country entry to its ISO code.
if 'UK' in dfs:
    is_northern_ireland = dfs['UK']['country'] == 'Northern Ireland'
    northern_ireland_df = dfs['UK'][is_northern_ireland].copy()
    great_britain_df = dfs['UK'][~is_northern_ireland].copy()
    daily_timeseries['GB-NIR'] = to_daily_timeseries(northern_ireland_df, start_date, end_date)
    daily_timeseries['GB-GBN'] = to_daily_timeseries(great_britain_df, start_date, end_date)
    # Rename the entry for UK to conform to the ISO codes.
    daily_timeseries['GB-UKM'] = daily_timeseries.pop('UK')
# Create the column "wind" as the sum of the more specific wind technologies
# (onshore, offshore, other/unspecified) whenever more than one is present.
wind_technology_columns = ['wind_onshore', 'wind_offshore', 'wind_other or unspecified technology']
for country in daily_timeseries:
    present_technologies = [wind_column for wind_column in wind_technology_columns
                            if wind_column in daily_timeseries[country].columns]
    if len(present_technologies) > 1:
        print('Adding', ' and '.join(present_technologies), 'for', country)
        total = daily_timeseries[country][present_technologies[0]].copy()
        for wind_column in present_technologies[1:]:
            total += daily_timeseries[country][wind_column]
        daily_timeseries[country]['wind'] = total
# Append the country name to the capacity columns' names so the per-country
# frames can be concatenated side by side without name clashes.
for country_key in daily_timeseries:
    frame = daily_timeseries[country_key]
    frame.columns = [country_key + "_" + col + "_capacity" if col != 'day' else 'day'
                     for col in frame.columns]
# Unify the separate series into one frame.
# FIX: the original first built an empty indexed frame that this concat
# immediately overwrote; the dead assignment is removed.
unified_daily_timeseries = pd.concat(daily_timeseries.values(), axis=1, sort=False)
# Every country frame contributed a 'day' column; keep only the first one.
days = unified_daily_timeseries['day']
if not isinstance(days, pd.Series):
    unified_daily_timeseries.drop('day', axis=1, inplace=True)
    unified_daily_timeseries['day'] = days.iloc[:, 0]
unified_daily_timeseries
# Sort the capacity columns alphabetically ...
unified_daily_timeseries = unified_daily_timeseries.reindex(sorted(unified_daily_timeseries.columns), axis=1)
# ... then move the day column back to the first position.
unified_daily_timeseries = unified_daily_timeseries.set_index('day').reset_index()
# The DE and CH hydro columns only cover subsidised hydro, not all of hydro,
# which could be misleading -- drop them.
for code in ('DE', 'CH'):
    hydro_column = code + '_hydro_capacity'
    if code in countries and hydro_column in unified_daily_timeseries.columns:
        unified_daily_timeseries.drop(columns=hydro_column, inplace=True)
# Show some rows
unified_daily_timeseries.tail(2)
Here, we create a dataframe containing the following data for all the countries:
# Geographical precision of each source; FR mixes two levels depending on
# the data source, hence the callable.
geographical_resolution = {
    'PL' : 'power plant',
    'FR' : lambda x: 'power plant' if x['data_source'] == 'OPEN DATA RESEAUX ENERGIES' else 'municipality',
    'CH' : 'municipality',
    'DE' : 'power plant',
    'DK' : 'power plant',
    'UK' : 'power plant',
    'SE' : 'power plant',
    'CZ' : 'power plant'
}
# The common subset of columns that goes into the European dataframe.
columns = [ 'electrical_capacity', 'energy_source_level_1', 'energy_source_level_2', 'energy_source_level_3',
            'technology', 'data_source', 'nuts_1_region', 'nuts_2_region', 'nuts_3_region',
            'lon', 'lat', 'municipality', 'country', 'commissioning_date', 'as_of_year', 'geographical_resolution'
]
dfs_to_concat = []
for country in countries:
    if country not in dfs:
        continue
    country_df = dfs[country].loc[:, columns].copy()
    country_df['country'] = country
    resolution = geographical_resolution[country]
    if callable(resolution):
        country_df['geographical_resolution'] = country_df.apply(resolution, axis=1)
    else:
        country_df['geographical_resolution'] = resolution
    dfs_to_concat.append(country_df)
# Stack all countries, renumber, and remove exact duplicates.
european_df = pd.concat(dfs_to_concat)
european_df.reset_index(inplace=True, drop=True)
european_df.drop_duplicates(inplace=True)
european_df.reset_index(inplace=True, drop=True)
european_df.sample(n=5)
# Ensure the package output folder exists before writing any files into it.
os.makedirs(package_path, exist_ok=True)
# Make sure the daily timeseries has only the date part, not the full datetime with time information
unified_daily_timeseries['day'] = unified_daily_timeseries['day'].dt.date
One csv-file for each country. This process will take some time depending on your hardware.
# Write each country's dataset as a separate csv file.
table_names = {}
dirty_explanations = {
    'DE_dirty' : 'outvalidated_plants',
    'FR_dirty' : 'outvalidated_plants'
}
for country in dfs:
    print(country)
    if '_dirty' in country:
        base_code = country[:-len('_dirty')]
        table_names[country] = 'res_plants_separated_' + base_code + '_' + dirty_explanations.get(country, '')
    else:
        table_names[country] = 'renewable_power_plants_' + country
    csv_path = os.path.join(package_path, table_names[country] + '.csv')
    # NOTE(review): 'line_terminator' was renamed to 'lineterminator' in
    # pandas 1.5 -- update this call when upgrading pandas.
    dfs[country].to_csv(csv_path,
                        sep=',',
                        decimal='.',
                        date_format='%Y-%m-%d',
                        line_terminator='\n',
                        encoding='utf-8',
                        index=False)
    print('\tDone!')
print('Done!')
# Write the daily cumulated capacity time series as csv.
timeseries_csv_path = os.path.join(package_path, 'renewable_capacity_timeseries.csv')
unified_daily_timeseries.to_csv(timeseries_csv_path,
                                sep=',',
                                float_format='%.3f',
                                decimal='.',
                                date_format='%Y-%m-%d',
                                encoding='utf-8',
                                index=False)
print('Done!')
# Write the aggregated European plant list as csv.
eu_csv_path = os.path.join(package_path, 'renewable_power_plants_EU.csv')
# NOTE(review): 'line_terminator' was renamed to 'lineterminator' in
# pandas 1.5 -- update this call when upgrading pandas.
european_df.to_csv(eu_csv_path,
                   sep=',',
                   decimal='.',
                   date_format='%Y-%m-%d',
                   line_terminator='\n',
                   encoding='utf-8',
                   index=False)
print('Done!')
# Remove the (non-serializable) functions from the markers dictionary,
# keeping only the textual descriptions for the csv export.
validation_markers_new = {}
for marker_key in validation_markers:
    validation_markers_new[marker_key] = {}
    # BUG FIX: the original iterated validation_markers[key], where `key` was
    # a stale loop variable from an earlier cell, instead of the entry that
    # belongs to the current marker_key.
    for description_key in validation_markers[marker_key]:
        description = validation_markers[marker_key][description_key]
        if type(description) == str:
            validation_markers_new[marker_key][description_key] = description
validation_markers = validation_markers_new
# Write the marker explanations as csv.
validation_marker_df = pd.DataFrame(validation_markers).transpose().iloc[:, ::-1]  # reverse column order
validation_marker_df.index.name = 'Validation marker'
validation_marker_df.reset_index(inplace=True)
marker_csv_path = os.path.join(package_path, 'validation_marker.csv')
# NOTE(review): 'line_terminator' was renamed to 'lineterminator' in
# pandas 1.5 -- update this call when upgrading pandas.
validation_marker_df.to_csv(marker_csv_path,
                            sep=',',
                            decimal='.',
                            date_format='%Y-%m-%d',
                            line_terminator='\n',
                            encoding='utf-8',
                            index=False
                            )
All country power plant lists will be written into a single xlsx file, with each country's list in a separate sheet. If a country's dataset is too large to fit into a single sheet, we split it into several sheets. Each sheet is limited to 1,000,000 power plants at most.
The timeseries is placed in a sheet of its own.
An additional sheet includes the explanations of the markers.
Note: This process may take some time depending on your hardware.
def write_row(row, sheet, data_index, offset=0):
    """Write one data row into `sheet` at sheet row `data_index + 1 - offset`.

    The +1 leaves room for the header row; `offset` shifts the position when
    a dataset is split across several sheets. NaN values are written as ''.
    """
    target_row = data_index + 1 - offset
    for column, value in enumerate(row):
        sheet.write(target_row, column, '' if pd.isna(value) else value)
# Convert a column number to an Excel column name.
# Note: the column numbering is assumed to start from zero.
def excel_column_name(number):
    """Return the Excel letter name for zero-based column `number`.

    0 -> 'A', 25 -> 'Z', 26 -> 'AA', 701 -> 'ZZ', 702 -> 'AAA'.
    """
    letters = ''
    n = number
    while True:
        # Emit the least-significant letter first, prepending as we go.
        letters = chr(ord('A') + n % 26) + letters
        # Bijective base-26: shift down by one so 'A' can act as both 0 and 1.
        n = n // 26 - 1
        if n < 0:
            return letters
# Define the function for creating a named sheet in the given xlsxwriter Workbook, writing its header
# and formatting its columns according to a given dictionary of pairs {data_column: excel_format}.
def create_sheet(book, sheet_name, header, formats=None):
    """Create a worksheet named `sheet_name` in the xlsxwriter Workbook `book`.

    Writes `header` into row 0 and applies the format from `formats`
    (a {column_name: excel_format} mapping) to each matching column.
    Returns the created worksheet.
    """
    # Fix: avoid the mutable-default-argument pitfall; None means "no formats".
    if formats is None:
        formats = {}
    # Create the sheet
    sheet = book.add_worksheet(name=sheet_name)
    # Write the header
    sheet.write_row(0, 0, header)
    # Set the format (e.g. a date format) of each configured column,
    # addressed by its Excel column letter ('A:A', 'B:B', ...).
    for j, column_name in enumerate(header):
        column_format = formats.get(column_name)
        if column_format is not None:
            excel_name = excel_column_name(j)
            sheet.set_column('{}:{}'.format(excel_name, excel_name), None, column_format)
    return sheet
# Define the function for writing a dataframe to an Excel file efficiently.
# Make sure that book is an xlsxwriter.Workbook with constant_memory set to True:
# the data written to its sheets is then flushed after each row, keeping RAM
# usage flat. Note: pandas' to_excel cannot be used here because it writes
# column by column, which constant_memory forbids.
def write_df_to_excel(df, book, max_sheet_size, formats=None, name=None):
    """Write `df` into `book`, at most `max_sheet_size` rows per sheet.

    If the dataframe does not fit into one sheet it is split across sheets
    named '<name> part-1', '<name> part-2', ... Rows are written strictly in
    ascending order so constant_memory mode can flush each row immediately.
    `formats` maps column names to xlsxwriter formats (e.g. date formats).
    """
    # Normalize the formats argument (avoids a shared mutable default).
    if formats is None:
        formats = {}
    number_of_rows = df.shape[0]
    header = df.columns
    # Check if the df is too large to fit into one sheet.
    if number_of_rows > max_sheet_size:
        # Boundary indices: sheet i holds rows [boundaries[i], boundaries[i+1]).
        # range() never includes its stop value, so appending number_of_rows
        # always closes the final (possibly shorter) sheet; the old extra
        # "include the final index" check was dead code.
        boundaries = list(range(0, number_of_rows, max_sheet_size)) + [number_of_rows]
        number_of_sheets = len(boundaries) - 1
        print('\tSplitting the data into {} sheets'.format(number_of_sheets))
        for i in range(number_of_sheets):
            # 1-based part numbers are more readable in sheet names.
            if name is not None:
                sheet_name = (name + ' part-{}').format(i + 1)
            else:
                sheet_name = 'part-{}'.format(i + 1)
            sheet = create_sheet(book, sheet_name, header, formats)
            # This sheet's slice of the dataframe.
            start = boundaries[i]
            end = boundaries[i + 1]
            # The offset maps absolute dataframe indices to sheet-local rows;
            # it equals the number of rows written to all previous sheets.
            offset = start
            print('\t\tWriting [{}:{}] into the sheet number {}'.format(start, end, i + 1))
            # Bug fix: iterate only [start, end). The previous code looped
            # over *all* rows for every sheet, which produced zero/negative
            # sheet rows (clobbering headers) and overfilled earlier sheets.
            for data_int_index in range(start, end):
                row = df.iloc[data_int_index, :]
                write_row(row, sheet, data_int_index, offset)
    else:
        # Small enough for a single sheet.
        sheet = create_sheet(book, name, header, formats)
        for data_int_index in range(number_of_rows):
            row = df.iloc[data_int_index, :]
            write_row(row, sheet, data_int_index)
# Assemble the xlsx data package. constant_memory=True lets xlsxwriter flush
# each finished row to disk, keeping memory usage flat for the big datasets.
xlsx_path = os.path.join(package_path, 'renewable_power_plants.xlsx')
book = xlsxwriter.Workbook(xlsx_path, {'constant_memory': True})
# Each sheet holds at most one million power plants.
max_sheet_size = 10**6
# One shared cell format for all date columns.
date_format = book.add_format({'num_format':'yyyy-mm-dd'})
# One sheet (or sheet group) per country, clean and dirty datasets alike.
for country, country_df in dfs.items():
    print(country)
    # Render every column whose name mentions 'date' with the date format.
    column_formats = {}
    for column in country_df:
        if 'date' in column:
            column_formats[column] = date_format
    write_df_to_excel(country_df, book, max_sheet_size, formats=column_formats, name=country)
    print('\tDone!')
# Sheet with the validation-marker explanations.
print('Write the validation markers sheet')
write_df_to_excel(validation_marker_df, book, max_sheet_size, name='validation_marker')
print('\tDone!')
# Sheet with the cumulated-capacity time series.
print('Write the timeseries')
# Make sure that `day` gets formatted as a date yyyy-mm-dd
column_formats = {'day' : date_format}
write_df_to_excel(unified_daily_timeseries, book, max_sheet_size,
                  formats=column_formats, name='capacity_timeseries')
print('\tDone!')
print('Save the Excel file.')
book.close()
print('Done!')
# Export all datasets into a single SQLite database.
engine = sqlalchemy.create_engine('sqlite:///' + package_path + '/renewable_power_plants.sqlite')
# chunksize keeps memory usage low on small machines; removing it might speed things up.
sql_options = {'if_exists': "replace", 'chunksize': 100000, 'index': False}
# One table per country (clean as well as dirty datasets).
for country in all_countries_including_dirty:
    if country not in dfs:
        continue
    print(country)
    dfs[country].to_sql(table_names[country], engine, **sql_options)
    print('\tDone!')
print('Validation markers')
validation_marker_df.to_sql('validation_marker', engine, **sql_options)
print('Done!')
# Save the european df as sqlite
european_df.to_sql('renewable_power_plants_EU', engine, **sql_options)
# Save timeseries as sqlite
unified_daily_timeseries.to_sql('renewable_capacity_timeseries', engine, **sql_options)
The Data Package metadata are created in the specific JSON format as proposed by the Open Knowledge Foundation. Please see the Frictionless Data project by OKFN (http://data.okfn.org/) and the Data Package specifications (http://dataprotocols.org/data-packages/) for more details.
In order to keep the Jupyter Notebook more readable, the metadata is written in the human-readable YAML format using a multi-line string; the string is then parsed into a Python dictionary and saved as a JSON file.
# Auto-generate metadata strings: the covered-countries block and the various
# renderings of the country list used in the data-package description.
country_list_filepath = os.path.join('input', 'countries.csv')
countries_df = pd.read_csv(country_list_filepath).set_index('full_name')
countries_dict = countries_df.to_dict(orient='index')
countries = sorted(countries_dict)
# One indented "Country: description" line per country.
covered_countries = ''.join(
    ' {}: {}\n'.format(country, countries_dict[country]['data_description'])
    for country in countries
)
# "A, B and C" / "A, B, C" / "a,b,c" variants for different metadata fields.
list_of_countries = ', '.join(countries[:-1]) + ' and ' + countries[-1]
list_of_countries_noand = ', '.join(countries)
list_of_countries_keywords = ','.join(countries).lower()
# Build the YAML 'sources' section of the data-package metadata.
source_list_filepath = os.path.join('input', 'sources.csv')
sources_df = pd.read_csv(source_list_filepath)
# Hand-written entries for the auxiliary geo datasets.
sources_metadata = """ - title: Postleitzahlen Deutschland
path: http://www.suche-postleitzahl.org/downloads
description: Zip codes of Germany linked to geo-information
- title: GeoNames
path: http://download.geonames.org/export/zip/
description: The GeoNames geographical database which covers all countries and contains over eleven million placenames that are available for download free of charge.
- title: Eurostat (NUTS tables)
path: https://ec.europa.eu/eurostat/home?
description: The data for mapping coordinates, postcodes, municipality names and codes to NUTS region codes.
"""
# Template for one YAML source entry.
source_format = """ - title: {source}
path: {url}
description: {short_description}
"""
# Geonames and Eurostat are already covered by the hand-written entries above,
# so drop their rows from the source list before formatting.
geonames_eurostat_mask = sources_df['source'].isin(['Geonames', 'Eurostat'])
sources_df = sources_df[~geonames_eurostat_mask]
#display(sources_df)
# Build a {full_name: {url, short_description}} mapping of the remaining sources.
sources_dict = sources_df[["full_name", "url", "short_description"]]\
.set_index("full_name" )\
.to_dict(orient="index")
# Append one formatted YAML entry per remaining source.
for source in sources_dict:
source_metadata = {'source' : source}
source_metadata.update(sources_dict[source])
sources_metadata += source_format.format(**source_metadata)
metadata = """
hide: yes
profile: data-package
_metadataVersion: 1.2
name: opsd_renewable_power_plants
title: Renewable power plants
description: List of renewable energy power stations
longDescription: >-
This Data Package contains a list of renewable energy power plants in lists of
renewable energy-based power plants of {list_of_countries}.
{covered_countries}
Due to different data availability, the power plant lists are of different
accuracy and partly provide different power plant parameters. Due to that, the
lists are provided as separate csv-files per country and as separate sheets in the
excel file. Suspect data or entries with high probability of duplication are marked
in the column 'comment'. These validation markers are explained in the file
validation_marker.csv.
Additionally, the Data Package includes daily time series of cumulated
installed capacity per energy source type for Germany, Denmark, Switzerland, the United Kingdom and Sweden. All data processing is
conducted in Python and pandas and has been documented in the Jupyter Notebooks linked below.
keywords: [master data register,power plants,renewables,{list_of_countries_keywords},open power system data]
spatial:
location: {list_of_countries_noand}
resolution: Power plants, municipalities
resources:
- name: renewable_power_plants_de
profile: tabular-data-resource
path: renewable_power_plants_DE.csv
title: Renewable power plants in Germany
format: csv
mediatype: text/csv
encoding: UTF-8
schema:
missingValues: [""]
primaryKey: eeg_id
fields:
- name: electrical_capacity
description: Installed electrical capacity in MW
type: number
unit: MW
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: data_source
description: Source of database entry
type: string
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. DE1).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. DE11).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. DE111).
type: string
- name: lon
description: Longitude coordinates
type: number
- name: lat
description: Latitude coordinates
type: number
- name: municipality
description: Name of German Gemeinde (municipality)
type: string
- name: municipality_code
description: German Gemeindenummer (municipalitiy number)
type: string
- name: postcode
description: German zip-code
type: string
- name: address
description: Street name or name of land parcel
type: string
- name: federal_state
description: Name of German administrative level 'Bundesland'
type: string
- name: commissioning_date
description: Date of commissioning of specific unit
type: date
opsdContentfilter: "true"
- name: decommissioning_date
description: Date of decommissioning of specific unit
type: date
- name: voltage_level
description: Voltage level of grid connection
type: string
- name: eeg_id
description: Power plant EEG (German feed-in tariff law) remuneration number
type: string
- name: dso
description: Name of distribution system operator of the region the plant is located in
type: string
- name: dso_id
description: Company number of German distribution grid operator
type: string
- name: tso
description: Name of transmission system operator of the area the plant is located
type: string
- name: renewable_power_plants_dk
path: renewable_power_plants_DK.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
fields:
- name: commissioning_date
description: date of the plant's commissioning
type: date
opsdContentfilter: "true"
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source.
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: dso
description: Name of distribution system operator of the region the plant is located in
type: string
- name: gsrn_id
description: Danish wind turbine identifier number (GSRN)
type: integer
- name: postcode
description: Danish zip-code
type: string
- name: municipality_code
description: Danish 3-digit Kommune-Nr
type: string
- name: municipality
description: Name of Danish Kommune
type: string
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. DK0).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. DK01).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. DK013).
type: string
- name: address
description: Street name or name of land parcel
type: string
- name: lat
description: Latitude coordinates
type: number
- name: lon
description: Longitude coordinates
type: number
- name: hub_height
description: Wind turbine hub heigth in m
type: number
- name: rotor_diameter
description: Wind turbine rotor diameter in m
type: number
- name: manufacturer
description: Company that has built the wind turbine
type: string
- name: model
description: Wind turbine model type
type: string
- name: data_source
description: Source of database entry
type: string
- name: renewable_power_plants_fr
path: renewable_power_plants_FR.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
fields:
- name: site_name
description: The power plant's name.
type: string
- name: EIC_code
description: Energy Identification Code - the plant's unique identifier in the French grid
type: string
- name: IRIS_code
description: IRIS code
type: string
- name: commissioning_date
description: The date of the plant's commissioning
opsdContentfilter: "true"
type: date
- name: connection_date
description: The date when the plant was connected to the French grid
opsdContentfilter: "true"
type: date
- name: disconnection_date
description: The date that the plant was disconnected from the French grid
opsdContentfilter: "true"
type: date
- name: departement
description: The name of the French departement
type: string
- name: departement_code
description: The number of the French departement
type: integer
- name: municipality
description: Name of French Commune
type: string
- name: municipality_code
description: French 5-digit INSEE code for Communes
type: integer
- name: municipality_group
description: Name of the group of municipalities the plant is located in.
type: string
- name: municipality_group_code
description: Code of the group of municipalities the plant is located in.
type: integer
- name: region
description: Name of the French region
type: string
- name: region_code
description: Code of the French region
type: integer
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. FR1).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. FR10).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. FR101).
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: number_of_installations
description: Number of installations of the energy source subtype in the municipality. Due to confidentiality reasons, the values smaller than 3 are published as ''<3'' (as in the source).
type: integer
bareNumber: false
- name: lat
description: Latitude coordinates
type: number
- name: lon
description: Longitude coordinates
type: number
- name: data_source
description: Source of database entry
type: string
- name: as_of_year
description: Year for which the data source compiled the original dataset.
type: integer
- name: renewable_power_plants_pl
path: renewable_power_plants_PL.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
primaryKey: URE_id
fields:
- name: URE_id
type: integer
description: The URE id of the plant.
- name: region
type: string
description: The name of the Polish voivodeship.
- name: district
description: The name of the Polish powiat.
type: string
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. PL1).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. PL11).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. PL113).
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
opsdContentfilter: "true"
type: string
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: data_source
description: Source of database entry
type: string
- name: as_of_year
description: Year for which the data source compiled the original dataset.
type: integer
- name: renewable_power_plants_uk
path: renewable_power_plants_UK.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
primaryKey: uk_beis_id
fields:
- name: commissioning_date
description: Date of commissioning of specific unit
type: date
opsdContentfilter: "true"
- name: uk_beis_id
description: ID for the plant as assigned by UK BEIS.
type: integer
- name: site_name
description: Name of site
type: string
- name: operator
description: Name of operator
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
opsdContentfilter: "true"
type: string
- name: energy_source_level_3
description: Type of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
description: Installed electrical capacity in MW
unit: MW
type: number
- name: chp
description: Is the project capable of combined heat and power output
type: string
- name: capacity_individual_turbine
description: For windfarms, the individual capacity of each wind turbine in megawatts (MW)
type: number
- name: number_of_turbines
description: For windfarms, the number of wind turbines located on the site
type: integer
- name: solar_mounting_type
description: For solar PV developments, whether the PV panels are ground or roof mounted
type: string
- name: address
description: Address
type: string
- name: municipality
description: Municipality
type: string
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. UKD).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. UKD1).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. UKC12).
type: string
- name: region
description: Region
type: string
- name: country
description: The UK's constituent country in which the facility is located.
type: string
- name: postcode
description: Postcode
type: string
- name: lat
description: Latitude coordinates
type: string
- name: lon
description: Longitude coordinates
type: string
- name: data_source
description: The source of database entries
type: string
- name: comment
description: Shortcodes for comments related to this entry, explanation can be looked up in validation_marker.csv
type: string
- name: renewable_power_plants_ch
path: renewable_power_plants_CH.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
fields:
- name: commissioning_date
description: Commissioning date
type: date
opsdContentfilter: "true"
- name: municipality
description: Municipality
type: string
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. CH0).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. CH03).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. CH031).
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Type of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: municipality_code
description: Municipality code
type: integer
- name: project_name
description: Name of the project
type: string
- name: production
description: Yearly production in MWh
type: number
- name: tariff
description: Tariff in CHF for 2016
type: number
- name: contract_period_end
description: End of subsidy contract
type: date
- name: address
description: Street name
type: string
- name: canton
description: Name of the cantones/ member states of the Swiss confederation
type: string
- name: company
description: Name of the company
type: string
- name: lat
description: Latitude coordinate
type: number
- name: lon
description: Longitude coordinate
type: number
- name: data_source
description: Source of database entry
type: string
- name: postcode
description: Swiss zip code
type: string
- name: renewable_power_plants_se
path: renewable_power_plants_SE.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
fields:
- name: commissioning_date
description: Commissioning date
opsdContentfilter: "true"
type: date
- name: se_vindbrukskollen_id
description: The id in the vindbrukskollen data
type: string
- name: site_name
description: Name of site
type: string
- name: manufacturer
description: Manufacturer
type: string
- name: municipality
description: Municipality
type: string
- name: county
description: County
type: string
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. SE0).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. SE02).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. SE021).
type: string
- name: lat
description: Latitude coordinates
type: number
- name: lon
description: Longitude coordinates
type: number
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
- name: energy_source_level_3
description: Type of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
opsdContentfilter: "true"
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW.
type: number
- name: data_source
description: Source of database entry
type: string
- name: renewable_power_plants_cz
path: renewable_power_plants_CZ.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
fields:
- name: site_name
description: Name of site
type: string
- name: owner
description: Owner
type: string
- name: region
description: Region
type: string
- name: municipality
description: Municipality
type: string
- name: locality
description: Town or village
type: string
- name: postcode
description: Postcode
type: string
- name: lat
description: Latitude coordinates
type: number
- name: lon
description: Longitude coordinates
type: number
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. CZ0).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. CZ08).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. CZ08-).
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
- name: energy_source_level_3
description: Type of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
opsdContentfilter: "true"
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW.
type: number
- name: data_source
description: Source of database entry
type: string
- name: res_plants_separated_de_outvalidated_plants
path: res_plants_separated_DE_outvalidated_plants.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
fields:
- name: commissioning_date
type: date
description: Date of commissioning of specific unit
- name: decommissioning_date
type: date
description: Date of decommissioning of specific unit
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: voltage_level
description: Voltage level of grid connection
type: string
- name: tso
description: Name of transmission system operator of the area the plant is located
type: string
- name: dso
description: Name of distribution system operator of the region the plant is located in
type: string
- name: dso_id
description: Company number of German distribution grid operator
type: string
- name: eeg_id
description: Power plant EEG (German feed-in tariff law) remuneration number
type: string
- name: federal_state
description: Name of German administrative level 'Bundesland'
type: string
- name: postcode
description: German zip-code
type: string
- name: municipality_code
description: German Gemeindenummer (municipalitiy number)
type: string
- name: municipality
description: Name of German Gemeinde (municipality)
type: string
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. DE1).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. DE11).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. DE111).
type: string
- name: address
description: Street name or name of land parcel
type: string
- name: lat
description: Latitude coordinates
type: number
- name: lon
description: Longitude coordinates
type: number
- name: data_source
description: Source of database entry
type: string
- name: comment
description: Shortcodes for comments related to this entry, explanation can be looked up in validation_marker.csv
type: string
- name: res_plants_separated_fr_outvalidated_plants
path: res_plants_separated_FR_outvalidated_plants.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
schema:
missingValues: [""]
fields:
- name: site_name
description: The power plant's name.
type: string
- name: EIC_code
description: Energy Identification Code - the plant's unique identifier in the French grid
type: string
- name: IRIS_code
description: IRIS code
type: string
- name: commissioning_date
description: The date of the plant's commissioning
opsdContentfilter: "true"
type: date
- name: connection_date
description: The date when the plant was connected to the French grid
opsdContentfilter: "true"
type: date
- name: disconnection_date
description: The date that the plant was disconnected from the French grid
opsdContentfilter: "true"
type: date
- name: departement
description: The name of the French departement
type: string
- name: departement_code
description: The number of the French departement
type: integer
- name: municipality
description: Name of French Commune
type: string
- name: municipality_code
description: French 5-digit INSEE code for Communes
type: integer
- name: municipality_group
description: Name of the group of municipalities the plant is located in.
type: string
- name: municipality_group_code
description: Code of the group of municipalities the plant is located in.
type: integer
- name: region
description: Name of the French region
type: string
- name: region_code
description: Code of the French region
type: integer
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. FR1).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. FR10).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. FR101).
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: number_of_installations
description: Number of installations of the energy source subtype in the municipality. Due to confidentiality reasons, the values smaller than 3 are published as ''<3'' (as in the source).
type: integer
bareNumber: false
- name: lat
description: Latitude coordinates
type: number
- name: lon
description: Longitude coordinates
type: number
- name: data_source
description: Source of database entry
type: string
- name: as_of_year
description: Year for which the data source compiled the original dataset.
type: integer
- name: comment
description: Shortcodes for comments related to this entry, explanation can be looked up in validation_marker.csv
type: string
- name: validation_marker
path: validation_marker.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
mediatype: text/csv
schema:
missingValues: [""]
primaryKey: Validation marker
fields:
- name: Validation marker
description: Name of validation marker utilized in column comment in the renewable_power_plant_germany.csv
type: string
- name: Long explanation
description: Explanation of the validation marker
type: string
- name: Short explanation
description: Short comment on the meaning of the marker
type: string
- name: Country
description: The country for which the marker is defined.
type: string
- name: renewable_power_plants_eu
path: renewable_power_plants_EU.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
mediatype: text/csv
schema:
missingValues: [""]
fields:
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: electrical_capacity
description: Installed electrical capacity in MW
unit: MW
type: number
- name: technology
description: Technology to harvest energy (e.g. Onshore, Photovoltaics)
type: string
- name: data_source
description: Source of database entry
type: string
- name: municipality
description: The name of the municipality in which the facility is located.
type: string
- name: nuts_1_region
description: The code of the NUTS 1 region the facility is in (e.g. NL1).
type: string
- name: nuts_2_region
description: The code of the NUTS 2 region the facility is in (e.g. NL11).
type: string
- name: nuts_3_region
description: The code of the NUTS 3 region the facility is in (e.g. NL112).
type: string
- name: lon
description: Geographical longitude
type: number
- name: lat
description: Geographical latitude
type: number
- name: commissioning_date
type: date
description: Date of commissioning of specific unit
- name: geographical_resolution
description: Precision of geographical information (exact power plant location, municipality, district)
type: string
- name: as_of_year
description: Year for which the data source compiled the corresponding dataset
type: integer
- name: country
description: The country in which the facility is located
type: string
- name: renewable_power_plants_xlsx
profile: data-resource
path: renewable_power_plants.xlsx
title: The whole package
description: The whole package as an Excel file with each country written in a sheet except for Germany which is spread across two, alongside with timeseries and validation markers.
format: xlsx
mediatype: application/vnd.ms-excel
encoding: UTF-8
schema:
missingValues: [""]
- name: renewable_capacity_timeseries
path: renewable_capacity_timeseries.csv
profile: tabular-data-resource
format: csv
encoding: UTF-8
mediatype: text/csv
schema:
missingValues: [""]
primaryKey: day
fields:
- name: day
type: date
description: The day of the timeseries entry
opsdContentfilter: "true"
- name: CH_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for Switzerland in MW
unit: MW
opsdProperties:
Region: Switzerland
Variable: Bioenergy
type: number
source:
title: Own calculation based on plant-level data from Swiss Federal Office of Energy
path: input/original_data/CH/BFE/9669-Liste aller KEV-Bezüger im Jahr 2018.xlsx
- name: CH_solar_capacity
description: Cumulative solar electrical capacity for Switzerland in MW
unit: MW
opsdProperties:
Region: Switzerland
Variable: Solar
type: number
source:
title: Own calculation based on plant-level data from Swiss Federal Office of Energy
path: input/original_data/CH/BFE/9669-Liste aller KEV-Bezüger im Jahr 2018.xlsx
- name: CH_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Switzerland in MW
unit: MW
opsdProperties:
Region: Switzerland
Variable: Wind onshore
type: number
source:
title: Own calculation based on plant-level data from Swiss Federal Office of Energy
path: input/original_data/CH/BFE/9669-Liste aller KEV-Bezüger im Jahr 2018.xlsx
- name: DE_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Bioenergy
type: number
source:
title: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_geothermal_capacity
description: Cumulative geothermal electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Geothermal
type: number
source:
title: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_solar_capacity
description: Cumulative solar electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Solar
type: number
source:
title: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_wind_capacity
description: Cumulative total wind electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Wind
type: number
source:
title: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Wind offshore
type: number
source:
title: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Wind onshore
type: number
source:
title: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DK_solar_capacity
description: Cumulative solar electrical capacity for Denmark in MW
unit: MW
opsdProperties:
Region: Denmark
Variable: Solar
type: number
source:
title: Own calculation based on plant-level data from Energinet.dk
path: input/original_data/DK/Energinet/SolcellerGraf-2016-11.xlsx
- name: DK_wind_capacity
description: Cumulative total wind electrical capacity for Denmark in MW
unit: MW
opsdProperties:
Region: Denmark
Variable: Wind
type: number
source:
title: Own calculation based on plant-level data from Danish Energy Agency
path: input/original_data/DK/Energistyrelsen/anlaegprodtilnettet.xls
- name: DK_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for Denmark in MW
unit: MW
opsdProperties:
Region: Denmark
Variable: Wind offshore
type: number
source:
title: Own calculation based on plant-level data from Danish Energy Agency
path: input/original_data/DK/Energistyrelsen/anlaegprodtilnettet.xls
- name: DK_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Denmark in MW
unit: MW
opsdProperties:
Region: Denmark
Variable: Wind onshore
type: number
source:
title: Own calculation based on plant-level data from Danish Energy Agency
path: input/original_data/DK/Energistyrelsen/anlaegprodtilnettet.xls
- name: FR_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for France in MW
unit: MW
opsdProperties:
Region: France
Variable: Bioenergy
type: number
source:
title: Own calculation based on plant-level data from ODRE and Ministère de la Transition écologique et solidaire
- name: FR_geothermal_capacity
description: Cumulative geothermal electrical capacity for France in MW
unit: MW
opsdProperties:
Region: France
Variable: Geothermal
type: number
source:
title: Own calculation based on plant-level data from ODRE and Ministère de la Transition écologique et solidaire
- name: FR_hydro_capacity
description: Cumulative hydroenergy electrical capacity for France in MW
unit: MW
opsdProperties:
Region: France
Variable: Hydro
type: number
source:
title: Own calculation based on plant-level data from ODRE and Ministère de la Transition écologique et solidaire
- name: FR_marine_capacity
description: Cumulative marine electrical capacity for France in MW
unit: MW
opsdProperties:
Region: France
Variable: Marine
type: number
source:
title: Own calculation based on plant-level data from ODRE and Ministère de la Transition écologique et solidaire.
- name: FR_solar_capacity
description: Cumulative solar electrical capacity for France in MW
unit: MW
opsdProperties:
Region: France
Variable: Solar
type: number
source:
title: Own calculation based on plant-level data from ODRE and Ministère de la Transition écologique et solidaire.
- name: FR_wind_onshore_capacity
description: Cumulative wind onshore electrical capacity for France in MW
unit: MW
opsdProperties:
Region: France
Variable: Wind onshore
type: number
source:
title: Own calculation based on plant-level data from ODRE and Ministère de la Transition écologique et solidaire.
- name: GB-GBN_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Bioenergy
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-GBN_hydro_capacity
description: Cumulative hydro electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Hydro
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-GBN_marine_capacity
description: Cumulative marine electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Marine
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-GBN_solar_capacity
description: Cumulative solar electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Solar
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-GBN_wind_capacity
description: Cumulative total wind electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Wind
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-GBN_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Wind offshore
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-GBN_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Wind onshore
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-NIR_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for Northern Ireland in MW
unit: MW
opsdProperties:
Region: Northern Ireland
Variable: Bioenergy
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-NIR_solar_capacity
description: Cumulative solar electrical capacity for Northern Ireland in MW
unit: MW
opsdProperties:
Region: Northern Ireland
Variable: Solar
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-NIR_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Northern Ireland in MW
unit: MW
opsdProperties:
Region: Northern Ireland
Variable: Wind onshore
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-UKM_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Bioenergy
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-UKM_hydro_capacity
description: Cumulative hydro electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Hydro
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-UKM_marine_capacity
description: Cumulative marine electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Marine
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-UKM_solar_capacity
description: Cumulative solar electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Solar
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-UKM_wind_capacity
description: Cumulative total wind electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Wind
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-UKM_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Wind offshore
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: GB-UKM_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Wind onshore
type: number
source:
title: Own calculation based on plant-level data from BEIS
path: input/original_data/UK/BEIS/renewable-energy-planning-database-march-2020-update.csv
- name: SE_wind_capacity
description: Cumulative total wind electrical capacity for Sweden in MW
unit: MW
opsdProperties:
Region: Sweden
Variable: Wind
type: number
source:
title: Own calculation based on plant-level data from Vindbrukskollen
path: input/original_data/SE/Vindbrukskollen/VBK_export_allman_prod.xlsx
- name: SE_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for Sweden in MW
unit: MW
opsdProperties:
Region: Sweden
Variable: Wind offshore
type: number
source:
title: Own calculation based on plant-level data from Vindbrukskollen
path: input/original_data/SE/Vindbrukskollen/VBK_export_allman_prod.xlsx
- name: SE_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Sweden in MW
unit: MW
opsdProperties:
Region: Sweden
Variable: Wind onshore
type: number
source:
title: Own calculation based on plant-level data from Vindbrukskollen
path: input/original_data/SE/Vindbrukskollen/VBK_export_allman_prod.xlsx
sources:
{sources_metadata}
contributors:
- title: Ingmar Schlecht
role: maintainer
organization: Neon GmbH
email: schlecht@neon-energie.de
- title: Milos Simic
role: contributor
email: milos.simic.ms@gmail.com
""".format(**{
'list_of_countries' : list_of_countries,
'list_of_countries_noand' : list_of_countries_noand,
'list_of_countries_keywords' : list_of_countries_keywords,
'covered_countries' : covered_countries,
'sources_metadata' : sources_metadata
})
# Parse the YAML metadata template into a dict and fill in the
# version-dependent fields of the datapackage descriptor.
# safe_load avoids arbitrary Python object construction and keeps working on
# PyYAML >= 5/6, where plain yaml.load without an explicit Loader is
# deprecated (and later a TypeError).
metadata = yaml.safe_load(metadata)
metadata['homepage'] = 'https://data.open-power-system-data.org/renewable_power_plants/' + settings['version']
metadata['id'] = 'https://doi.org/10.25832/renewable_power_plants/' + settings['version']
metadata['last_changes'] = settings['changes']
metadata['version'] = settings['version']
metadata['publicationDate'] = settings['version']
metadata['_external'] = 'false'
# Build the attribution text from the current version instead of the stale
# hard-coded release date 2020-05-20 of a previous package version.
metadata['attribution'] = ('Open Power System Data. 2020. Data Package Renewable power plants.'
                           ' Version ' + settings['version'] + '.'
                           ' https://doi.org/10.25832/renewable_power_plants/' + settings['version'] + '.'
                           ' (Primary data from various sources, for a complete list see URL)')
metadata['_metadataVersion'] = '1.2'
# NOTE(review): lastYear is computed but not used in this section — confirm a
# later cell needs it before removing.
lastYear = int(settings['version'][0:4]) - 1
metadata['temporal'] = {
    'referenceDate': settings['referenceDate']
}
metadata['documentation'] = ('https://github.com/Open-Power-System-Data/renewable_power_plants/blob/'
                             + settings['version'] + '/main.ipynb')
datapackage_json = json.dumps(metadata, indent=4, separators=(',', ': '), ensure_ascii=False)
# Add metadata fields to conform to the OPDF Metadata version 1.2
def get_sha_hash(path, blocksize=65536):
    """Return the SHA-256 hex digest of the file at *path*.

    The file is streamed in chunks of *blocksize* bytes so that large
    files are hashed without being loaded into memory at once.
    """
    digest = hashlib.sha256()
    with open(path, 'rb') as stream:
        # iter() with a b'' sentinel yields successive chunks until EOF.
        for chunk in iter(lambda: stream.read(blocksize), b''):
            digest.update(chunk)
    return digest.hexdigest()
# Enrich each resource entry of the datapackage metadata with file-level
# information: a CSV dialect description, the SHA-256 checksum and the
# on-disk size of the written file.
# NOTE(review): indentation was lost when this notebook cell was extracted;
# it is ambiguous whether hash/size/profile are set for every resource or
# only inside the CSV branch — confirm against the original notebook.
for resource in metadata['resources']:
if resource['format'] == 'csv':
# Dialect shared by every CSV output of this package.
resource['dialect'] = {
'delimiter' : ',',
'decimalChar' : '.',
'lineTerminator' : '\\n',
'header' : 'true'
}
file_name = resource['path']
file_path = os.path.join(package_path, file_name)
# Checksum and byte size of the file written earlier (see get_sha_hash).
resource['hash'] = get_sha_hash(file_path)
resource['size'] = os.stat(file_path).st_size
resource['profile'] = 'tabular-data-resource'
# Sort the fields for each resource according to the default order
# (except for the timeseries and markers, whose order is fixed).
for resource in metadata['resources']:
    if resource['name'] not in ['validation_marker', 'renewable_capacity_timeseries']:
        print(resource['name'])
        fields = resource['schema']['fields']
        # default_order (defined earlier in the notebook) is the canonical
        # column order; index() ranks each field by its position in it.
        sorted_fields = sorted(fields, key=lambda field: default_order.index(field['name']))
        resource['schema']['fields'] = sorted_fields
# Serialize the final metadata to JSON.
datapackage_json = json.dumps(metadata, indent=4, separators=(',', ': '), ensure_ascii=False)
# Write the JSON metadata; the with-block closes the file, so no explicit
# close() call is needed.
with open(os.path.join(package_path, 'datapackage.json'), 'w', encoding='utf-8') as f:
    f.write(datapackage_json)
Generate checksums.txt containing a SHA-256 hash for each file of the data package.
# Files that make up the data package; the validation markers, the SQLite
# database and the Excel workbook are always included.
files = [
'validation_marker.csv',
'renewable_power_plants.sqlite',
'renewable_power_plants.xlsx',
]
# Add one CSV per country for which a table was written
# (table_names maps country code -> file stem; both defined earlier).
for country in all_countries_including_dirty:
if country in table_names:
files.append(table_names[country]+'.csv')
files.append('renewable_capacity_timeseries.csv')
files.append('renewable_power_plants_EU.csv')
# Write "<file_name>,<sha256>" lines, one per package file, sorted by name.
# NOTE(review): checksums.txt is opened relative to the working directory,
# not inside package_path — confirm this is intended.
with open('checksums.txt', 'w') as f:
for file_name in sorted(files):
print(file_name)
file_hash = get_sha_hash(os.path.join(package_path, file_name))
f.write('{},{}\n'.format(file_name, file_hash))
print('\tDone!')
print('Done!')