#!/usr/bin/env python # coding: utf-8 # dsi logo # # # Electricity production plants - Visualisation Plots # # # # Data origins from https://opendata.swiss/en/dataset/elektrizitatsproduktionsanlagen and we use the following .csv files. # # - ElectricityProductionPlant.csv # - MainCategoryCatalogue.csv # - SubCategoryCatalogue.csv # - PlantCategoryCatalogue.csv # # # ### Loading data # # We use `pd.read_csv` to read the csv files into a `DataFrame`. # # After reading we set the index to the corresponding column which makes it easier to join tables and join the (sub)-category names in order to have all information in one `epp` table # In[25]: import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import numpy as np plt.rcParams["figure.figsize"] = (20, 10) epp = pd.read_csv('../data/ch.bfe.elektrizitaetsproduktionsanlagen/ElectricityProductionPlant.csv', parse_dates=['BeginningOfOperation']).set_index('xtf_id') mainCat = pd.read_csv('../data/ch.bfe.elektrizitaetsproduktionsanlagen/MainCategoryCatalogue.csv').set_index('Catalogue_id') subCat = pd.read_csv('../data/ch.bfe.elektrizitaetsproduktionsanlagen/SubCategoryCatalogue.csv').set_index('Catalogue_id') plantCat = pd.read_csv('../data/ch.bfe.elektrizitaetsproduktionsanlagen/PlantCategoryCatalogue.csv').set_index('Catalogue_id') lang='de' epp = epp.merge(mainCat[lang].rename("MainCategoryName"),how='left', left_on='MainCategory', right_index=True) epp = epp.merge(subCat[lang].rename("SubCategoryName"),how='left', left_on='SubCategory', right_index=True) epp = epp.merge(plantCat[lang].rename("PlantCategoryName"),how='left', left_on='PlantCategory', right_index=True) display(epp.head()) # *** # ## Pie chart # #
# Exercise: Plot the energy mix in a pie chart a Series # # 1. Create a pie chart that displays the relative contribution of the main categories to the total power produced in Switzerland. # # 2. Create a pie chart that displays the relative contribution of the main categories to the total number of power plants in Switzerland. # # 3. (Optional) Can you plot both figures as subplots in one plot? # # 4. (Optional) Plot the relative contribution for the sub categories for `maincat_2`. # # # #
# # *Hints* # - To prepare the necessary DataFrame you can use the `DataFrame.groupby()` function and sum on the `TotalPower` column: `epp.groupby('MainCategoryName')['TotalPower'].sum()` # - To plot you can use directly the function `Series.plot.pie()` [Pandas Doc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.plot.pie.html) # - To count instead of sum a grouped value use `.count()` # # *** # # In[ ]: # *** # ## Bar chart # #
# Exercise: Plot the energy data by canton # # 1. Use `sns.countplot` to create a bar chart that displays the number of fossil fuel power plants (`epp.MainCategory == 'maincat_4'`) for each canton. Can you also display the different sub-categories? # # 2. Create a stacked bar chart that displays the contribution to alternative energies (`epp.MainCategory == 'maincat_2'`) for each canton. # # 3. (Optional) Can you create a similar bar chart with seaborn, i.e. `sns.barplot()`? P # # 4. (Optional) Play around with the different `seaborn` themes `darkgrid`, `whitegrid`, `dark`, `white`, and `ticks`. # # #
# # *Hints* # - With `sns.countplot` you can provide your entier dataset # - To prepare the necessary DataFrame you can use the `DataFrame.groupby()` function and sum on the `TotalPower` column: `epp.groupby(['Canton', 'SubCategoryName'], as_index=False)['TotalPower'].sum()`. Furthermore, you can use `DataFrame.pivot_table()` to explode the sub categories into columns. # # # *** # # In[ ]: # *** # ## History Plots # #
# Exercise: Plot the energy mix over time # # **Step 1**: Create a line plot over the time that cumulates the energy that is produced until then. # # **Step 2**: Create a line plot with multiple lines. Each line represents the cumulative sum of the energy produced by one main category # # **Step 3**: Create an area plot with the same data as in step 2. With the area plot you can show the individual as well as the total values. # #
# # *Hints* # - **Step 1**: # - With `df = epp.groupby('BeginningOfOperation')['TotalPower' ].sum().sort_index()` you can create a `DataFrame` with the added `TotalPower` for each `BeginningOfOperations` date # - use the `df.cumsum` function to return the cumulative sum that you can then plot directly with pandas into a `df.plot.line` plot. # # - **Step 2**: # - Do the `groupby` not only by `BeginningOfOperation` but by `['BeginningOfOperation', 'MainCategoryName']`. Use `as_index=False` to have a `DataFrame` as a result of the grouping and not a `Series` # - use `DataFrame.pivot_table()` to explode the main categories into columns. # - use `data.replace(np.nan, 0)` to replace missing values with 0 # *** # # In[ ]: