#!/usr/bin/env python
# coding: utf-8

# In[1]:

# Pull in shared imports (pandas as pd, numpy as np, requests, ...) and the
# ISO-code helpers (CODE_GENERATOR_ISO3, COUNTRY_NAME_GENERATOR_FROM_ISO3).
get_ipython().run_line_magic('run', 'Requirements.ipynb')
get_ipython().run_line_magic('run', 'Country_ISO_Codes.ipynb')

# # Sources used in Greenhouse Data

# All of these inventories have been homogenized (as much as possible/recommended)
# by making sure they all include the following characteristics:
# - Country ISO codes (ISO-3). This is useful to easily create emission figures
#   for all countries included in an inventory.
# - Emission units in CO2 or CO2-equivalent (CO2eq). Some inventories give native
#   units of gases (for example kt of CH4), so columns were added for easy
#   conversion using GWPs.
#
# Furthermore, thank you to the following people:
# - Mauricio Foronda, for all the coding help
# - Nicolás Guarín-Zapata (nicoguaro in github) for the neon style in matplotlib,
#   and for inspiring my NEONIZE() function.
# - My thesis supervisors: Sonia Seneviratne, Anthony Patt, Jonas Schwabb
# - Open Climate Data, from which I took the UNFCCC submission inventories.

# # BP
#
# INFO:
# - BP. (2021). Methodology for calculating CO2 emissions from energy use.
#   https://www.ipcc-nggip.iges.or.jp/public/2006gl/vol2.html
# - Downloaded from "https://www.bp.com/content/dam/bp/business-sites/en/global/corporate/xlsx/energy-economics/statistical-review/bp-stats-review-2021-all-data.xlsx"

# In[4]:

print( ">>>>>>> BP <<<<<<<<")

# download file (kept commented for reproducibility; the xlsx is already in raw_data)
# link_bp = "https://www.bp.com/content/dam/bp/business-sites/en/global/corporate/xlsx/energy-economics/statistical-review/bp-stats-review-2021-all-data.xlsx"
# resp = requests.get(link_bp)
# output = open('raw_data/BP/bp-stats-review-2021-all-data.xlsx', "wb")
# output.write(resp.content)
# output.close()

# read file (sheet 4 of the statistical review; data header is on row 2)
df_bp = pd.read_excel(io="../raw_data/BP/bp-stats-review-2021-all-data.xlsx", sheet_name=4, header=2)

# clean: drop footnote/blank rows (they have no 2020 value)
df_bp = df_bp[df_bp[2020].notnull()]
df_bp = df_bp.rename(columns={"Million tonnes of carbon dioxide": "name"})
df_bp = df_bp.drop(["2020.1", "2009-19", "2020.2"], axis=1)  # deleting last three columns (growth/share columns)
df_bp.loc[:, 1965:] = df_bp.loc[:, 1965:].astype(float)

# add unit
df_bp.insert(1, "unit", "MtCO2")

# reindex
df_bp = df_bp.reset_index().drop("index", axis=1)

# CODE_GENERATOR_ISO3
df_bp.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_bp, show_missing=True, show_duplicates_nan=True)

# -----------------------------------------------------------------------------------------------------
# global?
df_bp_global = df_bp[df_bp.name == "Total World"].iloc[0, :]
df_bp_global.to_csv("../clean_data/global_subsets/BP_global.csv")

# ------------------------------------------------------------------------------------------------------------------
# BUGFIX: the full country table was previously written to "../clean_data/BP_global.csv"
# while the very next statement read "../clean_data/BP.csv", which nothing had produced.
# Every other section writes and re-reads the same file; do the same here.
df_bp.to_csv("../clean_data/BP.csv", index=None)
df_bp = pd.read_csv("../clean_data/BP.csv")
df_bp

# # CDIAC
#
# INFO:
# - Carbon Dioxide Information Analysis Center
# - CDIAC. (2017). Global, Regional, and National Fossil-Fuel CO2 Emissions.
#   https://cdiac.ess-dive.lbl.gov/trends/emis/overview_2013.html
# - DOI: 10.3334/CDIAC/00001_V2017
# - Authors: Tom Boden and Bob Andres (Oak Ridge National Laboratory);
#   Gregg Marland (Appalachian State University)
#
# NOTES:
# - Units: given in carbon units, not carbon dioxide (converted by multiplying values by 44/12)

# In[13]:

print( ">>>>>>> CDIAC <<<<<<<<")

# download file (context manager guarantees the handle is closed even on error)
link_cdiac = "http://cdiac.ess-dive.lbl.gov/ftp/ndp030/CSV-FILES/nation.1751_2014.csv"
resp = requests.get(link_cdiac)
with open('../raw_data/CDIAC/nation.1751_2014.csv', "wb") as output:
    output.write(resp.content)

# read file
df_cdiac = pd.read_csv("../raw_data/CDIAC/nation.1751_2014.csv")

# clean: first three rows are continuation of the header text
df_cdiac = df_cdiac.drop([0, 1, 2])
df_cdiac = df_cdiac.rename(columns={"Nation": "name"})
df_cdiac = df_cdiac.replace(".", np.nan)  # empty cells are marked with "." -> NaN
df_cdiac.iloc[:, 2:] = df_cdiac.iloc[:, 2:].astype(float, errors="raise")

# CONVERT Carbon units to Carbon Dioxide units; the /1000 turns
# "thousand metric tons" (kt) into Mt at the same time.
convert_C_to_CO2 = 44 / 12
df_cdiac.iloc[:, 2:] = df_cdiac.iloc[:, 2:] * convert_C_to_CO2 / 1000

# BUGFIX: the per-capita column is in plain metric tons of C (not thousands),
# so the blanket /1000 above was wrong by a factor of 1000 for it and
# contradicted the "(tCO2)" label given to this column later. Undo the /1000
# for that single column (the 44/12 C->CO2 factor still applies).
_percap_col = "Per capita CO2 emissions (metric tons of carbon)"
df_cdiac[_percap_col] = df_cdiac[_percap_col] * 1000

# convert years to integers?
df_cdiac.Year = df_cdiac.Year.astype(int)

# add unit (and change column names: the kt->Mt division above is reflected here)
df_cdiac.insert(2, "unit", "MtCO2")
df_cdiac = df_cdiac.rename(columns={
    "Total CO2 emissions from fossil-fuels and cement production (thousand metric tons of C)":
        "Total CO2 emissions from fossil-fuels and cement production (MtCO2)",
    "Per capita CO2 emissions (metric tons of carbon)":
        "Per capita CO2 emissions (tCO2)",
})

# reindex
df_cdiac = df_cdiac.reset_index().drop("index", axis=1)

# CODE_GENERATOR_ISO3
df_cdiac.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_cdiac, show_missing=True, show_duplicates_nan=True)

# global (CDIAC ships no "World" row, so sum over all countries per year)
df_cdiac_global = df_cdiac.groupby(["Year", "unit"]).sum().reset_index()
df_cdiac_global.to_csv("../clean_data/global_subsets/CDIAC_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_cdiac.to_csv("../clean_data/CDIAC.csv", index=None)
df_cdiac = pd.read_csv("../clean_data/CDIAC.csv")
df_cdiac_global

# # EIA
#
# INFO:
# - Energy Information Administration of the United States of America
# - Data downloaded from website of eia.gov > international > other statistics >
#   emissions by fuel type, and manually cleaned in excel, saved as EIA_raw_processed_all.csv
# - https://www.eia.gov/international/data/world/other-statistics/emissions-by-fuel

# In[16]:

print( ">>>>>>> EIA <<<<<<<<")

# read file
df_eia = pd.read_csv("../raw_data/EIA/EIA_raw_processed_all.csv")

# clean
df_eia = df_eia.drop("Unnamed: 0", axis=1)
df_eia = df_eia.rename({'Unnamed: 1': 'name', 'Unnamed: 2': 'fuel'}, axis=1)

# new method: "--" marks missing values in the raw export
df_eia = df_eia.replace("--", np.nan)
df_eia.loc[:, "1949":] = df_eia.loc[:, "1949":].astype(float)
# BUGFIX: removed `df_eia.loc[:,"1949":].columns = ...astype(int)` — assigning
# `.columns` on a temporary slice never touches df_eia (silent no-op), and the
# string year labels are what the "1949": slices above rely on anyway.
df_eia.fuel = df_eia.fuel.replace({
    " CO2 emissions (MMtonnes CO2)": "Total CO2 emissions",
    " Coal and coke (MMtonnes CO2)": "Coal and coke",
    " Consumed natural gas (MMtonnes CO2)": "Consumed natural gas",
    " Petroleum and other liquids (MMtonnes CO2)": "Petroleum and other liquids",
})

# delete all empty unnecessary rows: the raw sheet repeats two header/label rows
# at the start of every 6-row country group (231 groups).
for x in range(231):
    df_eia = df_eia.drop([6 * x])
    df_eia = df_eia.drop([6 * x + 1])

# add unit
df_eia.insert(2, "unit", "MtCO2")

# reindex
df_eia = df_eia.reset_index().drop("index", axis=1)

# CODE_GENERATOR_ISO3
df_eia.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_eia, show_missing=True, show_duplicates_nan=True)

# global?
year = 2018  # NOTE(review): unused below; kept in case later notebook cells reference it
df_eia_global = df_eia[df_eia.name == "World"]
df_eia_global.to_csv("../clean_data/global_subsets/EIA_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_eia.to_csv("../clean_data/EIA.csv", index=None)
df_eia = pd.read_csv("../clean_data/EIA.csv")
df_eia_global

# # IEA
#
# INFO:
# - International Energy Agency's GHG emissions from energy *highlights*
# - downloaded from https://www.iea.org/data-and-statistics/data-product/greenhouse-gas-emissions-from-energy-highlights
#
# NOTES:
# - this dataset includes all greenhouse gases, but only shows quantities in carbon dioxide equivalents
# - should I delete bunkers in the main df? (currently they are included)
# - Sectoral data by country is available in raw_data/IEA/cleaning_products folder

# In[17]:

print( ">>>>>>> IEA <<<<<<<<")

# read excel file: sheet_name=None loads every sheet into a dict of DataFrames
excel_df_dict = pd.read_excel("../raw_data/IEA/GHGHighlights.XLS", sheet_name=None, header=3)

# Cleaning 1: of the whole excel (wow), making individual dataframes.
# (A redundant `sheet_list = excel_df_dict.keys()` that was immediately
# overwritten has been removed.)
sheet_list = ['CONTENTS', 'GHG Energy', 'GHG Fugi', 'GHG FC', 'GHG FC - Coal',
              'GHG FC - Oil', 'GHG FC - Gas', 'CO2 MARBUNK', 'CO2 AVBUNK',
              'SECTOR', 'SECTOREH', 'CO2-TES', 'CO2-GDP', 'CO2-GDP PPP',
              'CO2-POP', 'SECTPOP', 'KAYA', 'TIMEEXTENDED', 'GEO COVERAGE']
# NOTE(review): country_list is unused below; kept for reference/debugging.
country_list = excel_df_dict["GHG FC"][20:189]["million tonnes of CO2 eq"].array

# the two sheet families differ in which junk rows must be dropped
sheet_list_24line = ['GHG FC', 'GHG FC - Coal', 'GHG FC - Oil', 'GHG FC - Gas']
sheet_list_22line = ['GHG Energy', 'GHG Fugi', 'CO2 MARBUNK', 'CO2 AVBUNK', 'SECTOR', 'SECTOREH']
df_list24 = ["total_fuel_combustion", "coal", "oil", "gas"]
df_list22 = ["energy", "fugitive", "marine_bunkers", "int_aviation_bunkers", "sectors_2019", "sectors_alt_2019"]

for i, j in enumerate(sheet_list_24line):
    # print(j)
    df = excel_df_dict[j]
    df = df.drop([0, 2, 10, 12, 17, 18, 19])  # region/sub-header rows
    df = df.iloc[:-3, :]                      # trailing footnote rows
    df.insert(1, "type", "{}".format(df_list24[i]), allow_duplicates=True)
    df.insert(2, "unit", "MtCO2eq", allow_duplicates=True)
    df = df.replace("..", np.nan)
    df = df.replace("x", np.nan)
    df.iloc[:, 3:] = df.iloc[:, 3:].astype(float)
    df = df.rename(columns={"million tonnes of CO2 eq": "name"})
    df = df[df.name.notna()]
    df = df.reset_index().drop("index", axis=1)
    df.to_csv("../raw_data/IEA/Cleaning_products/IEA_{}.csv".format(df_list24[i]))
    # display(df)

for i, j in enumerate(sheet_list_22line):
    # print(j)
    df = excel_df_dict[j]
    df = df.drop([0, 2, 10, 12, 15, 16, 17, ])
    df = df.iloc[:-3, :]
    df.insert(1, "type", "{}".format(df_list22[i]), allow_duplicates=True)
    df.insert(2, "unit", "MtCO2eq", allow_duplicates=True)
    df = df.replace("..", np.nan)
    df = df.replace("x", np.nan)
    df.iloc[:, 3:] = df.iloc[:, 3:].astype(float)
    # only one of these renames matches per sheet; the others are no-ops
    df = df.rename(columns={"kilo tonnes of CO2 eq": "name"})  # for fugitive!
    df = df.rename(columns={"million tonnes of CO2": "name"})  # for bunkers
    df = df.rename(columns={"million tonnes of CO2 eq": "name"})
    df = df[df.name.notna()]
    df = df.reset_index().drop("index", axis=1)
    df.to_csv("../raw_data/IEA/Cleaning_products/IEA_{}.csv".format(df_list22[i]))
    # display(df)

# Cleaning 2: Stitch everything together!
df_iea = pd.concat([
    pd.read_csv("../raw_data/IEA/Cleaning_products/IEA_energy.csv"),
    pd.read_csv("../raw_data/IEA/Cleaning_products/IEA_total_fuel_combustion.csv"),
    pd.read_csv("../raw_data/IEA/Cleaning_products/IEA_coal.csv"),
    pd.read_csv("../raw_data/IEA/Cleaning_products/IEA_oil.csv"),
    pd.read_csv("../raw_data/IEA/Cleaning_products/IEA_gas.csv"),
    pd.read_csv("../raw_data/IEA/Cleaning_products/IEA_marine_bunkers.csv"),        # should i keep these last three?
    pd.read_csv("../raw_data/IEA/Cleaning_products/IEA_int_aviation_bunkers.csv"),  # should i keep these last three?
    pd.read_csv("../raw_data/IEA/Cleaning_products/IEA_fugitive.csv"),              # should i keep these last three?
])

# Cleaning 3: reset index and fix fugitive (kilo tonnes -> Mt, /1000)
df_iea = df_iea.drop("Unnamed: 0", axis=1).reset_index().drop("index", axis=1)
df_iea.loc[df_iea.type == "fugitive", "1971":] = df_iea.loc[df_iea.type == "fugitive", "1971":] / 1000

# CODE_GENERATOR_ISO3
df_iea.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_iea, show_missing=True, show_duplicates_nan=True)

# global
df_iea_global = df_iea[df_iea.name == 'World']
df_iea_global.to_csv("../clean_data/global_subsets/IEA_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_iea.to_csv("../clean_data/IEA.csv", index=None)
df_iea = pd.read_csv("../clean_data/IEA.csv")
df_iea_global

# # GCP
#
# INFO:
# - Global Carbon Project's main dataset: production/territorial-based CO2 emissions
# - Reference of the full global carbon budget 2021: Pierre Friedlingstein,
#   Matthew W. Jones, Michael O'Sullivan, Robbie M. Andrew, Dorothee C. E. Bakker,
#   Judith Hauck, Corinne Le Quéré, Glen P. Peters, Wouter Peters, Julia Pongratz,
#   Stephen Sitch, Josep G. Canadell, Philippe Ciais, Rob B. Jackson, Simone R. Alin,
#   Peter Anthoni, Nicholas R. Bates, Meike Becker, Nicolas Bellouin, Laurent Bopp,
#   Thi Tuyet Trang Chau, Frédéric Chevallier, Louise P. Chini, Margot Cronin,
#   Kim I. Currie, Bertrand Decharme, Laique M. Djeutchouang, Xinyu Dou, Wiley Evans,
#   Richard A. Feely, Liang Feng, Thomas Gasser, Dennis Gilfillan, Thanos Gkritzalis,
#   Giacomo Grassi, Luke Gregor, Nicolas Gruber, Özgür Gürses, Ian Harris,
#   Richard A. Houghton, George C. Hurtt, Yosuke Iida, Tatiana Ilyina, Ingrid T. Luijkx,
#   Atul Jain, Steve D. Jones, Etsushi Kato, Daniel Kennedy, Kees Klein Goldewijk,
#   Jürgen Knauer, Jan Ivar Korsbakken, Arne Körtzinger, Peter Landschützer,
#   Siv K. Lauvset, Nathalie Lefèvre, Sebastian Lienert, Junjie Liu, Gregg Marland,
#   Patrick C. McGuire, Joe R. Melton, David R. Munro, Julia E.M.S. Nabel,
#   Shin-Ichiro Nakaoka, Yosuke Niwa, Tsuneo Ono, Denis Pierrot, Benjamin Poulter,
#   Gregor Rehder, Laure Resplandy, Eddy Robertson, Christian Rödenbeck,
#   Thais M. Rosan, Jörg Schwinger, Clemens Schwingshackl, Roland Séférian,
#   Adrienne J. Sutton, Colm Sweeney, Toste Tanhua, Pieter P. Tans, Hanqin Tian,
#   Bronte Tilbrook, Francesco Tubiello, Guido van der Werf, Nicolas Vuichard,
#   Chisato Wada, Rik Wanninkhof, Andrew J. Watson, David Willis, Andrew J. Wiltshire,
#   Wenping Yuan, Chao Yue, Xu Yue, Sönke Zaehle, Jiye Zeng.
#   Global Carbon Budget 2021, Earth Syst. Sci. Data, 2021. https://doi.org/XXXXXXX
#
# NOTES:
# - Given in carbon units, converted to carbon dioxide

# In[20]:

print( ">>>>>>> GCP <<<<<<<<")

# read data (sheet 1 = territorial emissions; countries are columns, so transpose)
df_gcp = pd.read_excel("../raw_data/GCP/National_Carbon_Emissions_2021v0.4.xlsx", 1, header=11, index_col=0)
df_gcp = df_gcp.T

# clean
df_gcp = df_gcp.reset_index().rename({"index": "name"}, axis=1)

# delete "statistical difference" row
df_gcp = df_gcp.drop(df_gcp.loc[df_gcp.name == "Statistical Difference"].index[0])

# convert from MtC to MtCO2
df_gcp.loc[:, 1959:] = df_gcp.loc[:, 1959:] * 44 / 12

# add unit
df_gcp.insert(1, "unit", "MtCO2")

# CODE_GENERATOR_ISO3
df_gcp.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_gcp, show_missing=True, show_duplicates_nan=True)

# global
df_gcp_global = df_gcp[df_gcp.name == "World"]
df_gcp_global.to_csv("../clean_data/global_subsets/GCP_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_gcp.to_csv("../clean_data/GCP.csv", index=None)
df_gcp = pd.read_csv("../clean_data/GCP.csv")
df_gcp_global

# # GCP (consumption-based)
#
# INFO:
# - Consumption-based CO2 emissions
# - Updated from Peters, GP, Minx, JC, Weber, CL and Edenhofer, O 2011.
#   Growth in emission transfers via international trade from 1990 to 2008.
#   Proceedings of the National Academy of Sciences 108, 8903-8908.
#   http://www.pnas.org/content/108/21/8903.abstract
# - "Growth in emission transfers via international trade from 1990 to 2008"
#   (Peters & Hertwich, 2008).
# # NOTES:
# - Given in carbon units, converted to carbon dioxide

# In[44]:

print( ">>>>>>> GCP (consumption) <<<<<<<<")

# read data (sheet 2 = consumption emissions; countries are columns, so transpose)
df_gcp_cons = pd.read_excel("../raw_data/GCP/National_Carbon_Emissions_2021v0.4.xlsx", 2, header=8, index_col=0)
df_gcp_cons = df_gcp_cons.T

# clean
df_gcp_cons = df_gcp_cons.reset_index().rename({"index": "name"}, axis=1)

# delete "statistical difference" row
df_gcp_cons = df_gcp_cons.drop(df_gcp_cons.loc[df_gcp_cons.name == "Statistical Difference"].index[0])

# convert from MtC to MtCO2
df_gcp_cons.loc[:, 1959:] = df_gcp_cons.loc[:, 1959:] * 44 / 12

# add unit
df_gcp_cons.insert(1, "unit", "MtCO2")

# CODE_GENERATOR_ISO3
df_gcp_cons.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_gcp_cons, show_missing=True, show_duplicates_nan=True)

# -----------------------------------------------------------------------------------------------------------------
# global
df_gcp_cons_global = df_gcp_cons[df_gcp_cons.name == "World"]
df_gcp_cons_global.to_csv("../clean_data/global_subsets/GCP_consumption_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_gcp_cons.to_csv("../clean_data/GCP_consumption.csv", index=None)
df_gcp_cons = pd.read_csv("../clean_data/GCP_consumption.csv")
df_gcp_cons

# # EPA
#
# INFO:
# - Environmental Protection Agency of the United States of America
# - *Global Non-CO2 Greenhouse Gas Emission Projections & Mitigation Potential: 2015-2050*
# - from https://www.epa.gov/global-mitigation-non-co2-greenhouse-gases/global-non-co2-greenhouse-gas-emission-projections
#   (zip, proj-data-annex_Sept2019.xlsx file)

# In[25]:

print( ">>>>>>> EPA <<<<<<")

# read file
df_epa = pd.read_excel("../raw_data/EPA/proj-data-annex_Sept2019.xlsx", 0)

# clean: replace the native unit column with a homogeneous one
df_epa = df_epa.rename(columns={"country": "name"})
df_epa = df_epa.drop("unit", axis=1)
df_epa.insert(1, "unit", "MtCO2eq")
# df_epa.value.sum()  # removed: bare expression was a no-op in a script (kept for reference)

# CODE_GENERATOR_ISO3
df_epa.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_epa, show_missing=True, show_duplicates_nan=True)

# global - should I add it to the total dataframe?
# EPA has no "World" rows, so aggregate over every country per (year, sector, gas, ...) key.
df_epa_global = (
    df_epa
    .groupby(["year", "unit", "sector", "source", "subsource", "gas"], dropna=False)
    .sum()
    .reset_index()
)
df_epa_global.insert(0, "name", "World")
df_epa_global.insert(0, "code", np.nan)
df_epa_global.to_csv("../clean_data/global_subsets/EPA_global.csv")
print(f"\nGlobal non-CO2 emissions (2015): {df_epa_global.groupby('year').sum().loc[2015, 'value']} MtCO2eq")

# ------------------------------------------------------------------------------------------------------------------
df_epa.to_csv("../clean_data/EPA.csv", index=None)
df_epa = pd.read_csv("../clean_data/EPA.csv")
df_epa

# # FAO / FAOSTAT
#
# INFO:
# - Food and Agriculture Organization of the United Nations
# - from FAOSTAT website > Emission totals > Bulk download > All data
#   (zip file, using NOFLAG file here): https://www.fao.org/faostat/en/#data/GT

# In[5]:

print( ">>>>>>> FAOSTAT <<<<<<<<")

df_fao = pd.read_csv("../raw_data/FAO/Emissions_Totals_E_All_Data_NOFLAG.csv", encoding='latin-1')

# clean: year columns come as "Y1990", "Y1991", ... -> strip the leading Y
df_fao.columns = df_fao.columns.str.replace("Y", "")
df_fao = df_fao.rename(columns={"Area": "name"})

# some info
# print("Items ", df.groupby("Item").count().index)
# print("Elements ", df.groupby("Element").count().index)
# print("Source ", df.groupby("Source").count().index)
# print("Area ", df.groupby("Area").count().index)

# CODE_GENERATOR_ISO3
df_fao.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_fao, show_missing=True, show_duplicates_nan=True)

# global (FAOSTAT ships its own World aggregate)
df_fao_global = df_fao[df_fao.name == "World"]
df_fao_global.to_csv("../clean_data/global_subsets/FAO_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_fao.to_csv("../clean_data/FAO.csv", index=None)
df_fao = pd.read_csv("../clean_data/FAO.csv", low_memory=False)
df_fao_global

# # CAIT
#
# INFO:
# - Climate Analysis Indicators Tool from the World Resources Institute (WRI)
# - Climate Watch Historical GHG Emissions. 2021. Washington, DC: World Resources
#   Institute. Available online at: https://www.climatewatchdata.org/ghg-emissions
# - Downloaded from their website's data explorer > "Download bulk data" > GHG emissions
#   (zip file, only using CAIT csv file)
#   https://www.climatewatchdata.org/data-explorer/historical-emissions?historical-emissions-data-sources=cait&historical-emissions-end_year=2018&historical-emissions-gases=all-ghg&historical-emissions-regions=All%20Selected&historical-emissions-sectors=total-including-lucf&historical-emissions-start_year=1990&page=1

# In[28]:

print( ">>>>>>> CAIT <<<<<<<<")

df_cait = pd.read_csv("../raw_data/CAIT/CW_HistoricalEmissions_CAIT.csv")

# clean: CAIT's "Country" column already holds ISO-3 codes
df_cait = df_cait.rename(columns={"Country": "code"})

# COUNTRY_DICT NOT NECESSARY HERE, INVERSE (creating names out of ISO codes)
df_cait.insert(1, "name", np.nan)
COUNTRY_NAME_GENERATOR_FROM_ISO3(df_cait, show_missing=True, show_duplicates_nan=True)

# Global (included in dataset!)
df_cait_global = df_cait[df_cait.code == "WORLD"]
df_cait_global.to_csv("../clean_data/global_subsets/CAIT_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_cait.to_csv("../clean_data/CAIT.csv", index=None)
df_cait = pd.read_csv("../clean_data/CAIT.csv")
df_cait

# # EDGAR
#
# INFO:
# - European Commission
# - Crippa, Monica; Guizzardi, Diego; Muntean, Marilena; Schaaf, Edwin;
#   Lo Vullo, Eleonora; Solazzo, Efisio; Monforti-Ferrario, Fabio; Olivier, Jos;
#   Vignati, Elisabetta (2021): EDGAR v6.0 Greenhouse Gas Emissions.
#   European Commission, Joint Research Centre (JRC) [Dataset]
#   PID: http://data.europa.eu/89h/97a67d67-c62e-4826-b873-9d972c4f670b
#
# NOTES:
# - given in native units (must convert to CO2 equivalents)

# In[38]:

print( ">>>>>>> EDGAR <<<<<<<<")

"""
IMPORTANT: EDGAR website separates fossil CO2 emissions and biomass (organic) emissions.
In their main methodology paper they exclude biomass emissions (and other LULUCF emission),
so we will do the same here too.
To reiterate - biomass (organic) emissions are OMITTED here, but has its own dataset ("df_co2_org").
"""

# separate co2-organic (biomass) dataset (excluded from main dataset)
df_co2_org = pd.read_excel("../raw_data/" + "EDGAR/v60_CO2_org_short-cycle_C_1970_2018.xls", 0, header=9)
df_co2_org.insert(7, "gas", "CO2 (bio)")

# read MAIN data: one file per gas, tagged with a "gas" column before concatenation
df_co2_fossil = pd.read_excel("../raw_data/" + "EDGAR/v60_CO2_excl_short-cycle_org_C_1970_2018.xls", 0, header=9)
df_co2_fossil.insert(7, "gas", "CO2 (fossil)")

df_ch4 = pd.read_excel("../raw_data/" + "EDGAR/v60_CH4_1970_2018.xls", 0, header=9)
df_ch4.insert(7, "gas", "CH4")

df_n2o = pd.read_excel("../raw_data/" + "EDGAR/v60_N2O_1970_2018.xls", 0, header=9)
df_n2o.insert(7, "gas", "N2O")

# convert into one dataframe
df_edgar = pd.concat([df_ch4, df_co2_fossil, df_n2o]).reset_index().drop("index", axis=1)

# clean: harmonized column names, year columns "Y_1970" -> "1970"
df_edgar = df_edgar.rename(columns={"Name": "name"})
df_edgar = df_edgar.rename(columns={"Country_code_A3": "code"})
df_edgar.columns = df_edgar.columns.str.replace("Y_", "")
# df_edgar.ipcc_code_2006_for_standard_report = df_edgar.ipcc_code_2006_for_standard_report.str.replace(".","")

# IMPORTANT: CONVERT TO CO2equivalents!
# 100-year global warming potentials, IPCC AR6 values
GWP_100_ipcc_ar6 = {
    "CO2 (fossil)": 1,
    "CO2 (bio)": 1,
    "CH4": 27.9,
    "N2O": 273,
}

# PERF FIX: the original iterated `for i in range(len(df_edgar))` and multiplied
# one row at a time. Mapping each row's gas to its GWP and multiplying all year
# columns in one vectorized step produces the identical result, much faster.
_gwp_factors = df_edgar["gas"].map(GWP_100_ipcc_ar6)
df_edgar.loc[:, "1970":] = df_edgar.loc[:, "1970":].mul(_gwp_factors, axis=0)

# divide by 1000 (converting gigagrams to Mt)
df_edgar.loc[:, "1970":] = df_edgar.loc[:, "1970":] / 1000

# add unit column
# NOTE(review): after the GWP step the CH4/N2O rows are CO2-*equivalents*, so the
# label arguably should be "MtCO2eq"; kept as "MtCO2" to avoid changing the CSV
# consumed downstream — confirm before relabeling.
df_edgar.insert(8, "unit", "MtCO2")

# FIX: removed an exact duplicate re-read of v60_CO2_org_short-cycle here —
# df_co2_org was already loaded (identically) together with the main EDGAR files.

# no need to make codes nor names!
# woooo! (except if I want to make codes for country groups such as "IPCC_annex" or "C_group_IM24_sh")

# global! (not included in main)
df_edgar_global = df_edgar.groupby([
    "IPCC_annex", "ipcc_code_2006_for_standard_report",
    "ipcc_code_2006_for_standard_report_name", "fossil_bio", "gas", "unit",
]).sum().reset_index()
df_edgar_global.to_csv("../clean_data/global_subsets/EDGAR_global.csv")
print("NO DUPLICATES HERE!")

# ------------------------------------------------------------------------------------------------------------------
df_edgar.to_csv("../clean_data/EDGAR.csv", index=None)
df_edgar = pd.read_csv("../clean_data/EDGAR.csv")
df_edgar_global

# # PRIMAP-hist (Version 2.31)
#
# INFO:
# - Potsdam-Institut für Klimafolgenforschung
# - Gütschow, J.; Jeffery, L.; Gieseke, R.; Gebel, R.; Stevens, D.; Krapp, M.;
#   Rocha, M. (2016): The PRIMAP-hist national historical emissions time series,
#   Earth Syst. Sci. Data, 8, 571-603, https://doi.org/10.5194/essd-8-571-2016
# - Dataset downloaded from https://zenodo.org/record/5494497
#   (Guetschow-et-al-2021-PRIMAP-hist_v2.3.1_20-Sep_2021.csv)

# In[51]:

print( ">>>>>>> PRIMAP-hist <<<<<<<<")

"""
Warning!
CH4, N2O, NF3 and SF6 have not been converted to CO2eqs yet!
"""

df_primap = pd.read_csv("../raw_data/PRIMAP-hist/Guetschow-et-al-2021-PRIMAP-hist_v2.3.1_20-Sep_2021.csv")

# clean: the "area (ISO3)" column already holds ISO-3 codes
df_primap = df_primap.rename(columns={"area (ISO3)": "code"})

# COUNTRY_DICT NOT NECESSARY HERE, INVERSE (creating names out of ISO codes)
df_primap.insert(3, "name", np.nan)
COUNTRY_NAME_GENERATOR_FROM_ISO3(df_primap, show_missing=True, show_duplicates_nan=True)

# global (PRIMAP uses the "EARTH" pseudo-code for the world aggregate)
df_primap_global = df_primap[df_primap.code == "EARTH"]
df_primap_global.to_csv("../clean_data/global_subsets/PRIMAP-hist_global.csv")
# [df_primap_global["category (IPCC2006_PRIMAP)"]=="M.0.EL"].groupby(["entity","unit","scenario (PRIMAP-hist)"]).sum()["2019"]/1000
print("NO DUPLICATES HERE!")

# ------------------------------------------------------------------------------------------------------------------
df_primap.to_csv("../clean_data/PRIMAP-hist.csv", index=None)
df_primap = pd.read_csv("../clean_data/PRIMAP-hist.csv")
df_primap_global

# # Minx et al 2021
#
# INFO:
# - A comprehensive and synthetic dataset for global, regional and national
#   greenhouse gas emissions by sector 1970-2018 with an extension to 2019
# - VERSION 5 from Zenodo https://zenodo.org/record/5844489
# - Using *essd_ghg_data.xlsx* file. Gives two convenient columns for ar4 and ar5 GWPs
#
# NOTES:
# - 2020 non-CO2 emissions seem to be incomplete

# In[57]:

print( ">>>>>>> Minx et al 2021 <<<<<<<<")

"""
IPCC categorization cannot be integrated into the main dataset (yet).
This is because there are various subsectors (and some sectors such as
Industry and Energy) that overlap.
"""

# read file (excel with multiple interesting sheets)
# minx_info = pd.read_excel("raw_data/Minx/essd_ghg_data.xlsx",0) # only info
# minx_metadata = pd.read_excel("raw_data/Minx/essd_ghg_data.xlsx",1) # not very useful
minx_data = pd.read_excel("../raw_data/Minx/essd_ghg_data.xlsx", 2)  # MAIN DATA
# minx_sector_classification = pd.read_excel("raw_data/Minx/essd_ghg_data.xlsx",3) # includes description and IPCC categories!
# minx_region_classification = pd.read_excel("raw_data/Minx/essd_ghg_data.xlsx",4) # not useful
# minx_100_yr_gwps = pd.read_excel("raw_data/Minx/essd_ghg_data.xlsx",5) # both ar5 and ar6 GWPS for all included gases
# minx_CH4_gwps = pd.read_excel("raw_data/Minx/essd_ghg_data.xlsx",6) # CH4 classification and chapters

# choose dataset
df_minx = minx_data

# clean a little
df_minx = df_minx.rename(columns={"country": "name", "ISO": "code", "value": "value_native"})

# add columns for both ar5 and ar6 gwps (native value x GWP columns shipped with the data)
df_minx = df_minx.assign(value_ar5=df_minx.value_native * df_minx.gwp100_ar5)
df_minx = df_minx.assign(value_ar6=df_minx.value_native * df_minx.gwp100_ar6)

# codes and names are amazing
print("NO DUPLICATES HERE!")

# global
df_minx_global = df_minx.groupby([
    "year", "region_ar6_6", "region_ar6_10", "region_ar6_22", "region_ar6_dev",
    "sector_title", "subsector_title", "gas", "gwp100_ar5", "gwp100_ar6",
]).sum()
df_minx_global = df_minx_global.reset_index()
df_minx_global.to_csv("../clean_data/global_subsets/Minx2021_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_minx.to_csv("../clean_data/Minx2021.csv", index=None)
df_minx = pd.read_csv("../clean_data/Minx2021.csv")
df_minx

# In[58]:

df_minx_global

# # UNFCCC Annex I Submissions
# - from Open Climate Data: https://github.com/openclimatedata
# - all values in current state are converted to CO2equivalents using AR6 GWPs,
#   to get native units DIVIDE BY GWPs columns

# In[56]:

print( ">>>>>>> UNFCCC Annex I <<<<<<<<")

df_unfccc_ai = pd.read_csv("../raw_data/UNFCCC/detailed-data-by-country-annex-one.csv")

# clean: normalize subscript characters in gas names
df_unfccc_ai = df_unfccc_ai.rename(columns={"Party": "name"})
df_unfccc_ai = df_unfccc_ai.replace({"CO₂": "CO2", "N₂O": "N2O", "CH₄": "CH4", "SF₆": "SF6", "NF₃": "NF3"})

# add GWPs (gases already reported in CO2eq get a factor of 1)
GWP_100_ipcc_ar6 = {
    "Aggregate GHGs": 1,
    "CO2": 1,
    "CH4": 27.9,
    "N2O": 273,
    "SF6": 25200,
    "NF3": 17400,
    "HFCs": 1,  # unit is already in CO2eq
    "PFCs": 1,  # unit is already in CO2eq
    "Aggregate F-gases": 1,  # unit is already in CO2eq
    "Unspecified mix of HFCs and PFCs": 1,  # unit is already in CO2eq
}
# lookup table: index = gas name, column "index" = GWP factor
GWP_100_ipcc_ar6_df = pd.DataFrame(GWP_100_ipcc_ar6.keys(), GWP_100_ipcc_ar6.values()).reset_index().set_index(0)
GWP_100_ipcc_ar6_df.index.name = "gas"

GWP_100_ipcc_ar4 = {
    "Aggregate GHGs": 1,
    "CO2": 1,
    "CH4": 25,
    "N2O": 298,
    "SF6": 22800,
    "NF3": 17200,
    "HFCs": 1,  # unit is already in CO2eq
    "PFCs": 1,  # unit is already in CO2eq
    "Aggregate F-gases": 1,  # unit is already in CO2eq
    "Unspecified mix of HFCs and PFCs": 1,  # unit is already in CO2eq
}
GWP_100_ipcc_ar4_df = pd.DataFrame(GWP_100_ipcc_ar4.keys(), GWP_100_ipcc_ar4.values()).reset_index().set_index(0)
GWP_100_ipcc_ar4_df.index.name = "gas"

df_unfccc_ai.insert(5, "GWP_AR4_conditional",
                    df_unfccc_ai.merge(GWP_100_ipcc_ar4_df, left_on="Gas", right_on="gas", how="left")["index"])
# BUGFIX: this column previously merged against GWP_100_ipcc_ar4_df, so the
# "AR6" factors — and every unit conversion below, which multiplies by
# GWP_AR6_conditional — silently applied AR4 GWPs. Merge against the AR6 table.
df_unfccc_ai.insert(6, "GWP_AR6_conditional",
                    df_unfccc_ai.merge(GWP_100_ipcc_ar6_df, left_on="Gas", right_on="gas", how="left")["index"])

# optional stuff
# df_unfccc_ai.insert(1, "IPCC_cat", df_unfccc_ai["Category"].str[0])

# CODE_GENERATOR_ISO3
df_unfccc_ai.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_unfccc_ai, show_missing=True, show_duplicates_nan=True)

# fix units and convert: kt -> Mt (/1000), t -> Mt (/1e6), then apply the AR6 GWP factor.
# NOTE(review): rows already in "kt CO₂ equivalent" are also multiplied by the gas
# GWP; that is only correct if such rows are restricted to gases with factor 1
# (F-gases/aggregates) — confirm against the raw file.
df_unfccc_ai.loc[df_unfccc_ai.Unit == "kt", "Base year":] = df_unfccc_ai.loc[df_unfccc_ai.Unit == "kt", "Base year":].div(1000).mul(df_unfccc_ai.loc[df_unfccc_ai.Unit == "kt", "Unit":]["GWP_AR6_conditional"], axis=0)
df_unfccc_ai.loc[df_unfccc_ai.Unit == "kt CO₂ equivalent", "Base year":] = df_unfccc_ai.loc[df_unfccc_ai.Unit == "kt CO₂ equivalent", "Base year":].div(1000).mul(df_unfccc_ai.loc[df_unfccc_ai.Unit == "kt CO₂ equivalent", "Unit":]["GWP_AR6_conditional"], axis=0)
df_unfccc_ai.loc[df_unfccc_ai.Unit == "t CO₂ equivalent", "Base year":] = df_unfccc_ai.loc[df_unfccc_ai.Unit == "t CO₂ equivalent", "Base year":].div(1000000).mul(df_unfccc_ai.loc[df_unfccc_ai.Unit == "t CO₂ equivalent", "Unit":]["GWP_AR6_conditional"], axis=0)
# Last UNFCCC-AI unit case: plain tonnes -> Mt (/1e6), times the AR6 factor.
df_unfccc_ai.loc[df_unfccc_ai.Unit == "t", "Base year":] = (
    df_unfccc_ai.loc[df_unfccc_ai.Unit == "t", "Base year":]
    .div(1000000)
    .mul(df_unfccc_ai.loc[df_unfccc_ai.Unit == "t", "Unit":]["GWP_AR6_conditional"], axis=0)
)
# everything is homogeneous now
df_unfccc_ai.loc[:, "Unit"] = "MtCO2eq"

# ------------------------------------------------------------------------------------------------------------------
df_unfccc_ai.to_csv("../clean_data/UNFCCC_AI.csv", index=None)
df_unfccc_ai = pd.read_csv("../clean_data/UNFCCC_AI.csv")
df_unfccc_ai

# # UNFCCC Non-Annex I
# - from Open Climate Data: https://github.com/openclimatedata

# In[ ]:

# In[23]:

print( ">>>>>>> UNFCCC Non-Annex I <<<<<<<<")

df_unfccc_nai = pd.read_csv("../raw_data/UNFCCC//detailed-data-by-country-non-annex-one.csv")

# clean: normalize subscript characters in gas names
df_unfccc_nai = df_unfccc_nai.rename(columns={"Party": "name"})
df_unfccc_nai = df_unfccc_nai.replace({"CO₂": "CO2", "N₂O": "N2O", "CH₄": "CH4", "SF₆": "SF6", "NF₃": "NF3"})

# IMPOSSIBLE to convert to common units? (kept for reference)
# GWP_100_ipcc_ar6 = {
#     "Aggregate GHGs" : 1,
#     "CO2" : 1,
#     "CH4" : 27.9,
#     "N2O" : 273,
#     "SF6" : 1, #unit is already in CO2eq
#     "NF3" : 1, #unit is already in CO2eq
#     "HFCs": 1, #unit is already in CO2eq
#     "PFCs": 1, #unit is already in CO2eq
#     "Aggregate F-gases": 1,#unit is already in CO2eq
#     "Unspecified mix of HFCs and PFCs": 1 #unit is already in CO2eq
# }
# GWP_100_ipcc_ar6_df = pd.DataFrame(GWP_100_ipcc_ar6.keys(), GWP_100_ipcc_ar6.values()).reset_index().set_index(0)
# GWP_100_ipcc_ar6_df.index.name = "gas"
# df_unfccc_nai.insert(5,"GWP_AR6_conditional",df_unfccc_nai.merge(GWP_100_ipcc_ar6_df, left_on="Gas", right_on="gas", how="left")["index"])
# df_unfccc_nai.loc[:, "1990":] = df_unfccc_nai.loc[:, "1990":].mul(df_unfccc_nai["GWP_AR6_conditional"], axis=0)
# df_unfccc_nai.loc[:,"Unit"]= "MtCO2eq"

# Convert from Gg to Mt
df_unfccc_nai.loc[:, "1990":] = df_unfccc_nai.loc[:, "1990":].div(1000)
df_unfccc_nai.loc[:, "Unit"] = df_unfccc_nai.loc[:, "Unit"].replace(
    {"Gg CO₂ equivalent": "MtCO2eq", "Gg": "Mt (substance)"}
)

# CODE_GENERATOR_ISO3
df_unfccc_nai.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_unfccc_nai, show_missing=True, show_duplicates_nan=True)

# ------------------------------------------------------------------------------------------------------------------
df_unfccc_nai.to_csv("../clean_data/UNFCCC_NAI.csv", index=None)
df_unfccc_nai = pd.read_csv("../clean_data/UNFCCC_NAI.csv")
df_unfccc_nai

# In[ ]:

# In[ ]:

# In[ ]:

# # Other Inventories
# - Carbon Majors
# - EDGAR FOOD?
# - LUCF from GCP
# - LUCF from MINX
# - CEDS (which is not mainly for GHG but for pollutants. Still quite nice)
# - carbon monitor

# # Carbon Monitor
# - Downloaded from https://carbonmonitor.org/
# - Liu, Z., Ciais, P., Deng, Z. et al. Near-real-time monitoring of global CO2
#   emissions reveals the effects of the COVID-19 pandemic.
#   Nat Commun 11, 5172 (2020). https://doi.org/10.1038/s41467-020-18922-7

# In[2]:

print( ">>>>>>> Carbon Monitor <<<<<<")

# download the file straight from the API; dates arrive as dd/mm/YYYY strings
df_carbon_mon = pd.read_csv("https://datas.carbonmonitor.org/API/downloadFullDataset.php?source=carbon_global")
df_carbon_mon.date = pd.to_datetime(df_carbon_mon.date, format="%d/%m/%Y")

# clean
df_carbon_mon = df_carbon_mon.rename(columns={"country": "name"})
df_carbon_mon.insert(3, "unit", "MtCO2")

# CODE_GENERATOR_ISO3
df_carbon_mon.insert(0, "code", np.nan)
CODE_GENERATOR_ISO3(df_carbon_mon, show_missing=True, show_duplicates_nan=True)

# global (dataset ships a "WORLD" aggregate)
df_carbon_mon_global = df_carbon_mon[df_carbon_mon.name == "WORLD"]
df_carbon_mon_global.to_csv("../clean_data/global_subsets/Carbon-Monitor_global.csv")

# ------------------------------------------------------------------------------------------------------------------
df_carbon_mon.to_csv("../clean_data/Carbon-Monitor.csv")
df_carbon_mon = pd.read_csv("../clean_data/Carbon-Monitor.csv", index_col=0)
df_carbon_mon

# # Carbon Majors
#
# Climate Accountability Institute
# Principal Investigator: Richard Heede
#
# Some links:
# - https://climateaccountability.org/carbonmajors_dataset2020.html
# - https://climateaccountability.org/pdf/MRR%209.1%20Apr14R.pdf

# In[5]:

# already cleaned; just load and index by producing entity
df_carbon_majors = pd.read_csv("../clean_data/Carbon-Majors-2018.csv")
df_carbon_majors = df_carbon_majors.set_index("Entity")
df_carbon_majors