#!/usr/bin/env python # coding: utf-8 # ### Summarize tidy tables # This script summarizes the water use and water suppy tidy tables generated by the *CreateUsageTable* and *CreateSupplyTable* scripts, respectively. Each table is then merged into a single dataframe to create a table listing water use and supply for each year/state combination. # # ----- # #### Workflow # * Import and summarize use table on state, listing usage amounts by use class and source class (surface/groundwater) # * Import and summarize supply table, first on county so that amounts can be converted from mm to MGal/year. # * Summarize the county supply table to the state level, listing the total MGal/year of supply in each state # In[1]: #Import libraries import sys, os import pandas as pd import numpy as np # In[2]: #Get file names; these files are created by the CreateUsageTable.py and CreateSupplyTable.py respectively dataDir = '../../Data' tidyuseFN = dataDir + os.sep + "UsageDataTidy.csv" tidysupplyFN = dataDir + os.sep + "SupplyTableTidy.csv" outCountyFN = dataDir + os.sep + "WaterByCounty.csv" outStateFN = dataDir + os.sep + "WaterByState.csv" outNationFN = dataDir + os.sep + "WaterBalanceData.csv" # #### Summarize USE table by county # Computes water usage for each county broken into each sector and source category. # In[3]: #Read in the usage table from the csv file dfUse = pd.read_csv(tidyuseFN,dtype={'FIPS':np.str}) # In[4]: #Remove rows with irrigation and thermoelectric sub-classes #dropValues = ['Irrigation_Crop', 'Irrigation_Golf','ThermoElec_OnceThru', 'ThermoElec_Recirc'] dropValues = ['Irrigation','ThermoElec'] dfUse = dfUse[~dfUse['UseClass'].isin(dropValues)] # In[5]: #Convert amounts from MGal/day to MGal/year dfUse['Amount'] = dfUse['Amount'] * 365 # In[6]: #Add STATEFIPS column to dfUse (as left most 2 characters of FIPS values) dfUse['STATEFIPS'] = dfUse['FIPS'].str[:2] # In[7]: #Pivot on YEAR and FIPS listing usage in sector/source categories dfUseFIPS = dfUse.pivot_table(index=['YEAR','STATE','FIPS'], values='Amount', aggfunc='sum', columns=['UseClass','SrcClass']) # In[8]: #Flatten hierarchical column names dfUseFIPS.columns = ['_'.join(col).strip() for col in dfUseFIPS.columns.values] # In[9]: #Remove indices so values are available as columns dfUseFIPS.reset_index(inplace=True) # In[10]: dfUseFIPS.head(2) # #### Import and summarize supply table by county # In[11]: #Read in the supply table from the csv file dfSupply = pd.read_csv(tidysupplyFN,dtype={'FIPS':np.str,'STATEFIPS':np.str}) # In[12]: #Compute supply as precipitation - evapotranspiration #(See https://www.fs.fed.us/rm/value/docs/spatial_distribution_water_supply.pdf) # * Could also use total_runoff # * Values are in mm/year and need to be adjusted to MGal/year by mulitplying by weighted area dfSupply['Supply'] = dfSupply['pr'] - dfSupply['et'] # In[13]: #Summarize supply on YEAR and FIPS '''We take the mean mm/year across points in a county and then mulitply by county area to get volume (mm * m3). These values then need to by converted to MGal to give MGal/year ''' #Compute mean runoff and supply on year and county dfSupplyFIPS = dfSupply.groupby(('YEAR','STATEFIPS','FIPS','Area'))['total_runoff','Supply'].mean() #Reset the index so Year, StateFIPS, FIPS, and AREA become columns again dfSupplyFIPS.reset_index(inplace=True) #Convert mm/Year * county area (m2) into MGal/year - to match use values ''' m = [mm] / 1000; m * [m2] = m3; [m3] / 3785.41178 = 1 MGal''' for param in ('total_runoff','Supply'): dfSupplyFIPS[param] = (dfSupplyFIPS[param] / 1000.0) * dfSupplyFIPS.Area / 3785.41178 # In[14]: dfSupplyFIPS.head(2) # ### Join Use and Supply Tables on Year and FIPS # In[15]: dfSupplyFIPS.columns.values # In[16]: #Merge the two tables on YEAR and FIPS columns dfAll = pd.merge(dfUseFIPS,dfSupplyFIPS, how='outer',on=['YEAR','FIPS'],left_index=True,right_index=True) # In[17]: dfAll.head(2) # In[18]: #Export to csv dfAll.to_csv(outCountyFN, index=False, encoding='utf8') # ### Summarize for entire US # In[19]: #Group by YEAR dfUS = dfAll.groupby('YEAR').sum() dfUS.head() # In[20]: dfUS.reset_index(inplace=True) # In[21]: dfUSm = pd.melt(dfUS,id_vars='YEAR',var_name='Group',value_name='MGal') # In[22]: dfUSm.to_csv(outNationFN,index=False)