This script summarizes the water use and water suppy tidy tables generated by the CreateUsageTable and CreateSupplyTable scripts, respectively. Each table is then merged into a single dataframe to create a table listing water use and supply for each year/state combination.
#Import libraries
import sys, os
import pandas as pd
import numpy as np
#Get file names; these files are created by the CreateUsageTable.py and CreateSupplyTable.py respectively
dataDir = '../../Data'
tidyuseFN = dataDir + os.sep + "UsageDataTidy.csv"
tidysupplyFN = dataDir + os.sep + "SupplyTableTidy.csv"
outCountyFN = dataDir + os.sep + "WaterByCounty.csv"
outStateFN = dataDir + os.sep + "WaterByState.csv"
outNationFN = dataDir + os.sep + "WaterBalanceData.csv"
Computes water usage for each county broken into each sector and source category.
#Read in the usage table from the csv file
dfUse = pd.read_csv(tidyuseFN,dtype={'FIPS':np.str})
#Remove rows with irrigation and thermoelectric sub-classes
#dropValues = ['Irrigation_Crop', 'Irrigation_Golf','ThermoElec_OnceThru', 'ThermoElec_Recirc']
dropValues = ['Irrigation','ThermoElec']
dfUse = dfUse[~dfUse['UseClass'].isin(dropValues)]
#Convert amounts from MGal/day to MGal/year
dfUse['Amount'] = dfUse['Amount'] * 365
#Add STATEFIPS column to dfUse (as left most 2 characters of FIPS values)
dfUse['STATEFIPS'] = dfUse['FIPS'].str[:2]
#Pivot on YEAR and FIPS listing usage in sector/source categories
dfUseFIPS = dfUse.pivot_table(index=['YEAR','STATE','FIPS'],
values='Amount',
aggfunc='sum',
columns=['UseClass','SrcClass'])
#Flatten hierarchical column names
dfUseFIPS.columns = ['_'.join(col).strip() for col in dfUseFIPS.columns.values]
#Remove indices so values are available as columns
dfUseFIPS.reset_index(inplace=True)
dfUseFIPS.head(2)
YEAR | STATE | FIPS | Aquaculture_Groundwater | Aquaculture_Surface | Domestic_Groundwater | Domestic_Surface | Industrial_Groundwater | Industrial_Surface | Irrigation_Crop_Groundwater | ... | Mining_Groundwater | Mining_Surface | PublicSupply_Groundwater | PublicSupply_Surface | ThermoElec_OnceThru_Groundwater | ThermoElec_OnceThru_Surface | ThermoElec_Recirc_Groundwater | ThermoElec_Recirc_Surface | Total_Groundwater | Total_Surface | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2000 | AK | 02013 | NaN | NaN | 3.65 | 0.0 | 0.0 | 821.25 | NaN | ... | 0.0 | 0.0 | 7.30 | 357.7 | NaN | 0.0 | 0.0 | 0.0 | 10.95 | 1178.95 |
1 | 2000 | AK | 02016 | NaN | NaN | 0.00 | 0.0 | 0.0 | 876.00 | NaN | ... | 0.0 | 0.0 | 127.75 | 781.1 | NaN | 0.0 | 0.0 | 0.0 | 127.75 | 1657.10 |
2 rows × 25 columns
#Read in the supply table from the csv file
dfSupply = pd.read_csv(tidysupplyFN,dtype={'FIPS':np.str,'STATEFIPS':np.str})
#Compute supply as precipitation - evapotranspiration
#(See https://www.fs.fed.us/rm/value/docs/spatial_distribution_water_supply.pdf)
# * Could also use total_runoff
# * Values are in mm/year and need to be adjusted to MGal/year by mulitplying by weighted area
dfSupply['Supply'] = dfSupply['pr'] - dfSupply['et']
#Summarize supply on YEAR and FIPS
'''We take the mean mm/year across points in a county and then
mulitply by county area to get volume (mm * m3). These values
then need to by converted to MGal to give MGal/year
'''
#Compute mean runoff and supply on year and county
dfSupplyFIPS = dfSupply.groupby(('YEAR','STATEFIPS','FIPS','Area'))['total_runoff','Supply'].mean()
#Reset the index so Year, StateFIPS, FIPS, and AREA become columns again
dfSupplyFIPS.reset_index(inplace=True)
#Convert mm/Year * county area (m2) into MGal/year - to match use values
''' m = [mm] / 1000;
m * [m2] = m3;
[m3] / 3785.41178 = 1 MGal'''
for param in ('total_runoff','Supply'):
dfSupplyFIPS[param] = (dfSupplyFIPS[param] / 1000.0) * dfSupplyFIPS.Area / 3785.41178
dfSupplyFIPS.head(2)
YEAR | STATEFIPS | FIPS | Area | total_runoff | Supply | |
---|---|---|---|---|---|---|
0 | 2000 | 01 | 01001 | 1.565359e+09 | 164183.119550 | 136727.091260 |
1 | 2000 | 01 | 01003 | 5.250715e+09 | 747212.294697 | 697763.569535 |
dfSupplyFIPS.columns.values
array(['YEAR', 'STATEFIPS', 'FIPS', 'Area', 'total_runoff', 'Supply'], dtype=object)
#Merge the two tables on YEAR and FIPS columns
dfAll = pd.merge(dfUseFIPS,dfSupplyFIPS, how='outer',on=['YEAR','FIPS'],left_index=True,right_index=True)
dfAll.head(2)
YEAR | STATE | FIPS | Aquaculture_Groundwater | Aquaculture_Surface | Domestic_Groundwater | Domestic_Surface | Industrial_Groundwater | Industrial_Surface | Irrigation_Crop_Groundwater | ... | ThermoElec_OnceThru_Groundwater | ThermoElec_OnceThru_Surface | ThermoElec_Recirc_Groundwater | ThermoElec_Recirc_Surface | Total_Groundwater | Total_Surface | STATEFIPS | Area | total_runoff | Supply | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2000 | AK | 02013 | NaN | NaN | 3.65 | 0.0 | 0.0 | 821.25 | NaN | ... | NaN | 0.0 | 0.0 | 0.0 | 10.95 | 1178.95 | 01 | 1.565359e+09 | 164183.119550 | 136727.091260 |
1 | 2000 | AK | 02016 | NaN | NaN | 0.00 | 0.0 | 0.0 | 876.00 | NaN | ... | NaN | 0.0 | 0.0 | 0.0 | 127.75 | 1657.10 | 01 | 5.250715e+09 | 747212.294697 | 697763.569535 |
2 rows × 29 columns
#Export to csv
dfAll.to_csv(outCountyFN, index=False, encoding='utf8')
#Group by YEAR
dfUS = dfAll.groupby('YEAR').sum()
dfUS.head()
Aquaculture_Groundwater | Aquaculture_Surface | Domestic_Groundwater | Domestic_Surface | Industrial_Groundwater | Industrial_Surface | Irrigation_Crop_Groundwater | Irrigation_Crop_Surface | Irrigation_Golf_Groundwater | Irrigation_Golf_Surface | ... | PublicSupply_Surface | ThermoElec_OnceThru_Groundwater | ThermoElec_OnceThru_Surface | ThermoElec_Recirc_Groundwater | ThermoElec_Recirc_Surface | Total_Groundwater | Total_Surface | Area | total_runoff | Supply | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
YEAR | |||||||||||||||||||||
2000 | 385301.30 | 964625.65 | 1094419.65 | 12136.25 | 1307024.85 | 5900746.95 | NaN | NaN | NaN | NaN | ... | 9955243.60 | NaN | 64593608.35 | 149412.75 | 6525090.40 | 26287117.50 | 97545549.2 | 8.617206e+12 | 4.970406e+08 | 4.837853e+08 |
2005 | 695671.75 | 2509250.90 | 1365508.80 | 32017.80 | 1133857.90 | 5491231.55 | 4485871.90 | 10189058.95 | 146657.0 | 168275.95 | ... | 10815975.65 | 561574.40 | 66812209.75 | 154570.20 | 5795367.80 | 30165092.85 | 119654562.8 | 8.811690e+12 | 5.600854e+08 | 5.907308e+08 |
2010 | 662522.45 | 2777405.45 | 1292045.25 | 23330.80 | 1076239.00 | 4744346.65 | 12366590.55 | 18290529.60 | 207261.6 | 195475.75 | ... | 9610727.40 | 76909.15 | 54995433.30 | 186212.05 | 3461802.35 | 28941576.35 | 100469498.5 | 6.804011e+12 | 5.688181e+08 | 6.134356e+08 |
3 rows × 25 columns
dfUS.reset_index(inplace=True)
dfUSm = pd.melt(dfUS,id_vars='YEAR',var_name='Group',value_name='MGal')
dfUSm.to_csv(outNationFN,index=False)