Reads and plots Charlotte water quality data pulled using the CharlotteScraper.ipynb
notebook.
#Imports
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
## Bokeh map components
from bokeh.models import ColumnDataSource,WMTSTileSource
from bokeh.plotting import figure, show, output_file
from bokeh.tile_providers import *
from bokeh.io import output_notebook
output_notebook()
#Function to convert WGS84 points to Web Mercator
def wgs84_to_web_mercator(df, lon="lon", lat="lat"):
"""Converts decimal longitude/latitude to Web Mercator format"""
k = 6378137
df["x"] = df[lon] * (k * np.pi/180.0)
df["y"] = np.log(np.tan((90 + df[lat]) * np.pi/360.0)) * k
return df
#Get files
locationFiles = glob.glob('.\\data\\Charlotte\\**\\*Organic Chemicals.csv')
resultFiles = glob.glob('.\\data\\Charlotte\\**\\Organics Results *.csv')
resultFiles
#Merge result files
dfList = []
for file in resultFiles:
df = pd.read_csv(file,index_col="OBJECTID")#,parse_dates=['CollectionDate'])
#print(str(df.columns.values.tolist()))
dfList.append(df)
dfResults = pd.concat(dfList,axis='rows')
#Drop the GlobalID column
dfResults.drop("GlobalID",axis=1,inplace=True)
#Add a year column
dfResults['CollectionDate'] = pd.to_datetime(dfResults['CollectionDate'],format='%Y-%m-%d')
dfResults.insert(0,'Year',dfResults['CollectionDate'].dt.year)
#Merge locations files
dfList = []
for file in locationFiles[1:]:
df = pd.read_csv(file,index_col="OBJECTID")
df.insert(0,'Year',file.split("\\")[1][:4])
#print(str(df.columns.values.tolist()))
dfList.append(df)
dfLocations = pd.concat(dfList)
#Check that RawLocation codes are unique
datesDF = dfLocations.groupby(['RawLocationCode','x','y','Year'])['NPA'].count().unstack('Year')
datesDF.max() #None should be > 1
#Convert results to numbers, setting trace values to 0
dfResults['Value'] = dfResults['Result'].apply(lambda x: x.split()[0] if (x[0] != '<') else 0)
dfResults['Value'] = dfResults['Value'].astype(np.float)
#Group by analyte and year
dfX = dfResults.groupby(['Year','AnalyteName'])['Value']
dfYear = dfX.mean().unstack('Year')
dfYear.sample(5)
#Examine the row identifier columns
dfLocations[['Year','RawLocationCode','x','y']].head()
#Add coordinates to the results by joining the location data
dfSite = pd.merge(dfResults,dfLocations[['RawLocationCode','x','y']],how='left',on=['RawLocationCode'])
#Convert coordinates to web mercator
dfSite.rename({'x':'lon','y':'lat'},axis='columns',inplace=True)
dfSite = wgs84_to_web_mercator(dfSite)
dfSite.head()
#Create a bokeh CDS from the dataframe
source = ColumnDataSource(dfSite)
#Simple data source
p = figure(title="Map")
p.circle(x='x',y='y',source=source)
show(p)
xMin = dfSite.x.min();xMax = dfSite.x.max()
yMin = dfSite.y.min();yMax = dfSite.y.max()
p = figure(x_range=(xMin, xMax), y_range=(yMin, yMax))#,
#x_axis_type="mercator", y_axis_type="mercator")
p.add_tile(get_provider(CARTODBPOSITRON))
p.circle(x='x',y='y',source=source)
show(p)