In [1]:
# ms-python.python added
import os
try:
	os.chdir(os.path.join(os.getcwd(), 'MyGeoScripts'))
	print(os.getcwd())
except:
	pass
In [2]:
import geopandas as gpd
import pandas as pd

shapefile = '../Data/County/County.shp'

# read shape file using geopandas
county = gpd.read_file(shapefile)
county.head()
Out[2]:
OBJECTID AREA PERIMETER COUNTY3_ COUNTY3_ID COUNTY Shape_Leng Shape_Area geometry
0 1 5.677 15.047 2.0 1.0 Turkana 15.046838 5.676985 POLYGON ((35.79592513993344 5.344485759410418,...
1 2 6.177 11.974 3.0 2.0 Marsabit 11.974165 6.176831 POLYGON ((36.05060958836197 4.456217766237103,...
2 3 2.117 7.355 4.0 3.0 Mandera 7.355154 2.117196 POLYGON ((41.62132644625865 3.97673463779347, ...
3 4 4.610 9.838 5.0 4.0 Wajir 9.838408 4.609589 POLYGON ((39.31811523443071 3.47196984263428, ...
4 5 0.740 5.030 6.0 5.0 West Pokot 5.030271 0.740481 POLYGON ((35.12744522072052 2.622711658216588,...
In [3]:
county = gpd.read_file(shapefile)[['COUNTY','geometry']]
county.head()
Out[3]:
COUNTY geometry
0 Turkana POLYGON ((35.79592513993344 5.344485759410418,...
1 Marsabit POLYGON ((36.05060958836197 4.456217766237103,...
2 Mandera POLYGON ((41.62132644625865 3.97673463779347, ...
3 Wajir POLYGON ((39.31811523443071 3.47196984263428, ...
4 West Pokot POLYGON ((35.12744522072052 2.622711658216588,...
In [4]:
datafile = '../Data/County/kewi_water_test_and_results1.csv'
df = pd.read_csv(datafile,sep=',')
df.head()
Out[4]:
SAMPLE_NO DATE TYPE_OF_H2O COUNTY TYPE_OF_ANALYSIS PH ALKALINITY_Mg/L CONDUCTIVITY?S/cm Total_Dissolved_Solids COLOUR OBJECTID
0 19T NaN Tank Machakos Jar Test/Bacteriological Analysis/Full Chemica... 7.90 1.0 136.40 220.0 25 396
1 1484 18/6/2015 Effluent Kajiado Effluent 6.60 1.4 1001.92 1616.0 20 379
2 176 13/6/16 Borehole Kiambu Full Chemical Analysis & Bacteriological Analysis 5.40 2.0 35.34 57.0 0 472
3 89 4/6/2016 Tap Murang'a Full Chemical Analysis/Bacteriological Analysis 5.70 2.0 124.00 200.0 2.5 434
4 737 4/24/2012 Borehole Nairobi Full Chemical Analysis & Bacteriological Analysis 8.21 2.0 15.00 9.2 2.5 43
In [5]:
#take county and water alkalinity levels
alkaline_df1 = df[['COUNTY','ALKALINITY_Mg/L']]

#renamed second column
alkaline_df1.rename(columns={"ALKALINITY_Mg/L" : "ALKALINITY"}, inplace=True)
alkaline_df1.head()
alkaline_df1.describe()
/home/marvin/.virtualenvs/StartUp/lib/python3.7/site-packages/pandas/core/frame.py:4025: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
Out[5]:
ALKALINITY
count 392.000000
mean 674.590306
std 2736.531968
min 1.000000
25% 38.250000
50% 98.000000
75% 297.000000
max 38600.000000
In [6]:
# remove rows with missing values in alkalinity
alkaline_df2= alkaline_df1.dropna()
alkaline_df2.head()
Out[6]:
COUNTY ALKALINITY
0 Machakos 1.0
1 Kajiado 1.4
2 Kiambu 2.0
3 Murang'a 2.0
4 Nairobi 2.0
In [7]:
alkaline_df2.dtypes
Out[7]:
COUNTY         object
ALKALINITY    float64
dtype: object
In [8]:
# remove duplicated county names by getting the mean alkalinity of the respective duplicates and having it as one county
alkaline_df3 = alkaline_df2.groupby('COUNTY').mean().reset_index()
alkaline_df3.head()
alkaline_df1.describe()
Out[8]:
ALKALINITY
count 392.000000
mean 674.590306
std 2736.531968
min 1.000000
25% 38.250000
50% 98.000000
75% 297.000000
max 38600.000000
In [9]:
# merge county(geodata) and alkaline_df3(alkaline values) frames
# perform left merge to preserve every row on county geodata
merged = county.merge(alkaline_df3,how = 'left')

# replace NaN values with string NoData
merged.fillna('No data',inplace=True)
merged.head()
Out[9]:
COUNTY geometry ALKALINITY
0 Turkana POLYGON ((35.79592513993344 5.344485759410418,... 85
1 Marsabit POLYGON ((36.05060958836197 4.456217766237103,... 344.889
2 Mandera POLYGON ((41.62132644625865 3.97673463779347, ... No data
3 Wajir POLYGON ((39.31811523443071 3.47196984263428, ... 178.333
4 West Pokot POLYGON ((35.12744522072052 2.622711658216588,... No data
In [10]:
import json

# read data(a dataframe) to json
merged_json = json.loads(merged.to_json())

# convert to string like object
json_data = json.dumps(merged_json)
# json_data
In [11]:
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)

palette = brewer['YlGnBu'][8]

#Reverse color order so that dark blue is highest alkalinity
palette = palette[::-1]

#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = 1, high = 38600)

#Define custom tick labels for color bar.
tick_labels = {'0': '0%','9650':'25%', '19300':'50%','28950':'75%', '38600': '100%'}

#Create color bar. 
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)

#Create figure object.
p = figure(title = 'Alkalinity of water in Kenyan counties', plot_height = 700 , plot_width = 700, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'ALKALINITY', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)

#Specify figure layout.
p.add_layout(color_bar, 'below')

#Display figure inline in Jupyter Notebook.
output_notebook()

#Display figure.
show(p)
Loading BokehJS ...