In [17]:

# ms-python.python added
import os
try:
	os.chdir(os.path.join(os.getcwd(), 'MyGeoScripts'))
	print(os.getcwd())
except:
	pass

In [4]:

import geopandas as gpd
import pandas as pd

shapefile = '../Data/County/County.shp'

# read shape file using geopandas
county = gpd.read_file(shapefile)
county.head()

Out[4]:

	OBJECTID	AREA	PERIMETER	COUNTY3_	COUNTY3_ID	COUNTY	Shape_Leng	Shape_Area	geometry
0	1	5.677	15.047	2.0	1.0	Turkana	15.046838	5.676985	POLYGON ((35.79593 5.34449, 35.79659 5.34468, ...
1	2	6.177	11.974	3.0	2.0	Marsabit	11.974165	6.176831	POLYGON ((36.05061 4.45622, 36.23184 4.45124, ...
2	3	2.117	7.355	4.0	3.0	Mandera	7.355154	2.117196	POLYGON ((41.62133 3.97673, 41.62272 3.97860, ...
3	4	4.610	9.838	5.0	4.0	Wajir	9.838408	4.609589	POLYGON ((39.31812 3.47197, 39.31956 3.47168, ...
4	5	0.740	5.030	6.0	5.0	West Pokot	5.030271	0.740481	POLYGON ((35.12745 2.62271, 35.12762 2.62302, ...

In [5]:

county = gpd.read_file(shapefile)[['COUNTY','geometry']]
county.head()

Out[5]:

	COUNTY	geometry
0	Turkana	POLYGON ((35.79593 5.34449, 35.79659 5.34468, ...
1	Marsabit	POLYGON ((36.05061 4.45622, 36.23184 4.45124, ...
2	Mandera	POLYGON ((41.62133 3.97673, 41.62272 3.97860, ...
3	Wajir	POLYGON ((39.31812 3.47197, 39.31956 3.47168, ...
4	West Pokot	POLYGON ((35.12745 2.62271, 35.12762 2.62302, ...

In [6]:

datafile = '../Data/County/kewi_water_test_and_results1.csv'
df = pd.read_csv(datafile,sep=',')
df.head()

Out[6]:

	SAMPLE_NO	DATE	TYPE_OF_H2O	COUNTY	TYPE_OF_ANALYSIS	PH	ALKALINITY_Mg/L	CONDUCTIVITY?S/cm	Total_Dissolved_Solids	COLOUR	OBJECTID
0	19T	NaN	Tank	Machakos	Jar Test/Bacteriological Analysis/Full Chemica...	7.90	1.0	136.40	220.0	25	396
1	1484	18/6/2015	Effluent	Kajiado	Effluent	6.60	1.4	1001.92	1616.0	20	379
2	176	13/6/16	Borehole	Kiambu	Full Chemical Analysis & Bacteriological Analysis	5.40	2.0	35.34	57.0	0	472
3	89	4/6/2016	Tap	Murang'a	Full Chemical Analysis/Bacteriological Analysis	5.70	2.0	124.00	200.0	2.5	434
4	737	4/24/2012	Borehole	Nairobi	Full Chemical Analysis & Bacteriological Analysis	8.21	2.0	15.00	9.2	2.5	43

In [7]:

#take county and water alkalinity levels
alkaline_df1 = df[['COUNTY','ALKALINITY_Mg/L']]

#renamed second column
alkaline_df1.rename(columns={"ALKALINITY_Mg/L" : "ALKALINITY"}, inplace=True)
alkaline_df1.head()
alkaline_df1.describe()

/tmp/ipykernel_6062/4216690743.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  alkaline_df1.rename(columns={"ALKALINITY_Mg/L" : "ALKALINITY"}, inplace=True)

Out[7]:

	ALKALINITY
count	392.000000
mean	674.590306
std	2736.531968
min	1.000000
25%	38.250000
50%	98.000000
75%	297.000000
max	38600.000000

In [8]:

# remove rows with missing values in alkalinity
alkaline_df2= alkaline_df1.dropna()
alkaline_df2.head()

Out[8]:

	COUNTY	ALKALINITY
0	Machakos	1.0
1	Kajiado	1.4
2	Kiambu	2.0
3	Murang'a	2.0
4	Nairobi	2.0

In [9]:

alkaline_df2.dtypes

Out[9]:

COUNTY         object
ALKALINITY    float64
dtype: object

In [10]:

# remove duplicated county names by getting the mean alkalinity of the respective duplicates and having it as one county
alkaline_df3 = alkaline_df2.groupby('COUNTY').mean().reset_index()
alkaline_df3.head()
alkaline_df1.describe()

Out[10]:

	ALKALINITY
count	392.000000
mean	674.590306
std	2736.531968
min	1.000000
25%	38.250000
50%	98.000000
75%	297.000000
max	38600.000000

In [11]:

# merge county(geodata) and alkaline_df3(alkaline values) frames
# perform left merge to preserve every row on county geodata
merged = county.merge(alkaline_df3,how = 'left')

# replace NaN values with string NoData
merged.fillna('No data',inplace=True)
merged.head()

Out[11]:

	COUNTY	geometry	ALKALINITY
0	Turkana	POLYGON ((35.79593 5.34449, 35.79659 5.34468, ...	85.0
1	Marsabit	POLYGON ((36.05061 4.45622, 36.23184 4.45124, ...	344.888889
2	Mandera	POLYGON ((41.62133 3.97673, 41.62272 3.97860, ...	No data
3	Wajir	POLYGON ((39.31812 3.47197, 39.31956 3.47168, ...	178.333333
4	West Pokot	POLYGON ((35.12745 2.62271, 35.12762 2.62302, ...	No data

In [12]:

import json

# read data(a dataframe) to json
merged_json = json.loads(merged.to_json())

# convert to string like object
json_data = json.dumps(merged_json)
# json_data

In [13]:

from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)

palette = brewer['YlGnBu'][8]

#Reverse color order so that dark blue is highest alkalinity
palette = palette[::-1]

#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = 1, high = 38600)

#Define custom tick labels for color bar.
tick_labels = {'0': '0%','9650':'25%', '19300':'50%','28950':'75%', '38600': '100%'}

#Create color bar. 
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)

#Create figure object.
p = figure(title = 'Alkalinity of water in Kenyan counties', plot_height = 700 , plot_width = 700, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'ALKALINITY', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)

#Specify figure layout.
p.add_layout(color_bar, 'below')

#Display figure inline in Jupyter Notebook.
output_notebook()

#Display figure.
show(p)

Loading BokehJS ...

In [14]:

from bokeh.io import curdoc, output_notebook
from bokeh.models import Slider, HoverTool
from bokeh.layouts import widgetbox, row, column


#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)

#Define a sequential multi-hue color palette.
palette = brewer['BuPu'][9]

#Reverse color order so that dark blue is highest alkalinity.
palette = palette[::-1]

#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors. Input nan_color.
color_mapper = LinearColorMapper(palette = palette, low = 1, high = 1000, nan_color = '#d9d9d9')

#Define custom tick labels for color bar.
#tick_labels = {'0': 'No data','200':'200', '19300':'50%','28950':'75%', '38600': '100%'}

#Add hover tool
hover = HoverTool(tooltips = [ ('Country/region','@COUNTY'),('% ALKALINITY_MG/L', '@ALKALINITY')])


#Create color bar. 
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 1000, height = 20,
                     border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)


#Create figure object.
p = figure(title = 'Alkalinity of water in Kenyan counties', plot_height = 750 , plot_width = 700, toolbar_location = None, tools = [hover])
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'ALKALINITY', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)


p.add_layout(color_bar, 'below')

# # Make a column layout of widgetbox(slider) and plot, and add it to the current document
layout = column(p,widgetbox())
curdoc().add_root(layout)

#Display plot inline in Jupyter notebook
output_notebook()

#Display plot
show(layout)

BokehDeprecationWarning: 'WidgetBox' is deprecated and will be removed in Bokeh 3.0, use 'bokeh.models.Column' instead

Loading BokehJS ...

WARNING:bokeh.core.validation.check:W-1002 (EMPTY_LAYOUT): Layout has no children: WidgetBox(id='1142', ...)