# ms-python.python added
import os
try:
os.chdir(os.path.join(os.getcwd(), 'MyGeoScripts'))
print(os.getcwd())
except:
pass
import geopandas as gpd
import pandas as pd
shapefile = '../Data/County/County.shp'
# read shape file using geopandas
county = gpd.read_file(shapefile)
county.head()
OBJECTID | AREA | PERIMETER | COUNTY3_ | COUNTY3_ID | COUNTY | Shape_Leng | Shape_Area | geometry | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 5.677 | 15.047 | 2.0 | 1.0 | Turkana | 15.046838 | 5.676985 | POLYGON ((35.79593 5.34449, 35.79659 5.34468, ... |
1 | 2 | 6.177 | 11.974 | 3.0 | 2.0 | Marsabit | 11.974165 | 6.176831 | POLYGON ((36.05061 4.45622, 36.23184 4.45124, ... |
2 | 3 | 2.117 | 7.355 | 4.0 | 3.0 | Mandera | 7.355154 | 2.117196 | POLYGON ((41.62133 3.97673, 41.62272 3.97860, ... |
3 | 4 | 4.610 | 9.838 | 5.0 | 4.0 | Wajir | 9.838408 | 4.609589 | POLYGON ((39.31812 3.47197, 39.31956 3.47168, ... |
4 | 5 | 0.740 | 5.030 | 6.0 | 5.0 | West Pokot | 5.030271 | 0.740481 | POLYGON ((35.12745 2.62271, 35.12762 2.62302, ... |
county = gpd.read_file(shapefile)[['COUNTY','geometry']]
county.head()
COUNTY | geometry | |
---|---|---|
0 | Turkana | POLYGON ((35.79593 5.34449, 35.79659 5.34468, ... |
1 | Marsabit | POLYGON ((36.05061 4.45622, 36.23184 4.45124, ... |
2 | Mandera | POLYGON ((41.62133 3.97673, 41.62272 3.97860, ... |
3 | Wajir | POLYGON ((39.31812 3.47197, 39.31956 3.47168, ... |
4 | West Pokot | POLYGON ((35.12745 2.62271, 35.12762 2.62302, ... |
datafile = '../Data/County/kewi_water_test_and_results1.csv'
df = pd.read_csv(datafile,sep=',')
df.head()
SAMPLE_NO | DATE | TYPE_OF_H2O | COUNTY | TYPE_OF_ANALYSIS | PH | ALKALINITY_Mg/L | CONDUCTIVITY?S/cm | Total_Dissolved_Solids | COLOUR | OBJECTID | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 19T | NaN | Tank | Machakos | Jar Test/Bacteriological Analysis/Full Chemica... | 7.90 | 1.0 | 136.40 | 220.0 | 25 | 396 |
1 | 1484 | 18/6/2015 | Effluent | Kajiado | Effluent | 6.60 | 1.4 | 1001.92 | 1616.0 | 20 | 379 |
2 | 176 | 13/6/16 | Borehole | Kiambu | Full Chemical Analysis & Bacteriological Analysis | 5.40 | 2.0 | 35.34 | 57.0 | 0 | 472 |
3 | 89 | 4/6/2016 | Tap | Murang'a | Full Chemical Analysis/Bacteriological Analysis | 5.70 | 2.0 | 124.00 | 200.0 | 2.5 | 434 |
4 | 737 | 4/24/2012 | Borehole | Nairobi | Full Chemical Analysis & Bacteriological Analysis | 8.21 | 2.0 | 15.00 | 9.2 | 2.5 | 43 |
#take county and water alkalinity levels
alkaline_df1 = df[['COUNTY','ALKALINITY_Mg/L']]
#renamed second column
alkaline_df1.rename(columns={"ALKALINITY_Mg/L" : "ALKALINITY"}, inplace=True)
alkaline_df1.head()
alkaline_df1.describe()
/tmp/ipykernel_6062/4216690743.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy alkaline_df1.rename(columns={"ALKALINITY_Mg/L" : "ALKALINITY"}, inplace=True)
ALKALINITY | |
---|---|
count | 392.000000 |
mean | 674.590306 |
std | 2736.531968 |
min | 1.000000 |
25% | 38.250000 |
50% | 98.000000 |
75% | 297.000000 |
max | 38600.000000 |
# remove rows with missing values in alkalinity
alkaline_df2= alkaline_df1.dropna()
alkaline_df2.head()
COUNTY | ALKALINITY | |
---|---|---|
0 | Machakos | 1.0 |
1 | Kajiado | 1.4 |
2 | Kiambu | 2.0 |
3 | Murang'a | 2.0 |
4 | Nairobi | 2.0 |
alkaline_df2.dtypes
COUNTY object ALKALINITY float64 dtype: object
# remove duplicated county names by getting the mean alkalinity of the respective duplicates and having it as one county
alkaline_df3 = alkaline_df2.groupby('COUNTY').mean().reset_index()
alkaline_df3.head()
alkaline_df1.describe()
ALKALINITY | |
---|---|
count | 392.000000 |
mean | 674.590306 |
std | 2736.531968 |
min | 1.000000 |
25% | 38.250000 |
50% | 98.000000 |
75% | 297.000000 |
max | 38600.000000 |
# merge county(geodata) and alkaline_df3(alkaline values) frames
# perform left merge to preserve every row on county geodata
merged = county.merge(alkaline_df3,how = 'left')
# replace NaN values with string NoData
merged.fillna('No data',inplace=True)
merged.head()
COUNTY | geometry | ALKALINITY | |
---|---|---|---|
0 | Turkana | POLYGON ((35.79593 5.34449, 35.79659 5.34468, ... | 85.0 |
1 | Marsabit | POLYGON ((36.05061 4.45622, 36.23184 4.45124, ... | 344.888889 |
2 | Mandera | POLYGON ((41.62133 3.97673, 41.62272 3.97860, ... | No data |
3 | Wajir | POLYGON ((39.31812 3.47197, 39.31956 3.47168, ... | 178.333333 |
4 | West Pokot | POLYGON ((35.12745 2.62271, 35.12762 2.62302, ... | No data |
import json
# read data(a dataframe) to json
merged_json = json.loads(merged.to_json())
# convert to string like object
json_data = json.dumps(merged_json)
# json_data
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer
#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)
palette = brewer['YlGnBu'][8]
#Reverse color order so that dark blue is highest alkalinity
palette = palette[::-1]
#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = 1, high = 38600)
#Define custom tick labels for color bar.
tick_labels = {'0': '0%','9650':'25%', '19300':'50%','28950':'75%', '38600': '100%'}
#Create color bar.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)
#Create figure object.
p = figure(title = 'Alkalinity of water in Kenyan counties', plot_height = 700 , plot_width = 700, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#Add patch renderer to figure.
p.patches('xs','ys', source = geosource,fill_color = {'field' :'ALKALINITY', 'transform' : color_mapper},
line_color = 'black', line_width = 0.25, fill_alpha = 1)
#Specify figure layout.
p.add_layout(color_bar, 'below')
#Display figure inline in Jupyter Notebook.
output_notebook()
#Display figure.
show(p)
from bokeh.io import curdoc, output_notebook
from bokeh.models import Slider, HoverTool
from bokeh.layouts import widgetbox, row, column
#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)
#Define a sequential multi-hue color palette.
palette = brewer['BuPu'][9]
#Reverse color order so that dark blue is highest alkalinity.
palette = palette[::-1]
#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors. Input nan_color.
color_mapper = LinearColorMapper(palette = palette, low = 1, high = 1000, nan_color = '#d9d9d9')
#Define custom tick labels for color bar.
#tick_labels = {'0': 'No data','200':'200', '19300':'50%','28950':'75%', '38600': '100%'}
#Add hover tool
hover = HoverTool(tooltips = [ ('Country/region','@COUNTY'),('% ALKALINITY_MG/L', '@ALKALINITY')])
#Create color bar.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 1000, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)
#Create figure object.
p = figure(title = 'Alkalinity of water in Kenyan counties', plot_height = 750 , plot_width = 700, toolbar_location = None, tools = [hover])
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#Add patch renderer to figure.
p.patches('xs','ys', source = geosource,fill_color = {'field' :'ALKALINITY', 'transform' : color_mapper},
line_color = 'black', line_width = 0.25, fill_alpha = 1)
p.add_layout(color_bar, 'below')
# # Make a column layout of widgetbox(slider) and plot, and add it to the current document
layout = column(p,widgetbox())
curdoc().add_root(layout)
#Display plot inline in Jupyter notebook
output_notebook()
#Display plot
show(layout)