import pandas as pd
from altair import Chart, X, Y, Axis, Color
# Load the cleaned table of Ebola outbreaks recorded before 2014.
# index_col=False keeps pandas from using the first column as the index.
outbrks_before_2014 = pd.read_csv(
    "data/in/ebola-outbreaks-before-2014-cleaned.csv",
    index_col=False,
    encoding="utf-8",
)
# Preview the first two rows.
outbrks_before_2014.head(n=2)
Start date | End date | Duration (days) | Country name | Ebola subtype | Reported number of human cases | Reported number of deaths among cases | Reported % of deaths among cases | |
---|---|---|---|---|---|---|---|---|
0 | 2012-11-01 | 2013-01-31 | 91 | Uganda | Sudan virus | 6 | 3 | 0.500 |
1 | 2012-06-01 | 2012-11-30 | 182 | Democratic Republic of the Congo | Bundibugyo virus | 36 | 13 | 0.361 |
# Preview the last two rows.
outbrks_before_2014.tail(n=2)
Start date | End date | Duration (days) | Country name | Ebola subtype | Reported number of human cases | Reported number of deaths among cases | Reported % of deaths among cases | |
---|---|---|---|---|---|---|---|---|
31 | 1976-01-01 | 1976-12-31 | 365 | Sudan (South Sudan) | Sudan virus | 284 | 151 | 0.53 |
32 | 1976-01-01 | 1976-12-31 | 365 | Democratic Republic of the Congo | Zaire virus | 318 | 280 | 0.88 |
# Headline figure: the number of rows in the table.
"There has been {} Ebola outbreaks before 2014".format(
    outbrks_before_2014.shape[0]
)
'There has been 33 Ebola outbreaks before 2014'
# Bar chart: one bar per country, bar length = count of rows for that country.
num_outbrks = Chart(outbrks_before_2014).mark_bar().encode(
    x=X('count(*):Q', axis=Axis(title='Number of outbreaks')),
    y=Y('Country name:O'),
)
num_outbrks
# Text summary of the chart above: how many outbreaks each country has had.
from collections import Counter

# Tally the column once; the previous version rebuilt the column as a list and
# rescanned it for every country.
outbrks_per_country = Counter(outbrks_before_2014['Country name'])
unique_countries = set(outbrks_per_country)
print("There has been outbreaks in {} countries:".format(len(unique_countries)))
for name in unique_countries:
    print("* in {0} -> {1}".format(name, outbrks_per_country[name]))
There has been outbreaks in 11 countries: * in Uganda -> 5 * in England -> 1 * in South Africa -> 1 * in Democratic Republic of the Congo -> 9 * in Russia -> 2 * in Italy -> 1 * in Gabon -> 4 * in USA -> 3 * in Philippines -> 3 * in Sudan (South Sudan) -> 3 * in Côte d'Ivoire (Ivory Coast) -> 1
# Timeline: each outbreak is a bar spanning its start and end date,
# placed on its country's row and coloured by country.
outbrks_time = Chart(outbrks_before_2014).mark_bar().encode(
    X('Start date:T', axis=Axis(title='time')),
    x2='End date:T',
    y=Y('Country name:N'),
    color=Color('Country name:N'),
)
outbrks_time
# Average reported human cases per country.
cases = Chart(outbrks_before_2014).mark_bar(color='#f2b47b').encode(
    x='average(Reported number of human cases):Q',
    y='Country name:O',
)
cases

# Average reported deaths per country, same layout in a darker colour.
deceased = Chart(outbrks_before_2014).mark_bar(color='#aa4949').encode(
    x='average(Reported number of deaths among cases):Q',
    y='Country name:O',
)
deceased
# Subtype x country grid; circle size = average reported human cases.
Chart(outbrks_before_2014).mark_circle(color='#f2b47b').encode(
    x='Ebola subtype:N',
    y='Country name:N',
    size='average(Reported number of human cases):Q',
)

# Same grid; circle size = average reported deaths among cases.
Chart(outbrks_before_2014).mark_circle(color='#aa4949').encode(
    x='Ebola subtype:N',
    y='Country name:N',
    size='average(Reported number of deaths among cases):Q',
)
# Scatter plot of reported cases (x) against reported deaths (y),
# one point per row, coloured by Ebola subtype.
cases_deaths_virus = Chart(outbrks_before_2014).mark_point().encode(
    x=X('Reported number of human cases:Q'),
    y=Y('Reported number of deaths among cases:Q'),
    color=Color('Ebola subtype:N'),
)
cases_deaths_virus
# Subtype x country grid; circle size = average reported deaths,
# circle colour = country.
Chart(outbrks_before_2014).mark_circle().encode(
    x='Ebola subtype:N',
    y='Country name:N',
    color=Color('Country name:N'),
    size='average(Reported number of deaths among cases):Q',
)
# Keep only the rows whose country is Uganda.
df_uganda = outbrks_before_2014.loc[
    outbrks_before_2014["Country name"] == "Uganda"
]
# Timeline of those outbreaks, one row and one colour per Ebola subtype.
Chart(df_uganda).mark_bar().encode(
    X('Start date:T', axis=Axis(title='time')),
    x2='End date:T',
    y=Y('Ebola subtype:N'),
    color=Color('Ebola subtype:N'),
)
# Timeline of all outbreaks: one row per country, bars coloured by Ebola subtype.
Chart(outbrks_before_2014).mark_bar().encode(
    X('Start date:T', axis=Axis(title='time')),
    x2='End date:T',
    y=Y('Country name:N'),
    color=Color('Ebola subtype:N'),
)
# Circles positioned by start date and country, coloured by Ebola subtype and
# sized by the average reported deaths.
# NOTE(review): x2 is normally paired with ranged marks such as bars; confirm
# it has any visible effect on a circle mark.
deaths_virus_time = Chart(outbrks_before_2014).mark_circle().encode(
    X('Start date:T', axis=Axis(title='time')),
    x2='End date:T',
    y=Y('Country name:N'),
    color=Color('Ebola subtype:N'),
    size='average(Reported number of deaths among cases):Q',
)
deaths_virus_time
28,637 reported cases and 11,315 deaths
sources: WHO (who.int/mediacentre/factsheets/fs103/en/), BBC (bbc.com/news/world-africa-28755033)
import json
# Top-level properties added to every exported plot spec.
fix = {
    "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json",
    "description": "A simple bar chart with embedded data.",
}

# Interval ("brush") selection.
brush = {
    "selection": {
        "brush": {"type": "interval"},
    },
}

# Multi-point selection driven by mouseover ("paintbrush").
paintbrush = {
    "selection": {
        "paintbrush": {
            "type": "multi",
            "on": "mouseover",
            "nearest": True,
        },
    },
}

# Goes under `encoding` together with the paintbrush selection:
# 300 where the selection applies, 50 otherwise.
size = {
    "condition": {"selection": "paintbrush", "value": 300},
    "value": 50,
}

# Interval selection bound to the scales, used for zooming.
zoom = {
    "selection": {
        "grid": {"type": "interval", "bind": "scales"},
    },
}
def from_dict_to_json(data_dict, add_on, interact):
    """Serialize a chart spec to a JSON string, without its embedded data.

    The "data" entry of ``data_dict`` is left out, then the top-level entries
    of ``add_on`` and ``interact`` are applied in that order (later entries
    win on a key clash).  "selection" entries are the exception: the named
    selections from all three inputs are merged, so a selection already
    present on the spec is not discarded when ``interact`` brings its own.

    None of the arguments is modified.

    Args:
        data_dict: chart specification as a dict (e.g. from ``Chart.to_dict()``).
        add_on: extra top-level properties to set on the spec.
        interact: interaction properties (typically a "selection" mapping).

    Returns:
        The resulting specification as a JSON string.
    """
    # Copy instead of popping so the caller's dict keeps its "data" entry.
    spec = {key: value for key, value in data_dict.items() if key != "data"}

    # Collect named selections before the plain updates below overwrite them.
    selections = {}
    for layer in (spec, add_on, interact):
        layer_selection = layer.get("selection")
        if isinstance(layer_selection, dict):
            selections.update(layer_selection)

    spec.update(add_on)
    spec.update(interact)
    if selections:
        spec["selection"] = selections
    return json.dumps(spec)
# The chart's data is saved to its own file; the per-plot JSON strings
# produced below do not include it.
num_outbrks_dict = num_outbrks.to_dict()
outbrks_data_json = json.dumps(num_outbrks_dict["data"])
with open("data_for_viz.json", mode="w") as outfile:
    outfile.write(outbrks_data_json)
# Plot 1 - outbreaks/country (with an interval brush)
from_dict_to_json(data_dict=num_outbrks_dict, add_on=fix, interact=brush)
'{"mark": "bar", "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json", "encoding": {"y": {"type": "ordinal", "field": "Country name"}, "x": {"axis": {"title": "Number of outbreaks"}, "aggregate": "count", "type": "quantitative", "field": "*"}}, "description": "A simple bar chart with embedded data.", "selection": {"brush": {"type": "interval"}}}'
# Plot 2 - outbreaks over time (zoomable)
from_dict_to_json(data_dict=outbrks_time.to_dict(), add_on=fix, interact=zoom)
'{"mark": "bar", "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json", "encoding": {"y": {"type": "nominal", "field": "Country name"}, "color": {"type": "nominal", "field": "Country name"}, "x2": {"type": "temporal", "field": "End date"}, "x": {"axis": {"title": "time"}, "type": "temporal", "field": "Start date"}}, "description": "A simple bar chart with embedded data.", "selection": {"grid": {"bind": "scales", "type": "interval"}}}'
# Plot 3 - cases vs deaths per virus subtype
cases_deaths_virus_dict = cases_deaths_virus.to_dict()
# Point size is conditional on the "paintbrush" selection, so that selection
# is registered on the spec as well.
cases_deaths_virus_dict["encoding"].update(size=size)
cases_deaths_virus_dict.update(paintbrush)
# NOTE(review): `zoom` carries its own "selection" entry; confirm that both
# the paintbrush and the zoom selections end up in the emitted spec.
from_dict_to_json(data_dict=cases_deaths_virus_dict, add_on=fix, interact=zoom)
'{"mark": "point", "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json", "selection": {"grid": {"bind": "scales", "type": "interval"}}, "encoding": {"color": {"type": "nominal", "field": "Ebola subtype"}, "y": {"type": "quantitative", "field": "Reported number of deaths among cases"}, "x": {"type": "quantitative", "field": "Reported number of human cases"}, "size": {"value": 50, "condition": {"value": 300, "selection": "paintbrush"}}}, "description": "A simple bar chart with embedded data."}'
# Plot 4 - average deaths per country and virus subtype over time (zoomable)
from_dict_to_json(data_dict=deaths_virus_time.to_dict(), add_on=fix, interact=zoom)
'{"mark": "circle", "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json", "encoding": {"y": {"type": "nominal", "field": "Country name"}, "color": {"type": "nominal", "field": "Ebola subtype"}, "x2": {"type": "temporal", "field": "End date"}, "x": {"axis": {"title": "time"}, "type": "temporal", "field": "Start date"}, "size": {"aggregate": "average", "type": "quantitative", "field": "Reported number of deaths among cases"}}, "description": "A simple bar chart with embedded data.", "selection": {"grid": {"bind": "scales", "type": "interval"}}}'