from collections import Counter
from pprint import pprint
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
import scipy
from scipy.optimize import curve_fit
import tqdm as tq
from tqdm.notebook import tqdm
import watermark
import epidemik
from epidemik import EpiModel, NetworkEpiModel
%load_ext watermark
%matplotlib inline
We start by print out the versions of the libraries we're using for future reference
%watermark -n -v -m -g -iv
Python implementation: CPython Python version : 3.13.2 IPython version : 8.32.0 Compiler : Clang 16.0.0 (clang-1600.0.26.6) OS : Darwin Release : 24.3.0 Machine : arm64 Processor : arm CPU cores : 16 Architecture: 64bit Git hash: 6d9f1a6eb4084a40d0e306ad0ba8a2eaa55e5c85 watermark : 2.5.0 epidemik : 0.1.2 matplotlib: 3.10.0 networkx : 3.4.2 pandas : 2.2.3 tqdm : 4.67.1 numpy : 2.2.2 scipy : 1.15.1
Load default figure style
plt.style.use('./d4sci.mplstyle')
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
Let us start with a network where eveyrone is connected to everybody else
np.random.seed(1234)
N = 300
beta = 0.05
G_full = nx.erdos_renyi_graph(N, p=1.)
And an SI model
SI_full = NetworkEpiModel(G_full)
SI_full.add_interaction('S', 'I', 'I', beta=beta)
SI_full
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) File ~/Library/Caches/pypoetry/virtualenvs/epidemiology101-UtPQcdCX-py3.13/lib/python3.13/site-packages/IPython/core/formatters.py:770, in PlainTextFormatter.__call__(self, obj) 763 stream = StringIO() 764 printer = pretty.RepresentationPrinter(stream, self.verbose, 765 self.max_width, self.newline, 766 max_seq_length=self.max_seq_length, 767 singleton_pprinters=self.singleton_printers, 768 type_pprinters=self.type_printers, 769 deferred_pprinters=self.deferred_printers) --> 770 printer.pretty(obj) 771 printer.flush() 772 return stream.getvalue() File ~/Library/Caches/pypoetry/virtualenvs/epidemiology101-UtPQcdCX-py3.13/lib/python3.13/site-packages/IPython/lib/pretty.py:419, in RepresentationPrinter.pretty(self, obj) 408 return meth(obj, self, cycle) 409 if ( 410 cls is not object 411 # check if cls defines __repr__ (...) 417 and callable(_safe_getattr(cls, "__repr__", None)) 418 ): --> 419 return _repr_pprint(obj, self, cycle) 421 return _default_pprint(obj, self, cycle) 422 finally: File ~/Library/Caches/pypoetry/virtualenvs/epidemiology101-UtPQcdCX-py3.13/lib/python3.13/site-packages/IPython/lib/pretty.py:794, in _repr_pprint(obj, p, cycle) 792 """A pprint that just redirects to the normal repr function.""" 793 # Find newlines and replace them with p.break_() --> 794 output = repr(obj) 795 lines = output.splitlines() 796 with p.group(): File ~/Library/Caches/pypoetry/virtualenvs/epidemiology101-UtPQcdCX-py3.13/lib/python3.13/site-packages/epidemik/EpiModel.py:562, in EpiModel.__repr__(self) 560 text += "Parameters:\n" 561 for rate, value in self.params.items(): --> 562 text += " %s : %f\n" % (rate, value) 563 text += "\n\nTransitions:\n" 565 for edge in self.transitions.edges(data=True): TypeError: must be real number, not str
print("kavg=", SI_full.kavg_)
print(SI_full.transitions.edges(data=True))
kavg= 299.0 [('S', 'I', {'agent': 'I', 'rate': 'rate'})]
We perform 100 runs
def simulate_runs(model, Nruns):
values = []
for i in tqdm(range(Nruns), total=Nruns):
model.simulate(100, seeds={30: 'I'})
values.append(model.I)
values = pd.DataFrame(values).T
values.columns = np.arange(values.shape[1])
return values
Nruns = 100
values_full = simulate_runs(SI_full, Nruns)
0%| | 0/100 [00:00<?, ?it/s]
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[10], line 2 1 Nruns = 100 ----> 2 values_full = simulate_runs(SI_full, Nruns) Cell In[9], line 5, in simulate_runs(model, Nruns) 2 values = [] 4 for i in tqdm(range(Nruns), total=Nruns): ----> 5 model.simulate(100, seeds={30: 'I'}) 6 values.append(model.I) 8 values = pd.DataFrame(values).T File ~/Library/Caches/pypoetry/virtualenvs/epidemiology101-UtPQcdCX-py3.13/lib/python3.13/site-packages/epidemik/NetworkEpiModel.py:112, in NetworkEpiModel.simulate(self, timesteps, seeds, **kwargs) 109 prob = self.rng.random() 111 rate = self.params[infections[state_i][state_j]["rate"]] --> 112 if prob < rate: 113 new_state = infections[state_i][state_j]["target"] 114 population[t, node_j] = new_state TypeError: '<' not supported between instances of 'float' and 'str'
And plot them. Each run has it's own stochastic path, despite the strong connectivity constraint
fig, ax = plt.subplots(1)
values_full.median(axis=1).plot(ax=ax, color=colors[0])
values_full.plot(ax=ax, color=colors[0], lw=.5)
ax.get_legend().remove()
ax.set_xlim(0, 20)
ax.legend(['Median Run', 'Individual Runs'])
ax.set_xlabel('Time')
ax.set_ylabel('Population')
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[11], line 3 1 fig, ax = plt.subplots(1) ----> 3 values_full.median(axis=1).plot(ax=ax, color=colors[0]) 4 values_full.plot(ax=ax, color=colors[0], lw=.5) 5 ax.get_legend().remove() NameError: name 'values_full' is not defined
kmean = SI_full.kavg_
print(kmean)
We can obtain a specific average degree by setting: $$p=\frac{\langle k\rangle}{N}$$ We choose this specific value so that we match the average degree of the BA model below
G_small = nx.erdos_renyi_graph(N, p=0.0198)
SI_small = NetworkEpiModel(G_small)
SI_small.add_interaction('S', 'I', 'I', beta)
SI_small
SI_small.kavg_
values_small = simulate_runs(SI_small, Nruns)
fig, ax = plt.subplots(1)
values_small.median(axis=1).plot(ax=ax, color=colors[1])
values_small.plot(ax=ax, color=colors[1], lw=.3)
ax.get_legend().remove()
ax.legend(['Median Run', 'Individual Runs'])
ax.set_xlabel('Time')
ax.set_ylabel('Population')
time = np.arange(0, 100)
yt = lambda time, beta: N/(1+(N-1)*np.exp(-beta*N*time))
coef, covariance = curve_fit(yt, time, values_small.median(axis=1).values)
coef
fig, ax = plt.subplots(1)
ax.plot(time, yt(time, *coef), color=colors[5])
values_small.median(axis=1).plot(ax=ax, color=colors[1])
values_small.plot(ax=ax, color=colors[1], lw=.3)
ax.get_legend().remove()
ax.legend(['Fit', 'Median Run', 'Individual Runs'])
ax.set_xlabel('Time')
ax.set_ylabel('Population')
We can compare the two scenarios above by plotting the median number of infected node as a function of time. Not surprisingly, the network with the smallest average connectivity is slower
fig, ax = plt.subplots(1)
values_small.median(axis=1).plot(color=colors[1], ax=ax, label='ER Network')
values_full.median(axis=1).plot(color=colors[0], ax=ax, label='Full Network')
ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Population')
plt.plot(dict(G_small.degree()).values())
ax = plt.gca()
ax.set_xlabel('Node')
ax.set_ylabel('Degree')
But what about two networks with exactly the same average degree, but different topologies? Let's run a Barabasi albert model
BA = nx.barabasi_albert_graph(N, m=3)
SI_BA = NetworkEpiModel(BA)
SI_BA.add_interaction('S', 'I', 'I', beta)
SI_BA
The average degree is exactly the same
SI_BA.kavg_
values_BA = simulate_runs(SI_BA, Nruns)
fig, ax = plt.subplots(1)
values_BA.median(axis=1).plot(color=colors[2], ax=ax)
values_BA.plot(ax=ax, color=colors[2], lw=.1)
ax.get_legend().remove()
ax.legend(['Median Run', 'Individual Runs'])
ax.set_xlabel('Time')
ax.set_ylabel('Population')
coef, covariance = curve_fit(yt, time, values_BA.median(axis=1).values)
coef
fig, ax = plt.subplots(1)
ax.plot(time, yt(time, *coef), color=colors[5])
values_BA.median(axis=1).plot(ax=ax, color=colors[2])
values_BA.plot(ax=ax, color=colors[2], lw=.3)
ax.get_legend().remove()
ax.legend(['Fit', 'Median Run', 'Individual Runs'])
ax.set_xlabel('Time')
ax.set_ylabel('Population')
plt.plot(dict(BA.degree()).values())
ax = plt.gca()
ax.set_xlabel('Node')
ax.set_ylabel('Degree')
fig, ax = plt.subplots(1)
values_small.median(axis=1).plot(color=colors[1], ax=ax, label='ER Network')
values_BA.median(axis=1).plot(color=colors[2], ax=ax, label='BA Network')
ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Population')
fig, ax = plt.subplots(1)
values_full.median(axis=1).plot(color=colors[0], ax=ax, label='Full Network')
values_small.median(axis=1).plot(color=colors[1], ax=ax, label='ER Network')
values_BA.median(axis=1).plot(color=colors[2], ax=ax, label='BA Network')
ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Population')