from IPython.core.display import Image
import collections
def plot_basics(counter, data_inst, fig, units):
    """Scatter-plot value frequencies on log-log axes in one panel of ``fig``.

    Parameters
    ----------
    counter : mapping
        Maps each observed value to its count (the callers in this file pass
        a ``collections.Counter``).
    data_inst : int
        1-based index of the subplot to draw into.
    fig : figure-like object
        Must provide ``add_subplot``; the grid shape is read from the
        module-level names ``n_graphs`` (rows) and ``n_data`` (columns),
        which must be defined before this function is called.
    units : str
        Label for the x axis.
    """
    ax = fig.add_subplot(n_graphs, n_data, data_inst)
    # Materialise the keys/values as lists: on Python 3 they are dict views,
    # not sequences.  keys() and values() iterate in matching order, so the
    # x/y pairing is preserved.
    ax.plot(list(counter.keys()), list(counter.values()),
            marker='o', linestyle='')
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.set_xlabel(units)
def plot_basics_ccdf(data, data_inst, fig, units):
    """Draw the CCDF of ``data`` in one panel of ``fig``.

    Parameters
    ----------
    data : sequence of numbers
        Passed unchanged to ``powerlaw.plot_ccdf``.
    data_inst : int
        1-based index of the subplot to draw into.
    fig : figure-like object
        Must provide ``add_subplot``; the grid shape is read from the
        module-level names ``n_graphs`` (rows) and ``n_data`` (columns),
        which must be defined before this function is called.
    units : str
        Label for the x axis.
    """
    # Imported locally, as in the rest of this file's plotting helpers; only
    # the name actually used is imported.
    from powerlaw import plot_ccdf

    ax = fig.add_subplot(n_graphs, n_data, data_inst)
    plot_ccdf(data, ax=ax, color='r', linewidth=.5)
    ax.set_xlabel(units)
def plot_fit_pdf(data, data_inst, fig, units):
    """Plot the PDF of ``data`` with power-law and exponential fits overlaid.

    The curve from ``Fit.plot_pdf`` is drawn in red, the fitted power law as
    a green dotted line and the fitted exponential as a blue dotted line.

    Parameters
    ----------
    data : sequence of numbers
        Fitted with ``powerlaw.Fit(data, discrete=True)``.
    data_inst : int
        1-based index of the subplot to draw into.
    fig : figure-like object
        Must provide ``add_subplot``; the grid shape is read from the
        module-level names ``n_graphs`` (rows) and ``n_data`` (columns),
        which must be defined before this function is called.
    units : str
        Label for the x axis.
    """
    from powerlaw import Fit

    ax = fig.add_subplot(n_graphs, n_data, data_inst)
    fit = Fit(data, discrete=True)
    fit.plot_pdf(ax=ax, color='r')
    fit.power_law.plot_pdf(ax=ax, linestyle=':', color='g')
    fit.exponential.plot_pdf(ax=ax, linestyle=':', color='b')
    # Fixed y range shared by every panel so the panels are comparable.
    ax.set_ylim(1e-8, 1)
    ax.set_xlabel(units)
def plot_fit_ccdf(data, data_inst, fig, units):
    """Plot the CCDF of ``data`` with two power-law fits overlaid.

    Two fits are computed: one where ``powerlaw.Fit`` chooses ``xmin``
    itself, and one with ``xmin`` forced to 1.  The curve from the first
    fit's ``plot_ccdf`` is drawn in red, its power law as a green dotted
    line, and the ``xmin=1`` power law as a blue dashed line.

    Parameters
    ----------
    data : sequence of numbers
        Data to fit (``discrete=True`` is used for both fits).
    data_inst : int
        1-based index of the subplot to draw into.  Panel 1 gets a deeper
        lower y limit (1e-5) than the other panels (1e-3).
    fig : figure-like object
        Must provide ``add_subplot``; the grid shape is read from the
        module-level names ``n_graphs`` (rows) and ``n_data`` (columns),
        which must be defined before this function is called.
    units : str
        Label for the x axis.
    """
    from powerlaw import Fit

    ax = fig.add_subplot(n_graphs, n_data, data_inst)

    # Fit with xmin selected by the library.
    auto_fit = Fit(data, discrete=True)
    auto_fit.plot_ccdf(ax=ax, color='r')
    auto_fit.power_law.plot_ccdf(ax=ax, linestyle=':', color='g')

    # Fit with xmin pinned to 1, for comparison.
    pinned_fit = Fit(data, discrete=True, xmin=1)
    pinned_fit.power_law.plot_ccdf(ax=ax, linestyle='--', color='b')

    lower_limit = 1e-5 if data_inst == 1 else 1e-3
    ax.set_ylim(lower_limit, 1)
    ax.set_xlabel(units)
# Make sure the three example data files are present in the working
# directory (downloading any that are missing), then load them.
from os import listdir
files = listdir('.')
for _dataset in ('blackouts.txt', 'words.txt', 'worm.txt'):
    if _dataset not in files:
        # urlretrieve lives in urllib on Python 2 and in urllib.request on
        # Python 3; try the modern location first.
        try:
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import urlretrieve
        urlretrieve('https://raw.github.com/jeffalstott/powerlaw/master/manuscript/' + _dataset, _dataset)

from numpy import genfromtxt
blackouts = genfromtxt('blackouts.txt')  # an optional /10**3 rescaling is left disabled
words = genfromtxt('words.txt')
worm = genfromtxt('worm.txt')
# Keep only worm entries strictly greater than 1.
worm = worm[worm > 1]
# Figure: raw value-frequency scatter plots, one panel per data set, written
# to basics_frequ.png.
# NOTE(review): `figure` and `plt` are not imported in this file; presumably
# they come from the notebook's pylab namespace -- confirm.
n_graphs = 1
n_data = 3
f = figure(figsize=(8,4))
# The second panel shows the worm data; a city-population alternative
# (units 'City Population') is left disabled.
for data_inst, (data, units) in enumerate([
        (words, 'Word Frequency'),
        (worm, 'Neuron Connections'),
        (blackouts, 'Population Affected\nby Blackouts'),
], 1):
    plot_basics(collections.Counter(data), data_inst, f, units)
# Only the spacing between panels is overridden; margins keep their defaults.
f.subplots_adjust(wspace=.3, hspace=.2)
f.savefig('basics_frequ.png', bbox_inches='tight')
plt.close()
Image(filename='basics_frequ.png')
# Figure: CCDF of each data set, one panel per data set, written to
# basics_ccdf.png.
# NOTE(review): `figure` and `plt` are not imported in this file; presumably
# they come from the notebook's pylab namespace -- confirm.
n_graphs = 1
n_data = 3
f = figure(figsize=(8,4))
# The second panel shows the worm data; a city-population alternative
# (units 'City Population') is left disabled.
for data_inst, (data, units) in enumerate([
        (words, 'Word Frequency'),
        (worm, 'Neuron Connections'),
        (blackouts, 'Population Affected\nby Blackouts'),
], 1):
    plot_basics_ccdf(data, data_inst, f, units)
# Only the spacing between panels is overridden; margins keep their defaults.
f.subplots_adjust(wspace=.3, hspace=.2)
f.savefig('basics_ccdf.png', bbox_inches='tight')
plt.close()
Image(filename='basics_ccdf.png')
# Figure: CCDF of each data set with power-law fits overlaid, one panel per
# data set, written to fit_ccdf.png.
# NOTE(review): `figure` and `plt` are not imported in this file; presumably
# they come from the notebook's pylab namespace -- confirm.
n_graphs = 1
n_data = 3
f = figure(figsize=(8,4))
# The second panel shows the worm data; a city-population alternative
# (units 'City Population') is left disabled.
for data_inst, (data, units) in enumerate([
        (words, 'Word Frequency'),
        (worm, 'Neuron Connections'),
        (blackouts, 'Population Affected\nby Blackouts'),
], 1):
    plot_fit_ccdf(data, data_inst, f, units)
# Only the spacing between panels is overridden; margins keep their defaults.
f.subplots_adjust(wspace=.3, hspace=.2)
f.savefig('fit_ccdf.png', bbox_inches='tight')
plt.close()
Image(filename='fit_ccdf.png')
# Output:
#   Calculating best minimal value for power law fit
#   Calculating best minimal value for power law fit
#   Calculating best minimal value for power law fit
# Figure: PDF of each data set with power-law and exponential fits overlaid,
# one panel per data set, written to fit_pdf.png.
# NOTE(review): `figure` and `plt` are not imported in this file; presumably
# they come from the notebook's pylab namespace -- confirm.
n_graphs = 1
n_data = 3
f = figure(figsize=(8,4))
# The second panel shows the worm data; a city-population alternative
# (units 'City Population') is left disabled.
for data_inst, (data, units) in enumerate([
        (words, 'Word Frequency'),
        (worm, 'Neuron Connections'),
        (blackouts, 'Population Affected\nby Blackouts'),
], 1):
    plot_fit_pdf(data, data_inst, f, units)
# Only the spacing between panels is overridden; margins keep their defaults.
f.subplots_adjust(wspace=.3, hspace=.2)
f.savefig('fit_pdf.png', bbox_inches='tight')
plt.close()
Image(filename='fit_pdf.png')
# Output:
#   Calculating best minimal value for power law fit
#   Calculating best minimal value for power law fit
#   Calculating best minimal value for power law fit
# Fit the word-frequency data without specifying xmin and report the
# fitted xmin, alpha and D attributes.
import powerlaw
data = words
fit = powerlaw.Fit(data, discrete=True)
# Formatted strings print the same text on Python 2 and Python 3, unlike
# the Python-2-only print statement.
print("xmin %s" % (fit.xmin,))
print("alpha %s" % (fit.alpha,))
print("D %s" % (fit.D,))
# Output:
#   Calculating best minimal value for power law fit
#   xmin 7.0
#   alpha 1.95015722691
#   D 0.00922886388026
# Fit the worm data without specifying xmin and report the fitted xmin,
# alpha and D attributes.
data = worm
fit = powerlaw.Fit(data, discrete=True)
# Formatted strings print the same text on Python 2 and Python 3, unlike
# the Python-2-only print statement.
print("xmin %s" % (fit.xmin,))
print("alpha %s" % (fit.alpha,))
print("D %s" % (fit.D,))
# Output:
#   Calculating best minimal value for power law fit
#   xmin 4.0
#   alpha 3.06809864499
#   D 0.0320698357516
# Fit the blackout data without specifying xmin and report the fitted
# xmin, alpha and D attributes.
data = blackouts
fit = powerlaw.Fit(data, discrete=True)
# Formatted strings print the same text on Python 2 and Python 3, unlike
# the Python-2-only print statement.
print("xmin %s" % (fit.xmin,))
print("alpha %s" % (fit.alpha,))
print("D %s" % (fit.D,))
# Output:
#   Calculating best minimal value for power law fit
#   xmin 230000.0
#   alpha 2.27263369895
#   D 0.060673358949
# Fit the word-frequency data with xmin fixed at 1 and report the
# resulting xmin, alpha and D attributes.
import powerlaw
data = words
fit = powerlaw.Fit(data, discrete=True, xmin=1)
# Formatted strings print the same text on Python 2 and Python 3, unlike
# the Python-2-only print statement.
print("xmin %s" % (fit.xmin,))
print("alpha %s" % (fit.alpha,))
print("D %s" % (fit.D,))
# Output:
#   xmin 1
#   alpha 1.65513313351
#   D 0.0529016945871
# Fit the worm data with xmin fixed at 1 and report the resulting xmin,
# alpha and D attributes.
data = worm
fit = powerlaw.Fit(data, discrete=True, xmin=1)
# Formatted strings print the same text on Python 2 and Python 3, unlike
# the Python-2-only print statement.
print("xmin %s" % (fit.xmin,))
print("alpha %s" % (fit.alpha,))
print("D %s" % (fit.D,))
# Output:
#   xmin 1
#   alpha 1.49659463104
#   D 0.380827698141
# Fit the blackout data with xmin fixed at 1 and report the resulting
# xmin, alpha and D attributes.
data = blackouts
fit = powerlaw.Fit(data, discrete=True, xmin=1)
# Formatted strings print the same text on Python 2 and Python 3, unlike
# the Python-2-only print statement.
print("xmin %s" % (fit.xmin,))
print("alpha %s" % (fit.alpha,))
print("D %s" % (fit.D,))
# Output:
#   xmin 1
#   alpha 1.08235393024
#   D 0.509206386103
# Compare the power-law fit of the word-frequency data against lognormal
# and exponential alternatives, then plot the empirical CCDF together with
# all three fitted curves and save the result to word_compare.png.
data = words
fit = powerlaw.Fit(data, discrete=True)
# Each comparison yields a pair, unpacked here as (R, p).  Formatted strings
# print the same text on Python 2 and Python 3, unlike the print statement.
R, p = fit.distribution_compare('power_law', 'lognormal')
print("%s %s" % (R, p))
R, p = fit.distribution_compare('power_law', 'exponential')
print("%s %s" % (R, p))
# NOTE(review): despite its name, `fig` is used below as an axes-like object
# (set_ylabel, set_xlabel, get_legend_handles_labels) -- confirm it is the
# axes returned by plot_ccdf rather than a Figure.
fig = fit.plot_ccdf(linewidth=3, label='Empirical Data')
fit.power_law.plot_ccdf(ax=fig, color='r', linestyle='--', label='Power law fit')
fit.lognormal.plot_ccdf(ax=fig, color='g', linestyle='--', label='Lognormal fit')
fit.exponential.plot_ccdf(ax=fig, color='c', linestyle='--', label='Exponential fit')
# Axis labels and legend.
fig.set_ylabel(r"$p(X\geq x)$")
fig.set_xlabel(r"Word Frequency")
handles, labels = fig.get_legend_handles_labels()
fig.legend(handles, labels, loc=3)
# NOTE(review): `savefig` and `plt` are not imported in this file; presumably
# they come from the notebook's pylab namespace -- confirm.
savefig('word_compare.png', bbox_inches='tight')
plt.close()
Image(filename='word_compare.png')
# Output:
#   Calculating best minimal value for power law fit
#   0.928017881164 0.425845694403
#   3025.02292587 6.48561424138e-20