Sebastian Raschka
last updated: 05/25/2014

# Day 2 - One Python Benchmark per Day

### Mean functions

In [1]:
# The statistics module has been added to
# the standard library in Python 3.4

import statistics as stats
import numpy as np

def calc_mean(samples):
    """Return the arithmetic mean of ``samples`` using only built-ins.

    The length is converted to ``float`` so the division is a true
    division even on interpreters where ``/`` on two ints truncates.

    Args:
        samples: A sized collection of numbers (must support ``sum`` and ``len``).

    Returns:
        The sum of the items divided by their count.

    Raises:
        ZeroDivisionError: If ``samples`` is empty.
    """
    total = sum(samples)
    count = float(len(samples))
    return total / count

def np_mean(samples):
    """Return ``numpy.mean`` of ``samples``, passing the argument through unchanged.

    In this notebook it is benchmarked with a plain Python list, so any
    conversion NumPy does internally is part of what gets timed.

    Args:
        samples: Any array-like accepted by ``numpy.mean``.

    Returns:
        The mean as computed by NumPy.
    """
    mean_value = np.mean(samples)
    return mean_value

def np_mean_ary(np_array):
    """Return ``numpy.mean`` of ``np_array``.

    In this notebook it is benchmarked with an array built beforehand, so
    the timing is meant to exclude any list-to-array conversion.

    Args:
        np_array: The data to average; the notebook passes a NumPy array.

    Returns:
        The mean as computed by NumPy.
    """
    mean_value = np.mean(np_array)
    return mean_value

def st_mean(samples):
    """Return the mean of ``samples`` as computed by ``statistics.mean``.

    Args:
        samples: The data to average, forwarded unchanged to ``stats.mean``.

    Returns:
        Whatever ``statistics.mean`` returns for the given data.
    """
    mean_value = stats.mean(samples)
    return mean_value

def np_convert_and_mean_ary(samples):
    """Convert ``samples`` to a NumPy array, then return its ``numpy.mean``.

    The explicit ``np.array`` call is part of the function so that the
    conversion cost is included whenever this function is timed.

    Args:
        samples: Any array-like accepted by ``numpy.array``.

    Returns:
        The mean of the converted array as computed by NumPy.
    """
    as_array = np.array(samples)
    return np.mean(as_array)


### Verifying that functions work correctly

In [2]:
n = 1000
samples = list(range(n))
samples_array = np.arange(n)

# Sanity check before timing: every implementation must report the same
# mean for the same data (list input for all but np_mean_ary, which gets
# the equivalent pre-built array).
assert all(
    other == st_mean(samples)
    for other in (
        np_mean(samples),
        calc_mean(samples),
        np_mean_ary(samples_array),
        np_convert_and_mean_ary(samples),
    )
)
print('ok')

ok


### Timing

In [3]:
import timeit

funcs = ['st_mean', 'np_mean', 'calc_mean', 'np_mean_ary', 'np_convert_and_mean_ary']
orders_n = [10**n for n in range(1, 6)]
times_n = {f: [] for f in funcs}

# For every sample size, time each function and keep the best of three
# runs. Each run executes the call 1000 times, so the recorded total in
# seconds is numerically the time per call in milliseconds.
for n in orders_n:
    # Build both input flavours once per size so the choice of input
    # never depends on which function happened to be timed before.
    list_input = list(range(n))
    array_input = np.arange(n)
    for f in funcs:
        # Only np_mean_ary is meant to receive a ready-made array; every
        # other function (including np_convert_and_mean_ary, whose whole
        # point is the list -> array conversion) must be given the list.
        # Previously `samples` stayed an ndarray after np_mean_ary ran,
        # so the function timed after it never saw a list.
        samples = array_input if f == 'np_mean_ary' else list_input
        timer = timeit.Timer('%s(samples)' % f,
                             'from __main__ import %s, samples' % f)
        times_n[f].append(min(timer.repeat(repeat=3, number=1000)))


### Setting up the plot

In [4]:
import platform
import multiprocessing

def print_sysinfo():
    """Print interpreter, NumPy, compiler and host details to standard output.

    Every line is formatted into a single string before it is printed.
    Passing several arguments to ``print(...)`` only works as intended
    when ``print`` is a function; under Python 2 without
    ``from __future__ import print_function`` it prints a tuple such as
    ``('compiler:', '...')``. With one pre-formatted string the report
    reads the same on both interpreter lines.

    Returns:
        None.
    """
    print('\nPython version: %s' % platform.python_version())
    print('NumPy version %s' % np.__version__)
    print('compiler: %s' % platform.python_compiler())

    print('\nsystem     : %s' % platform.system())
    print('release    : %s' % platform.release())
    print('machine    : %s' % platform.machine())
    print('processor  : %s' % platform.processor())
    print('interpreter: %s' % platform.architecture()[0])
    print('CPU count  : %s' % multiprocessing.cpu_count())
    print('\n\n')

In [5]:
%matplotlib inline

In [6]:
import matplotlib.pyplot as plt

def plot_timing():
    """Draw a log-log chart of the recorded timings for every mean function.

    Reads the module-level ``orders_n`` (x values) and ``times_n`` (one
    list of timings per function name). One line is plotted per entry of
    the label table below, and the figure is annotated with the smallest
    and largest ratio of the ``st_mean`` timings to the
    ``np_convert_and_mean_ary`` timings.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # (key into times_n, legend text) in plotting order.
    series = [
        ('st_mean', 'statistics.mean()'),
        ('np_mean', 'numpy.mean() on list'),
        ('np_mean_ary', 'numpy.mean() on array'),
        ('calc_mean', 'sum(samples)/len(samples)'),
        ('np_convert_and_mean_ary', 'convert to array then numpy.mean()'),
    ]

    plt.rcParams.update({'font.size': 12})
    plt.figure(figsize=(10, 8))

    for key, legend_text in series:
        plt.plot(orders_n, times_n[key],
                 alpha=0.5, label=legend_text, marker='o', lw=3)

    plt.xlabel('sample size n')
    plt.ylabel('time per computation in milliseconds [ms]')
    plt.legend(loc=2)
    plt.grid()
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Performance of different approaches for calculating sample means')

    # Per-size ratio: statistics.mean() time over convert-then-numpy time.
    ratios = [
        st_time / conv_time
        for st_time, conv_time in zip(times_n['st_mean'],
                                      times_n['np_convert_and_mean_ary'])
    ]
    max_perf = max(ratios)
    min_perf = min(ratios)

    template = ('Converting a list to a numpy array and then using numpy.mean() \n is {:.2f}x to '
                '{:.2f}x faster than statistics.mean() on lists')
    ftext = template.format(min_perf, max_perf)
    plt.figtext(.14, .15, ftext, fontsize=11, ha='left')

    plt.show()


## Results

In [7]:
# Print the interpreter/host details first, then draw the timing chart.
print_sysinfo()
plot_timing()

('\nPython version:', '2.7.5')
('NumPy version', '1.6.2')
('compiler:', 'GCC 4.2.1 Compatible Apple LLVM 5.0 (clang-500.0.68)')
('\nsystem     :', 'Darwin')
('release    :', '13.4.0')
('machine    :', 'x86_64')
('processor  :', 'i386')
('interpreter:', '64bit')
('CPU count  :', 8)


In [7]: