Follow me! https://github.com/astrojuanlu/
(From https://jakevdp.github.io/blog/2014/05/09/why-python-is-slow/)
(From https://gist.github.com/Juanlu001/cf19b1c16caf618860fb)
I don't have lots of experience with it, so I don't have solid arguments against it.
https://github.com/pfalcon/awesome-python-compilers/
The number of projects keeps growing
Numba is an open source JIT compiler that translates a subset of Python and NumPy code into fast machine code.
$ pip install numba
$ conda install numba [--channel conda-forge]
import random
def monte_carlo_pi(nsamples):
acc = 0
for i in range(nsamples):
x = random.random()
y = random.random()
if (x ** 2 + y ** 2) < 1.0:
acc += 1
return 4.0 * acc / nsamples
monte_carlo_pi(100)
3.56
monte_carlo_pi(100_000)
3.14984
%timeit monte_carlo_pi(10_000_000) # Slow!
5.38 s ± 708 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
from numba import jit
monte_carlo_pi_fast = jit(monte_carlo_pi)
%timeit -n1 -r1 monte_carlo_pi_fast(100)
272 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
%timeit monte_carlo_pi_fast(10_000_000) # 40x faster!
152 ms ± 8.73 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
(From http://www.efunda.com/formulae/solid_mechanics/plates/calculators/SSSS_PPoint.cfm)
import numpy as np
from numpy import pi, sin
@jit
def a_mn_point(P, a, b, xi, eta, mm, nn):
"""Navier series coefficient for concentrated load."""
return 4 * P * sin(mm * pi * xi / a) * sin(nn * pi * eta / b) / (a * b)
@jit
def plate_displacement(xx, yy, ww, a, b, P, xi, eta, D, max_m, max_n):
max_i, max_j = ww.shape
for mm in range(1, max_m):
for nn in range(1, max_n):
for ii in range(max_i):
for jj in range(max_j):
a_mn = a_mn_point(P, a, b, xi, eta, mm, nn)
ww[ii, jj] += (
a_mn
/ (mm ** 2 / a ** 2 + nn ** 2 / b ** 2) ** 2
* sin(mm * pi * xx[ii, jj] / a)
* sin(nn * pi * yy[ii, jj] / b)
/ (pi ** 4 * D)
)
# Plate geometry
a = 1.0 # m
b = 1.0 # m
h = 50e-3 # m
# Material properties
E = 69e9 # Pa
nu = 0.35
# Series terms
max_m = 16
max_n = 16
# Computation points
# NOTE: With an odd number of points the center of the place is included in
# the grid
NUM_POINTS = 101
# Load
P = -10e3 # N
xi = a / 2
eta = a / 2
# Flexural rigidity
D = h**3 * E / (12 * (1 - nu**2))
# Set up domain
x = np.linspace(0, a, num=NUM_POINTS)
y = np.linspace(0, b, num=NUM_POINTS)
xx, yy = np.meshgrid(x, y)
# Compute displacement field
ww = np.zeros_like(xx)
%timeit -n1 -r1 plate_displacement.py_func(xx, yy, ww, a, b, P, xi, eta, D, max_m, max_n)
23.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
%timeit plate_displacement(xx, yy, ww, a, b, P, xi, eta, D, max_m, max_n) # 100 times faster!
132 ms ± 3.11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# https://github.com/plotly/plotly.py/issues/1664#issuecomment-511773518
import plotly.graph_objects as go
import plotly.io as pio
# Set default renderer
pio.renderers.default = "notebook_connected"
# Set default template
pio.templates["slides"] = go.layout.Template(layout=dict(width=800, height=550))
pio.templates.default = "plotly+slides"
fig = go.Figure()
fig.add_surface(z=ww)
"poliastro: An Astrodynamics library written in Python with Fortran performance" at the 6th International Conference on Astrodynamics Tools and Techniques
We can call C functions exported using CFFI:
from numba import njit, cffi_support
# See https://www.pybonacci.org/2016/02/07/como-crear-extensiones-en-c-para-python-usando-cffi-y-numba/
# (Spanish, sorry!)
import _hyper
cffi_support.register_module(_hyper)
_hyp2f1 = _hyper.lib.hyp2f1x # See https://github.com/numba/numba/issues/1688
@njit
def hyp2f1(a, b, c, x):
return _hyp2f1(a, b, c, x)
@jit
def range10():
l = []
for x in range(10):
l.append(x)
return l
range10()
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
@jit
def reversed_range10():
l = []
for x in range(10):
l.append(x)
return reversed(l) # innocuous change, but no reversed support in nopython mode
reversed_range10()
<ipython-input-18-2e1fa93510fc>:1: NumbaWarning: Compilation is falling back to object mode WITH looplifting enabled because Function "reversed_range10" failed type inference due to: Untyped global name 'reversed': cannot determine Numba type of <class 'type'> File "<ipython-input-18-2e1fa93510fc>", line 7: def reversed_range10(): <source elided> return reversed(l) # innocuous change, but no reversed support in nopython mode ^ <ipython-input-18-2e1fa93510fc>:1: NumbaWarning: Compilation is falling back to object mode WITHOUT looplifting enabled because Function "reversed_range10" failed type inference due to: cannot determine Numba type of <class 'numba.dispatcher.LiftedLoop'> File "<ipython-input-18-2e1fa93510fc>", line 4: def reversed_range10(): <source elided> l = [] for x in range(10): ^ /home/juanlu/.pyenv/versions/3.8.0/envs/numba38/lib/python3.8/site-packages/numba/object_mode_passes.py:177: NumbaWarning: Function "reversed_range10" was compiled in object mode without forceobj=True, but has lifted loops. File "<ipython-input-18-2e1fa93510fc>", line 3: def reversed_range10(): l = [] ^ /home/juanlu/.pyenv/versions/3.8.0/envs/numba38/lib/python3.8/site-packages/numba/object_mode_passes.py:187: NumbaDeprecationWarning: Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour. For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit File "<ipython-input-18-2e1fa93510fc>", line 3: def reversed_range10(): l = [] ^ <ipython-input-18-2e1fa93510fc>:1: NumbaWarning: Compilation is falling back to object mode WITHOUT looplifting enabled because Function "reversed_range10" failed type inference due to: non-precise type pyobject [1] During: typing of argument at <ipython-input-18-2e1fa93510fc> (4) File "<ipython-input-18-2e1fa93510fc>", line 4: def reversed_range10(): <source elided> l = [] for x in range(10): ^ /home/juanlu/.pyenv/versions/3.8.0/envs/numba38/lib/python3.8/site-packages/numba/object_mode_passes.py:177: NumbaWarning: Function "reversed_range10" was compiled in object mode without forceobj=True. File "<ipython-input-18-2e1fa93510fc>", line 4: def reversed_range10(): <source elided> l = [] for x in range(10): ^ /home/juanlu/.pyenv/versions/3.8.0/envs/numba38/lib/python3.8/site-packages/numba/object_mode_passes.py:187: NumbaDeprecationWarning: Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour. For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit File "<ipython-input-18-2e1fa93510fc>", line 4: def reversed_range10(): <source elided> l = [] for x in range(10): ^
<list_reverseiterator at 0x7f00643809a0>
@jit(nopython=True)
def reversed_range10():
l = []
for x in range(10):
l.append(x)
return l[::-1] # innocuous change, but no reversed support in nopython mode
reversed_range10()
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
from numba import njit
@njit
def func(x):
return x**3 - 1
@njit
def fprime(x):
return 3 * x**2
@njit
def njit_newton(func, x0, fprime):
for _ in range(50):
fder = fprime(x0)
fval = func(x0)
newton_step = fval / fder
x = x0 - newton_step
if abs(x - x0) < 1.48e-8:
return x
x0 = x
%timeit njit_newton(func, 1.5, fprime)
%timeit njit_newton.py_func(func, 1.5, fprime=fprime)
56.5 µs ± 19.5 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) 4.93 µs ± 749 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
With a smart combination of closures and caching we can implement a workaround:
from functools import lru_cache
@lru_cache()
def newton_generator(func, fprime):
@njit
def njit_newton_final(x0):
for _ in range(50):
fder = fprime(x0)
fval = func(x0)
newton_step = fval / fder
x = x0 - newton_step
if abs(x - x0) < 1.48e-8:
return x
x0 = x
return njit_newton_final
%timeit -n1 -r1 newton_generator(func, fprime)
528 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
%timeit -n1 -r1 newton_generator(func, fprime)
2.24 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
newton_func = newton_generator(func, fprime)
%timeit -n1 -r1 newton_func(1.5)
102 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
%timeit newton_func(1.5)
307 ns ± 18.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
%timeit newton_generator(func, fprime)
211 ns ± 7.24 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
def newton(func, x0, fprime):
return newton_generator(func, fprime)(x0)
%timeit newton(func, 1.5, fprime)
584 ns ± 26 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
High level API:
astropy.units
or pint
, NumPy extensions for physical units)Dangerous™ algorithms:
numba.njit
)export NUMBA_DISABLE_JIT=1
Check out the documentation! https://numba.pydata.org/numba-doc/latest/
Project | Pros | Cons |
---|---|---|
NumPy | Powerful, omnipresent | Vectorized code is sometimes difficult to read1, if you can't vectorize you are out of luck |
Cython | Gradual, effective, widely used, mature | Tricky if you don't know any C, couldn't make the native debugger work2 |
PyPy | General purpose | C extensions still very slow, no wheels on PyPI |
Numba | Simplest, very effective | Only numerical code, needs special care |
And many others: Pythran, Nuitka, mypyc...
1Check out "Integration with the vernacular", by James Powell https://pyvideo.org/pydata-london-2015/integration-with-the-vernacular.html
2https://github.com/cython/cython/issues/2699
3See https://github.com/antocuni/pypy-wheels for a half-baked effort. Perhaps the future will be brighter with the new manylinux2010 specification? https://bitbucket.org/pypy/pypy/issues/2617/pypy-binary-is-linked-to-too-much-stuff, https://github.com/pypa/manylinux/issues/179