Numba convertit les fonctions Python en code machine optimisé au moment de l'exécution à l'aide de la bibliothèque de compilateur LLVM standard. Les algorithmes numériques compilés par Numba en Python peuvent alors approcher les vitesses de C ou de FORTRAN, là où les boucles classiques, comme en R et MATLAB, peuvent être un peu lentes.
import numpy as np
import time
from numba import jit
@jit(nopython=True)
def monte_carlo_pi(n_samples=1000):
    """Estimate pi by Monte Carlo: 4 x the fraction of uniform random
    2-D points whose Euclidean norm puts them inside the unit circle."""
    hits = 0
    for _ in range(n_samples):
        point = np.random.rand(2)
        if np.linalg.norm(point) < 1.0:
            hits += 1
    return 4.0 * hits / n_samples
# First call triggers JIT compilation, so its timing includes compile time —
# do not report this number as the execution speed.
tic = time.time()
monte_carlo_pi(n_samples=10000000)
toc = time.time()
print("Elapsed (with compilation) = %s" % (toc - tic))

# The function is now compiled; the second call runs straight from the cache.
tic = time.time()
monte_carlo_pi(n_samples=1000)
toc = time.time()
print("Elapsed (after compilation) = %s" % (toc - tic))
Elapsed (with compilation) = 4.017842769622803 Elapsed (after compilation) = 0.000545501708984375
def go_slow(a):
    """Pure-Python baseline: sum of tanh over the first superdiagonal of `a`."""
    return sum(np.tanh(a[k, k + 1]) for k in range(a.shape[0] - 1))
all_n_samples = [1000, 5000, 10000]
t0, t1, t2 = [], [], []
for n_samples in all_n_samples:
    print(n_samples)
    x = np.arange(n_samples ** 2).reshape(n_samples, n_samples)

    @jit(nopython=True)
    def go_fast(a):
        """JIT-compiled: sum of tanh over the first superdiagonal of `a`."""
        trace = 0
        for i in range(a.shape[0] - 1):
            trace += np.tanh(a[i, i + 1])
        return trace

    # First call: timing includes JIT compilation.
    tic = time.time()
    go_fast(x)
    toc = time.time()
    t0.append(toc - tic)
    print("Elapsed (with compilation) = %s" % (toc - tic))

    # Second call: compiled code served from the cache.
    tic = time.time()
    go_fast(x)
    toc = time.time()
    t1.append(toc - tic)
    print("Elapsed (after compilation) = %s" % (toc - tic))

    # Plain Python/NumPy baseline for comparison.
    tic = time.time()
    go_slow(x)
    toc = time.time()
    t2.append(toc - tic)
    print("Elapsed (vanilla) = %s" % (toc - tic))
# Convert the collected timings to arrays and report the relative change
# versus the vanilla implementation, in percent (negative = faster).
t0, t1, t2 = np.array(t0), np.array(t1), np.array(t2)
print(all_n_samples)
print("Améliorations en pourcentage par rapport au code vanilla")
print((t0 - t2) / t2 * 100)
print((t1 - t2) / t2 * 100)
1000 Elapsed (with compilation) = 0.07340669631958008 Elapsed (after compilation) = 1.2159347534179688e-05 Elapsed (vanilla) = 0.002764463424682617 5000 Elapsed (with compilation) = 0.050783395767211914 Elapsed (after compilation) = 8.559226989746094e-05 Elapsed (vanilla) = 0.011275768280029297 10000 Elapsed (with compilation) = 0.056412458419799805 Elapsed (after compilation) = 0.00040268898010253906 Elapsed (vanilla) = 0.02436542510986328 [1000, 5000, 10000] Améliorations en pourcentage par rapport au code vanilla [2555.3686934 350.3763691 131.52667423] [-99.56015524 -99.24091851 -98.34729344]
# Synthetic least-squares problem: random design matrix and targets,
# with the second half of the targets zeroed out.
n_samples = 1000
n_features = 500
n_iterations = 2000

X = np.random.randn(n_samples, n_features)
y = np.random.randn(n_samples)
y[n_samples // 2:] = 0
w = np.zeros(n_features)  # start from the zero vector
@jit(nopython=True)
def gradient(X, y, w, step_size=0.01, max_iter=1000):
    """Gradient descent with constant step size for least squares.

    Minimizes ||X w - y||^2 starting from the initial weights `w` and
    returns the weight vector after `max_iter` iterations.

    The input `w` is copied on entry, so the caller's array is no longer
    mutated in place — previously, re-timing the compiled function made
    the second call start from the already-converged weights instead of
    the intended initial point.
    """
    w = w.copy()  # fix: do not mutate the caller's array in place
    for _ in range(max_iter):
        w -= step_size * (X.T.dot(X.dot(w) - y))
    return w
# First call: timing includes JIT compilation of `gradient` —
# do not report this number as the execution speed.
tic = time.time()
gradient(X, y, w)
toc = time.time()
print("Elapsed (with compilation) = %s" % (toc - tic))

# The function is now compiled; re-time it executing from the cache.
tic = time.time()
gradient(X, y, w)
toc = time.time()
print("Elapsed (after compilation) = %s" % (toc - tic))
# Binary labels in {-1, +1} for logistic regression; reset the weights.
y = np.random.randint(2, size=n_samples) * 2 - 1
print(y)
w = np.zeros(n_features)  # restart from the zero vector
def logistic_regression_no_jit(y, X, w, iterations=1000):
    """Plain-NumPy logistic regression trained by gradient descent.

    y: labels in {-1, +1}; X: (n_samples, n_features) design matrix;
    w: initial weights. Returns the weights after `iterations` steps.

    The input `w` is copied on entry so the caller's array is not
    mutated in place — previously, the JIT-compiled version timed right
    after this one started from the already-trained weights, which
    invalidated the jit-vs-no-jit comparison.
    """
    w = w.copy()  # fix: do not mutate the caller's array in place
    for _ in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-y * np.dot(X, w))) - 1.0) * y), X)
    return w
# Time the pure-Python/NumPy version. The label previously claimed
# "(with compilation)" although no compilation happens here.
tic = time.time()
logistic_regression_no_jit(y, X, w, iterations=n_iterations)
toc = time.time()
print("Elapsed (no jit) = %s" % (toc - tic))
@jit(nopython=True)
def logistic_regression(y, X, w, iterations=1000):
    """JIT-compiled logistic regression trained by gradient descent.

    y: labels in {-1, +1}; X: (n_samples, n_features) design matrix;
    w: initial weights. Returns the weights after `iterations` steps.

    The input `w` is copied on entry so the caller's array is not
    mutated in place — previously, re-timing the compiled function made
    the second call start from the already-trained weights instead of
    the intended initial point.
    """
    w = w.copy()  # fix: do not mutate the caller's array in place
    for _ in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-y * np.dot(X, w))) - 1.0) * y), X)
    return w
# First call: timing includes JIT compilation of `logistic_regression` —
# do not report this number as the execution speed.
tic = time.time()
logistic_regression(y, X, w, iterations=n_iterations)
toc = time.time()
print("Elapsed (with compilation) = %s" % (toc - tic))

# The function is now compiled; re-time it executing from the cache.
tic = time.time()
logistic_regression(y, X, w, iterations=n_iterations)
toc = time.time()
print("Elapsed (after compilation) = %s" % (toc - tic))