This code snippet demonstrates the speed of Numba-compiled functions compared with SciPy.
import numba
import numpy as np
import scipy.spatial.distance as distance
@numba.jit(nopython=True)
def ngrams(string, n=3):
    """Return every contiguous length-``n`` slice of ``string``, left to right.

    Args:
        string: The sequence to slice; windows are taken as
            ``string[i:i + n]``.
        n: Window length (defaults to 3). Expected to be positive.

    Returns:
        A list of the ``len(string) - n + 1`` windows in order of their
        start index. The list is empty when ``string`` is shorter than ``n``.
    """
    res = []
    # The last full window starts at index len(string) - n, so the range
    # bound needs the "+ 1"; without it the final n-gram is dropped.
    for i in range(len(string) - n + 1):
        res.append(string[i:i + n])
    return res
@numba.jit(nopython=True)
def cosine_sim(u, v):
    """Return the cosine distance between two equal-length vectors.

    Despite the name, the value returned is ``1 - cos(u, v)`` (a distance,
    0 for vectors pointing the same way), which is the same quantity the
    transcript compares against ``scipy.spatial.distance.cosine``.

    Args:
        u: First vector; must support ``len()`` and integer indexing.
        v: Second vector, with the same length as ``u``.

    Returns:
        ``1 - dot(u, v) / (norm(u) * norm(v))``.

    Raises:
        ValueError: If ``u`` and ``v`` differ in length.

    Note:
        When either vector has zero norm the final division has a zero
        denominator; no special handling is applied here.
    """
    # The loop below walks len(u) indices. Without this check it would index
    # past the end of a shorter ``v`` or silently ignore the tail of a
    # longer one.
    if len(u) != len(v):
        raise ValueError("u and v must have the same length")

    # Start as floats: ulen and vlen later hold np.sqrt results, so this keeps
    # each name a single type throughout, and sums over integer inputs are
    # accumulated in floating point.
    ulen, vlen, s = 0.0, 0.0, 0.0
    for k in range(len(u)):
        i = u[k]
        j = v[k]
        s += i * j
        ulen += i ** 2
        vlen += j ** 2
    ulen = np.sqrt(ulen)
    vlen = np.sqrt(vlen)
    return 1 - s / (ulen * vlen)
Show that the calculation is correct:
cosine_sim([1,2], [3,4])
0.01613008990009257
distance.cosine([1, 2], [3, 4])
0.01613008990009257
It's faster than SciPy on this small input!
%timeit cosine_sim([1,2], [3,4])
11.3 µs ± 993 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
%timeit distance.cosine([1, 2], [3, 4])
26.8 µs ± 273 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)