import time
print('Last updated: %s' %time.strftime('%d/%m/%Y'))
Last updated: 24/06/2014
There are multiple ways in NumPy to extract particular rows or columns of interest from a NumPy array.
For example, given a 3x3-dimensional matrix A as shown below,
import numpy as np
A = np.array([[1,2,3],[4,5,6], [7,8,9]])
print(A)
[[1 2 3] [4 5 6] [7 8 9]]
we now want to extract the first and the last column. The most intuitive way would probably be by direct indexing:
print(A[:,[0,-1]])
[[1 3] [4 6] [7 9]]
But alternatively, we could also have used the numpy.take
function here:
print(np.take(A, indices=[0,-1], axis=1))
[[1 3] [4 6] [7 9]]
print(A[[0,-1],:])
[[1 2 3] [7 8 9]]
print(np.take(A, indices=[0,-1], axis=0))
[[1 2 3] [7 8 9]]
print('A before:\n', A)
B = A[[0,-1],:]
B[0,0] = 100
C = np.take(A, indices=[0,-1], axis=0)
C[0,0] = 150
print('\nB:\n', B)
print('\nC:\n', C)
print('\nA after:\n', A)
A before: [[1 2 3] [4 5 6] [7 8 9]] B: [[100 2 3] [ 7 8 9]] C: [[150 2 3] [ 7 8 9]] A after: [[1 2 3] [4 5 6] [7 8 9]]
C = np.take(A, indices=[0,-1], axis=0)
is equivalent to
np.take(A, indices=[0,-1], axis=0, out=C)
However, the former approach is the slightly faster one:
%timeit C = np.take(A, indices=[0,-1], axis=0)
%timeit np.take(A, indices=[0,-1], axis=0, out=C)
100000 loops, best of 3: 3.79 µs per loop 100000 loops, best of 3: 5.47 µs per loop
timeit
benchmarks¶In the following benchmark, we will see how fast the two approaches are in returning the first column/row, a random column/row, and the last column/row of 2-dimensional arrays (in terms of their axes) consisting NxN elements.
import timeit
from numpy import take as np_take
orders_n = [10**i for i in range(1, 5)]
idx_slice_col, idx_slice_row, np_take_col, np_take_row = [], [], [], []
for n in orders_n:
rand_idx = np.random.randint(n)
A = np.random.randn(n, n)
idx_slice_col.append(min(timeit.Timer('A[:, [0, rand_idx, -1]]',
'from __main__ import A, rand_idx').repeat(repeat=3, number=1)))
np_take_col.append(min(timeit.Timer('np_take(A, indices=[0, rand_idx, -1], axis=1)',
'from __main__ import A, rand_idx, np_take').repeat(repeat=3, number=1)))
idx_slice_row.append(min(timeit.Timer('A[[0, rand_idx, -1], :]',
'from __main__ import A, rand_idx').repeat(repeat=3, number=1)))
np_take_row.append(min(timeit.Timer('np_take(A, indices=[0, rand_idx, -1], axis=0)',
'from __main__ import A, rand_idx, np_take').repeat(repeat=3, number=1)))
import platform
import multiprocessing
def print_sysinfo():
print('\nPython version :', platform.python_version())
print('compiler :', platform.python_compiler())
print('Numpy version :', np.__version__)
print('\nsystem :', platform.system())
print('release :', platform.release())
print('machine :', platform.machine())
print('processor :', platform.processor())
print('CPU count :', multiprocessing.cpu_count())
print('interpreter:', platform.architecture()[0])
print('\n\n')
%matplotlib inline
import matplotlib.pyplot as plt
def plot():
f, ax = plt.subplots(1, 2, figsize=(15,5))
ax[0].plot(orders_n, idx_slice_col, alpha=0.4, lw=3, marker='o',
label='Classic indexing\nA[:, [0, rand_idx, -1]]')
ax[0].plot(orders_n, np_take_col, alpha=0.4, lw=3, marker='o',
label='NumPy.take\nnp.take(A, indices=[0, rand_idx, -1], axis=1)')
ax[0].set_title('Getting columns from a NumPy array')
ax[1].plot(orders_n, idx_slice_row, alpha=0.4, lw=3, marker='o',
label='Classic indexing\nA[[0, rand_idx, -1], :]')
ax[1].plot(orders_n, np_take_row, alpha=0.4, lw=3, marker='o',
label='NumPy.take\nnp_take(A, indices=[0, rand_idx, -1], axis=0)')
ax[1].set_title('Getting rows from a NumPy array')
for x in ax:
x.legend(loc='upper left')
x.set_ylabel('time in seconds')
x.set_xlabel('NumPy array size')
plt.tight_layout()
plt.show()
plot()
print_sysinfo()
Python version : 3.4.1 compiler : GCC 4.2.1 (Apple Inc. build 5577) Numpy version : 1.8.1 system : Darwin release : 13.2.0 machine : x86_64 processor : i386 CPU count : 4 interpreter: 64bit