In [1]:
import numpy as np
import time

Motivation

First, a motivating example for why we might want a NUMerical PYthon (numpy) library.

Squaring a list of numbers:

In [28]:
num_elements = 1000000
input_array = list(range(num_elements))
In [22]:
start_time = time.time()

return_array = [0] * len(input_array)
for key, val in enumerate(input_array):
    return_array[key] = val * val

print(time.time() - start_time)
0.1591188907623291
In [23]:
start_time = time.time()
return_array_vectorized = np.square(input_array)
print(time.time() - start_time)
0.044077396392822266

Basic array operations

Array construction

In [24]:
np.zeros(5)
Out[24]:
array([0., 0., 0., 0., 0.])
In [25]:
np.identity(3)
Out[25]:
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
In [26]:
array_1 = np.array([[1,2],[3,4],[5,6]])  # An array with arbitrary elements.
array_1
Out[26]:
array([[1, 2],
       [3, 4],
       [5, 6]])
In [30]:
array_1.shape
Out[30]:
(3, 2)

type of object is "np.ndarray" = n-dimensional array. It's most basic property is its shape

In [60]:
type(array_1)
Out[60]:
(numpy.ndarray, (3, 2))
In [61]:
array_1.shape
Out[61]:
(3, 2)

Array access, or "indexing"

In [63]:
array_1[1,1]  # Access an element
Out[63]:
4
In [32]:
array_1[1]  # Access a row
Out[32]:
array([3, 4])
In [35]:
array_1[:,1]  # Access a column
Out[35]:
array([2, 4, 6])
In [36]:
array_1[1,:]  # Access a row
Out[36]:
array([3, 4])

Fancy indexing -- can select multiple elements using a list or array of indices

In [64]:
array_1[1]
Out[64]:
array([3, 4])
In [65]:
array_1[[1,1,1]]
Out[65]:
array([[3, 4],
       [3, 4],
       [3, 4]])
In [66]:
array_1[:,1]
Out[66]:
array([2, 4, 6])
In [67]:
array_1[:,[1,1,1]]
Out[67]:
array([[2, 2, 2],
       [4, 4, 4],
       [6, 6, 6]])

Can do some fancy things...

Perhaps one of the most useful is to convert a list of integers to 1-hot vectors:

In [71]:
np.eye(3)[[1,1,0,2]]
Out[71]:
array([[0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.]])

Operations when shapes match

In [74]:
array_1 + [[1,1],[1,1],[1,1]]
Out[74]:
array([[2, 3],
       [4, 5],
       [6, 7]])
In [75]:
array_1 * array_1 # elementwise multiplication
Out[75]:
array([[ 1,  4],
       [ 9, 16],
       [25, 36]])

Operations when shapes don't match --- or "Broadcasting" of operations

In [48]:
array_2 = np.add(array_1, 1)  # array_1 + 1
array_2
Out[48]:
array([[2, 3],
       [4, 5],
       [6, 7]])
In [37]:
array_2 = array_1 + 1
array_2
Out[37]:
array([[2, 3],
       [4, 5],
       [6, 7]])

Can broadcast arrays starting from last dimensions...

In [42]:
# Recall:
array_2.shape
Out[42]:
(3, 2)
In [43]:
array_2 = array_1 + [1,2]
array_2
Out[43]:
array([[2, 4],
       [4, 6],
       [6, 8]])

If you broadcast incorrect, it will often throw an error

In [44]:
array_2 = array_1 + [1,2,3]
array_2
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-44-9210d012adc3> in <module>()
----> 1 array_2 = array_1 + [1,2,3]
      2 array_2

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

But sometimes it's silent, so the safest way to broadcast is to match dimensions:

In [49]:
np.array([[1,2]]).shape
Out[49]:
(1, 2)
In [50]:
array_2 = array_1 + [[1,2]]
array_2
Out[50]:
array([[2, 4],
       [4, 6],
       [6, 8]])

You can broadcast basically any operation that makes sense:

In [54]:
array_1 * 2 # np.multiply(array_1, 2)
Out[54]:
array([[ 2,  4],
       [ 6,  8],
       [10, 12]])
In [55]:
array_1 / 2.0  # np.multiply(array_1, 1/2.0) @ 
Out[55]:
array([[0.5, 1. ],
       [1.5, 2. ],
       [2.5, 3. ]])

Dot product / matrix multiplication

In [80]:
# Vector

array_3 = np.array([1,2,3])
array_4 = np.array([4,5,6])

np.dot(array_3, array_4)
Out[80]:
32
In [81]:
# Or:
array_3.dot(array_4)
Out[81]:
32
In [82]:
# Matrix

array_3 = np.array([[1, 2]]) # [1, 2]
array_4 = np.array([[3], [4]]) # [2, 1]
np.dot(array_3, array_4)  # Inner-product
Out[82]:
array([[11]])
In [85]:
array_5 = np.dot(array_4, array_3)
array_5
Out[85]:
array([[3, 6],
       [4, 8]])

Vectorized operations

In [86]:
array_5.sum(axis=0) # np.mean(array_4, 0)
Out[86]:
array([ 7, 14])
In [88]:
array_5.sum(axis=1) # np.mean(array_4, 0)
Out[88]:
array([ 9, 12])
In [102]:
array_5.sum()
Out[102]:
21
In [87]:
array_5.mean(axis=0)
Out[87]:
array([3.5, 7. ])
In [57]:
array_5.std(axis=0)
Out[57]:
array([0.5, 1. ])
In [58]:
array_5.max(axis=0)
Out[58]:
array([4, 8])
In [59]:
array_5.shape
Out[59]:
(2, 2)

Transpose

In [89]:
M1 = array_1.T
M1
Out[89]:
array([[1, 3, 5],
       [2, 4, 6]])

Matrix elementwise exponentiation

In [91]:
np.power(M1, 3)  # M1 ^ 2?
Out[91]:
array([[  1,  27, 125],
       [  8,  64, 216]])
In [96]:
e = np.square(M1) == np.power(M1, 2)
e
Out[96]:
array([[ True,  True,  True],
       [ True,  True,  True]])

Boolean operations

In [99]:
if e:
  print('e is True')
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-99-4fab4d84926b> in <module>()
----> 1 if e:
      2   print('e is True')

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
In [100]:
if np.all(e):
  print('all entries of e are True')
all entries of e are True
In [101]:
if np.any(e):
  print('at least 1 entry of e is')
at least 1 entry of e is
In [ ]:
if np.any(e):
  print('at least 1 entry of e is')
In [104]:
np.all(e, 0) # if it makes sense, you can pass an axis argument!
Out[104]:
array([ True,  True,  True])

Matrix Inverse

In [105]:
np.linalg.inv(np.array([[2, 0], [0, 2]]))
Out[105]:
array([[0.5, 0. ],
       [0. , 0.5]])

Inverse fails if matrix is singular.

In [106]:
np.linalg.inv(np.matmul(M1.T, M1))  # This should raise an exception.
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-106-9e5bbb853a28> in <module>()
----> 1 np.linalg.inv(np.matmul(M1.T, M1))  # This should raise an exception.

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in inv(a)
    526     signature = 'D->D' if isComplexType(t) else 'd->d'
    527     extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 528     ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
    529     return wrap(ainv.astype(result_t, copy=False))
    530 

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in _raise_linalgerror_singular(err, flag)
     87 
     88 def _raise_linalgerror_singular(err, flag):
---> 89     raise LinAlgError("Singular matrix")
     90 
     91 def _raise_linalgerror_nonposdef(err, flag):

LinAlgError: Singular matrix

Evaluating linear systems Ax = b for b

In [107]:
A1 = np.array([[1, 1], [0, 1]])
x1 = np.array([[2], [2]])
b1 = np.matmul(A1, x1)
b1
Out[107]:
array([[4],
       [2]])

Solving linear systems Ax = b for x

In [108]:
x1_verify = np.linalg.solve(A1, b1)
all(x1 == x1_verify)
Out[108]:
True

If A is singular then the linear system may be overdetermined (no solution) when solving for x.

In [110]:
A2 = np.array([[1, 1], [2, 2]])  # Note that the rank of this matrix is 1.
x2 = np.array([[2], [3]])
potential_sol = np.matmul(A2, x2)
potential_sol
Out[110]:
array([[ 5],
       [10]])
In [111]:
b2 = np.array([[5], [11]])
x2_verify = np.linalg.solve(A2, b2)  # This should raise an exception.
# There is no selection of x to solve this linear system for A2 and b2.
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-111-56dd9bbf1477> in <module>()
      1 b2 = np.array([[5], [11]])
----> 2 x2_verify = np.linalg.solve(A2, b2)  # This should raise an exception.
      3 # There is no selection of x to solve this linear system for A2 and b2.

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in solve(a, b)
    388     signature = 'DD->D' if isComplexType(t) else 'dd->d'
    389     extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 390     r = gufunc(a, b, signature=signature, extobj=extobj)
    391 
    392     return wrap(r.astype(result_t, copy=False))

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in _raise_linalgerror_singular(err, flag)
     87 
     88 def _raise_linalgerror_singular(err, flag):
---> 89     raise LinAlgError("Singular matrix")
     90 
     91 def _raise_linalgerror_nonposdef(err, flag):

LinAlgError: Singular matrix
In [112]:
x2_verify = np.linalg.solve(A2, potential_sol)  # This should raise an exception.
# There happens to exist an x to solve this system, but numpy still fails.
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-112-ee72a78afdea> in <module>()
----> 1 x2_verify = np.linalg.solve(A2, potential_sol)  # This should raise an exception.
      2 # There happens to exist an x to solve this system, but numpy still fails.

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in solve(a, b)
    388     signature = 'DD->D' if isComplexType(t) else 'dd->d'
    389     extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 390     r = gufunc(a, b, signature=signature, extobj=extobj)
    391 
    392     return wrap(r.astype(result_t, copy=False))

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in _raise_linalgerror_singular(err, flag)
     87 
     88 def _raise_linalgerror_singular(err, flag):
---> 89     raise LinAlgError("Singular matrix")
     90 
     91 def _raise_linalgerror_nonposdef(err, flag):

LinAlgError: Singular matrix

Another linear system that is underdetermined (many solutions) when solving for x

In [113]:
A3 = np.array([[1, 0, 0], [0, 1, 1]])
x3 = np.array([[1], [1], [1]])
b3 = np.matmul(A3, x3)
b3
Out[113]:
array([[1],
       [2]])
In [114]:
np.linalg.solve(A3, b3)  # This should raise an exception.
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-114-e0ec3f51dfe0> in <module>()
----> 1 np.linalg.solve(A3, b3)  # This should raise an exception.

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in solve(a, b)
    375     a, _ = _makearray(a)
    376     _assertRankAtLeast2(a)
--> 377     _assertNdSquareness(a)
    378     b, wrap = _makearray(b)
    379     t, result_t = _commonType(a, b)

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in _assertNdSquareness(*arrays)
    209     for a in arrays:
    210         if max(a.shape[-2:]) != min(a.shape[-2:]):
--> 211             raise LinAlgError('Last 2 dimensions of the array must be square')
    212 
    213 def _assertFinite(*arrays):

LinAlgError: Last 2 dimensions of the array must be square
In [ ]: