by Fedor Iskhakov, ANU
Description: NumPy arrays data types and differences to native Python, operations on the arrays, solving linear systems of equations.
Collection of modules (libraries) used in scientific numerical computations:
NumPy
is widely-used scientific computing package for implements fast array processing — vectorizationSciPy
is a collection of functions that perform common scientific operations (optimization, root finding, interpolation, numerical integration, etc.)Pandas
is data manipulation package with special data types and methodsNumba
is just in time (JIT) compiler for a subset of Python and NumPy functionsMatplotlib
serves for making figures and plotsimport LIBRARY as ref
, then call library functions as ref.function
from LIBRARIY import function
or from LIBRARIY import *
, then call library functions directly# import libraries
import numpy as np
N = 1000000
data_list = list(range(N)) # Native Python list
t1 = %timeit -n10 -r10 -o mean1 = sum(data_list)/N
import numpy as np
data_array = np.array(range(N)) #NumPy array
t2 = %timeit -n10 -r10 -o mean2 = data_array.mean()
print('NumPy is on avarage %2.3f faster' % (t1.average/t2.average))
5.28 ms ± 63.6 µs per loop (mean ± std. dev. of 10 runs, 10 loops each) 575 µs ± 26 µs per loop (mean ± std. dev. of 10 runs, 10 loops each) NumPy is on avarage 9.193 faster
SI orders of magnitude https://en.wikipedia.org/wiki/Micro-
Inherited from low lever C implementation
Full list of types: https://numpy.org/doc/stable/user/basics.types.html
import sys
x = np.array([-1,0,1.4],dtype='bool')
y = np.array([-1,0,1.4],dtype='int16')
z = np.array([-1,0,1.4],dtype='float64')
print('x %s, takes %d bytes' % (type(x[0]),sys.getsizeof(x)))
print('y %s, takes %d bytes' % (type(y[0]),sys.getsizeof(y)))
print('z %s, takes %d bytes' % (type(z[0]),sys.getsizeof(z)))
x <class 'numpy.bool_'>, takes 99 bytes y <class 'numpy.int16'>, takes 102 bytes z <class 'numpy.float64'>, takes 120 bytes
NumPy array hold data of the same type
x = np.array([0,0,0],dtype='uint8')
x[0] = 255
x[1] = x[0] + 1
x[2] = x[1] + 1
print(x)
[255 0 1]
# Inf and NaN
# np.seterr(all=None, divide='ignore', over=None, under=None, invalid='ignore')
x = np.array([-1,0,1,10],dtype='float64')
print( x / 0 )
# y = 10 / 0 # core Python
[-inf nan inf inf]
/usr/local/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in true_divide after removing the cwd from sys.path. /usr/local/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in true_divide after removing the cwd from sys.path.
# Comparing nans and infs
a = (np.inf == np.inf)
b = (np.nan == np.nan) # Can not compare nan to nan!
c = np.isnan(np.nan)
print (a, b, c)
True False True
First, import module obj_explore.py
: this is not trivial because jupyter notebooks require imported modules to be on the known PATH
# add path to the modules directory
import sys
sys.path.insert(1, './_static/include/')
# import the obj_explore() function
from obj_explore import *
a = np.array([1,2,3,4,5],dtype='float64')
# a = np.arange(5,dtype='uint8') + 1
print([type(a_element) for a_element in a])
a
[<class 'numpy.float64'>, <class 'numpy.float64'>, <class 'numpy.float64'>, <class 'numpy.float64'>, <class 'numpy.float64'>]
array([1., 2., 3., 4., 5.])
obj_explore(a,'public')
# help(a)
[1. 2. 3. 4. 5.] ------------------------------------------------------------ Object report on object = array([1., 2., 3., 4., 5.]) Objec class : <class 'numpy.ndarray'> Parent classes : <class 'object'> Occupied memory : 136 bytes PUBLIC PROPERTIES T = array([1., 2., 3., 4., 5.]) <class 'numpy.ndarray'> base = None <class 'NoneType'> ctypes = <numpy.core._internal._ctypes object at 0x7fc62a17dc10> <class 'numpy.core._internal._ctypes'> data = <memory at 0x7fc6488ee870> <class 'memoryview'> dtype = dtype('float64') <class 'numpy.dtype'> flags = C_CONTIGUOUS : True F_CONTIGUOUS : True OWNDATA : True WRITEABLE : True ALIGNED : True WRITEBACKIFCOPY : False UPDATEIFCOPY : False <class 'numpy.flagsobj'> flat = <numpy.flatiter object at 0x7fc5f40c1c00> <class 'numpy.flatiter'> imag = array([0., 0., 0., 0., 0.]) <class 'numpy.ndarray'> itemsize = 8 <class 'int'> nbytes = 40 <class 'int'> ndim = 1 <class 'int'> real = array([1., 2., 3., 4., 5.]) <class 'numpy.ndarray'> shape = (5,) <class 'tuple'> size = 5 <class 'int'> strides = (8,) <class 'tuple'> PUBLIC METHODS all <class 'builtin_function_or_method'> any <class 'builtin_function_or_method'> argmax <class 'builtin_function_or_method'> argmin <class 'builtin_function_or_method'> argpartition <class 'builtin_function_or_method'> argsort <class 'builtin_function_or_method'> astype <class 'builtin_function_or_method'> byteswap <class 'builtin_function_or_method'> choose <class 'builtin_function_or_method'> clip <class 'builtin_function_or_method'> compress <class 'builtin_function_or_method'> conj <class 'builtin_function_or_method'> conjugate <class 'builtin_function_or_method'> copy <class 'builtin_function_or_method'> cumprod <class 'builtin_function_or_method'> cumsum <class 'builtin_function_or_method'> diagonal <class 'builtin_function_or_method'> dot <class 'builtin_function_or_method'> dump <class 'builtin_function_or_method'> dumps <class 'builtin_function_or_method'> fill <class 'builtin_function_or_method'> flatten <class 'builtin_function_or_method'> getfield <class 'builtin_function_or_method'> item <class 'builtin_function_or_method'> itemset <class 'builtin_function_or_method'> max <class 'builtin_function_or_method'> mean <class 'builtin_function_or_method'> min <class 'builtin_function_or_method'> newbyteorder <class 'builtin_function_or_method'> nonzero <class 'builtin_function_or_method'> partition <class 'builtin_function_or_method'> prod <class 'builtin_function_or_method'> ptp <class 'builtin_function_or_method'> put <class 'builtin_function_or_method'> ravel <class 'builtin_function_or_method'> repeat <class 'builtin_function_or_method'> reshape <class 'builtin_function_or_method'> resize <class 'builtin_function_or_method'> round <class 'builtin_function_or_method'> searchsorted <class 'builtin_function_or_method'> setfield <class 'builtin_function_or_method'> setflags <class 'builtin_function_or_method'> sort <class 'builtin_function_or_method'> squeeze <class 'builtin_function_or_method'> std <class 'builtin_function_or_method'> sum <class 'builtin_function_or_method'> swapaxes <class 'builtin_function_or_method'> take <class 'builtin_function_or_method'> tobytes <class 'builtin_function_or_method'> tofile <class 'builtin_function_or_method'> tolist <class 'builtin_function_or_method'> tostring <class 'builtin_function_or_method'> trace <class 'builtin_function_or_method'> transpose <class 'builtin_function_or_method'> var <class 'builtin_function_or_method'> view <class 'builtin_function_or_method'>
import sys
def memory_usage(var,grow,steps=10):
"""Returns data on memory usage when var is grown using supplied function for given number of steps"""
d={"x":[],"y":[],"v":[]} # dictionary for x, y data, and values
for i in range(steps):
var=grow(var) # next value
d["v"].append(var)
d["x"].append(i+1)
d["y"].append(sys.getsizeof(var))
return d
x = [0,] # Python list
grow = lambda x: [0,]*len(x)*2
d1 = memory_usage(x,grow,steps=10)
x = np.array([0])
grow = lambda x: np.array([0,]*x.size*2,dtype='int8')
d2 = memory_usage(x,grow,steps=10)
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(d1["x"],d1["y"],label='Python')
plt.plot(d2["x"],d2["y"],label='Numpy')
plt.axis([min(d1["x"]),max(d1["x"]),0,max(d1["y"])+1])
plt.ylabel('size in memory, bytes')
plt.xlabel('steps of variable update')
plt.legend()
plt.show()
a = np.array([1,3,5.0,7])
print(a)
[1. 3. 5. 7.]
a = np.empty(25,'int8') # not initialized !
b = np.zeros(5) # initialized with zeros
c = np.ones(5)
d = np.arange(10)
e = np.linspace(2, 3, 11) # fill between 2 and 3 with 10 points
print(a,b,c,d,e,sep='\n\n')
[ 0 0 0 0 0 0 0 -96 0 0 0 0 0 0 0 -96 41 0 -18 10 1 0 0 0 9] [0. 0. 0. 0. 0.] [1. 1. 1. 1. 1.] [0 1 2 3 4 5 6 7 8 9] [2. 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. ]
Note that uninitialized array may have garbage (random state of the memory)
a = np.eye(5) # identity matrix
b = np.ones((2,3))
print(a)
print(b)
[[1. 0. 0. 0. 0.] [0. 1. 0. 0. 0.] [0. 0. 1. 0. 0.] [0. 0. 0. 1. 0.] [0. 0. 0. 0. 1.]] [[1. 1. 1.] [1. 1. 1.]]
b=b+2
c = np.asmatrix(b) # matrix !
print(b)
print(type(b))
print(c)
print(type(c))
[[3. 3. 3.] [3. 3. 3.]] <class 'numpy.ndarray'> [[3. 3. 3.] [3. 3. 3.]] <class 'numpy.matrix'>
print(b)
print(b*b) # element by element
print(c*c) # matrix multiplication
[[3. 3. 3.] [3. 3. 3.]] [[9. 9. 9.] [9. 9. 9.]]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-16-6f93afd2d822> in <module> 1 print(b) 2 print(b*b) # element by element ----> 3 print(c*c) # matrix multiplication /usr/local/anaconda3/lib/python3.7/site-packages/numpy/matrixlib/defmatrix.py in __mul__(self, other) 218 if isinstance(other, (N.ndarray, list, tuple)) : 219 # This promotes 1-D vectors to row vectors --> 220 return N.dot(self, asmatrix(other)) 221 if isscalar(other) or not hasattr(other, '__rmul__') : 222 return N.dot(self, other) <__array_function__ internals> in dot(*args, **kwargs) ValueError: shapes (2,3) and (2,3) not aligned: 3 (dim 1) != 2 (dim 0)
# c*c
print(c*c.transpose())
[[27. 27.] [27. 27.]]
Several types of indexes:
z = np.linspace(0, 2, 15)
z = np.reshape(z,[5,3])
print(z)
[[0. 0.14285714 0.28571429] [0.42857143 0.57142857 0.71428571] [0.85714286 1. 1.14285714] [1.28571429 1.42857143 1.57142857] [1.71428571 1.85714286 2. ]]
print(z,end='\n\n')
print( z[1] ) # scalar index: returns row array
print( z[1,0] ) # scalar index: returns number
print( z[-1:] ) # slicing: returns ?
print( z[1:3,1] ) # slicing + scalar index
print( z[1:3,1:] ) # slicing
print( z[:,1] ) # slicing to get the column
[[0. 0.14285714 0.28571429] [0.42857143 0.57142857 0.71428571] [0.85714286 1. 1.14285714] [1.28571429 1.42857143 1.57142857] [1.71428571 1.85714286 2. ]] [0.42857143 0.57142857 0.71428571] 0.42857142857142855 [[1.71428571 1.85714286 2. ]] [0.57142857 1. ] [[0.57142857 0.71428571] [1. 1.14285714]] [0.14285714 0.57142857 1. 1.42857143 1.85714286]
# Assigning elements of an array
z[1,0] = -1
z[2] = [4,5,7] # assign whole row from a list
z[:,0] = np.array([4.2,5.2,6.2,7.2,8.2]) # assign column from nparray
z[:2,1]=9.3
z[3][1]=-2 # note double bracket indexing
print(z)
[[ 4.2 9.3 0.28571429] [ 5.2 9.3 0.71428571] [ 6.2 5. 7. ] [ 7.2 -2. 1.57142857] [ 8.2 1.85714286 2. ]]
z = np.linspace(0, 2, 12)
z = np.reshape(z,[4,3])
print(z,end='\n\n')
print( z[[0,2,2],[0,1,0]] ) # numerical (element by element) indexing
print( z[z>1.0] ) # boolean indexing (masking)
mask = np.logical_and(z>1.0,z<1.75)
print(mask,end='\n\n')
print( z[mask] ) # boolean indexing (masking)
[[0. 0.18181818 0.36363636] [0.54545455 0.72727273 0.90909091] [1.09090909 1.27272727 1.45454545] [1.63636364 1.81818182 2. ]] [0. 1.27272727 1.09090909] [1.09090909 1.27272727 1.45454545 1.63636364 1.81818182 2. ] [[False False False] [False False False] [ True True True] [ True False False]] [1.09090909 1.27272727 1.45454545 1.63636364]
a = np.array([1,2,3])
b = np.array([4,5,6])
print(a + 10)
print(a + b)
[11 12 13] [5 7 9]
x = np.arange(3) + 5
y = np.ones((3,3))
print(x,y,x+y,sep='\n\n')
[5 6 7] [[1. 1. 1.] [1. 1. 1.] [1. 1. 1.]] [[6. 7. 8.] [6. 7. 8.] [6. 7. 8.]]
x = np.arange(5).reshape((5,1)) # or
x = np.arange(5)[:,np.newaxis]
print(x,end='\n\n')
print(x.transpose(),end='\n\n')
print(x + x.transpose())
[[0] [1] [2] [3] [4]] [[0 1 2 3 4]] [[0 1 2 3 4] [1 2 3 4 5] [2 3 4 5 6] [3 4 5 6 7] [4 5 6 7 8]]
x = np.arange(12).reshape((3,4))
y = np.arange(4)
print(x,y,x+y,sep='\n\n')
[[ 0 1 2 3] [ 4 5 6 7] [ 8 9 10 11]] [0 1 2 3] [[ 0 2 4 6] [ 4 6 8 10] [ 8 10 12 14]]
Functions provided by NumPy which support vectorization and broadcasting
https://docs.scipy.org/doc/numpy/reference/ufuncs.html#available-ufuncs
import math
N = 10000
data_list = list(range(N)) # Native Python list
t1 = %timeit -n10 -r10 -o sin1 = [math.sin(x) for x in data_list]
import numpy as np
data_array = np.array(range(N)) #NumPy array
t2 = %timeit -n10 -r10 -o sin2 = np.sin(data_array)
print('NumPy is on avarage %2.3f faster' % (t1.average/t2.average))
1.02 ms ± 15.4 µs per loop (mean ± std. dev. of 10 runs, 10 loops each) 101 µs ± 11.6 µs per loop (mean ± std. dev. of 10 runs, 10 loops each) NumPy is on avarage 10.158 faster
Functions that return the array of reduced size: sum, min, max , mean, all, any
x = np.arange(12).reshape(4,3)
print(x)
print(np.sum(x))
print(np.sum(x, axis=1))
print(np.maximum.reduce(x,axis=1,keepdims=True))
[[ 0 1 2] [ 3 4 5] [ 6 7 8] [ 9 10 11]] 66 [ 3 12 21 30] [[ 2] [ 5] [ 8] [11]]
x = np.arange(24).reshape(2,4,3)
print(x)
y = np.min(x,axis=2)
# y = np.mean(x,axis=(1,2))
print(y.shape)
print(y)
[[[ 0 1 2] [ 3 4 5] [ 6 7 8] [ 9 10 11]] [[12 13 14] [15 16 17] [18 19 20] [21 22 23]]] (2, 4) [[ 0 3 6 9] [12 15 18 21]]
NumPy tries not to copy data in the arrays when not necessary
x = np.arange(12).reshape(4,3) # 2-dim array
print(x)
y = x[:,1:2]
print(y)
print(y.flags)
y[0] = 999
print(x)
[[ 0 1 2] [ 3 4 5] [ 6 7 8] [ 9 10 11]] [[ 1] [ 4] [ 7] [10]] C_CONTIGUOUS : False F_CONTIGUOUS : False OWNDATA : False WRITEABLE : True ALIGNED : True WRITEBACKIFCOPY : False UPDATEIFCOPY : False [[ 0 999 2] [ 3 4 5] [ 6 7 8] [ 9 10 11]]
y = x[[0,1],[0,2]]
print(y)
print(y.flags)
y[0]=-100
print(x)
[0 5] C_CONTIGUOUS : True F_CONTIGUOUS : True OWNDATA : True WRITEABLE : True ALIGNED : True WRITEBACKIFCOPY : False UPDATEIFCOPY : False [[ 0 999 2] [ 3 4 5] [ 6 7 8] [ 9 10 11]]
Submodule numpy.linalg
import numpy.linalg as linalg
obj_explore(linalg,'public methods')
<module 'numpy.linalg' from '/usr/local/anaconda3/lib/python3.7/site-packages/numpy/linalg/__init__.py'> ------------------------------------------------------------ Object report on object = <module 'numpy.linalg' from '/usr/local/anaconda3/lib/python3.7/site-packages/numpy/linalg/__init__.py'> Objec class : <class 'module'> Parent classes : <class 'object'> Occupied memory : 88 bytes PUBLIC METHODS LinAlgError <class 'type'> cholesky <class 'function'> cond <class 'function'> det <class 'function'> eig <class 'function'> eigh <class 'function'> eigvals <class 'function'> eigvalsh <class 'function'> inv <class 'function'> lstsq <class 'function'> matrix_power <class 'function'> matrix_rank <class 'function'> multi_dot <class 'function'> norm <class 'function'> pinv <class 'function'> qr <class 'function'> slogdet <class 'function'> solve <class 'function'> svd <class 'function'> tensorinv <class 'function'> tensorsolve <class 'function'> test <class 'numpy._pytesttester.PytestTester'>
A = np.array([[1,2,0,5],[4,-2,1,1],[0,0,-2,7],[3,1,4,0]])
print(A)
[[ 1 2 0 5] [ 4 -2 1 1] [ 0 0 -2 7] [ 3 1 4 0]]
print(A,end='\n\n')
# print( A.transpose() )
# print( np.linalg.matrix_rank(A) )
# print( np.linalg.inv(A) )
# print( np.linalg.det(A) )
B = A[:2,:]
# print(B,end='\n\n')
# print( B * B ) # element by element
# print( B @ B ) # matrix multiplication
# print( B @ B.T )
[[ 1 2 0 5] [ 4 -2 1 1] [ 0 0 -2 7] [ 3 1 4 0]]
In matrix notation $ Ax=b $ where
$$ A= \begin{pmatrix} 1 & 2 & 0 & 5 \\ 4 & -2 & 1 & 1 \\ 0 & 0 & -2 & 7 \\ 3 & 1 & 4 & 0 \\ \end{pmatrix},\;\; b=\begin{pmatrix} 5\\ 5\\ 0\\ -3 \end{pmatrix},\;\; x=\begin{pmatrix} x_1\\ x_2\\ x_3\\ x_4 \end{pmatrix} $$b = np.array([5,5,0,-3])
x=np.linalg.solve(A, b)
print('Solution is %r' % x)
print('Check: max(Ax-b) = %1.5e' % np.max(A@x-b))
Solution is array([ 4.95555556, 3.91111111, -5.44444444, -1.55555556]) Check: max(Ax-b) = 8.88178e-16
A = np.array([[1,2,0],[4,-2,1],[0,0,-2],[3,1,4]])
x=np.linalg.lstsq(A, b, rcond=None)
A = np.array([[1,2,0],[4,-2,1],[0,0,-2],[3,1,4]])
x,*_=np.linalg.lstsq(A, b, rcond=None) # ignore all outputs except the first
print(A)
print('Solution is %r' % x)
print('Check: max(Ax-b) = %1.5e' % np.max(A@x-b))
# help(np.linalg.lstsq)
[[ 1 2 0] [ 4 -2 1] [ 0 0 -2] [ 3 1 4]] Solution is array([ 1.75294118, 0.63529412, -1.72941176]) Check: max(Ax-b) = 3.45882e+00
def random_matrix(n,positive=True,maxeigen=10):
'''Generates square random positive/negative semi-definite matrix'''
e=np.random.uniform(0,maxeigen,n) # random eigenvalues
r=np.random.uniform(0,1,n*n).reshape(n,n) # rotation
e = e if positive else -e
A = np.diag(e) # diagonal matrix with
return r @ A @ r.T # positive/negative semi-definite
n = 3 # number of products
A = random_matrix(n,positive=False) # demand
d = np.array([100,]*n)
B = random_matrix(n) # supply
s = np.zeros(n)
p = np.linalg.solve(A-B, s-d) # solve for quilibrium
q = A @ p + d # equilibrium quantities
print('Demand is given by Ap+d:\nA=%r\nd=%r' % (A,d))
print('Supply is given by Bp+s:\nB=%r\ns=%r' % (B,s))
print('Equilibrium prices are p = {}'.format(p))
print('Equilibrium quantities are q = {}'.format(q))
Demand is given by Ap+d: A=array([[ -1.50293121, -3.83492595, -2.53004055], [ -3.83492595, -14.91324372, -12.76421734], [ -2.53004055, -12.76421734, -12.93230177]]) d=array([100, 100, 100]) Supply is given by Bp+s: B=array([[4.96246147, 2.14375478, 4.0190379 ], [2.14375478, 1.24921004, 1.0231901 ], [4.0190379 , 1.0231901 , 5.54449637]]) s=array([0., 0., 0.]) Equilibrium prices are p = [15.18283269 1.4987406 -1.08770522] Equilibrium quantities are q = [74.185626 33.30758277 56.52309892]