NumPy is licensed under the BSD license, enabling reuse with few restrictions.
import numpy as np # extremely common naming for Numpy to be used as np
a = np.arange(20).reshape(4,5)
a
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
b = np.arange(5,500,10)
b
array([ 5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 105, 115, 125, 135, 145, 155, 165, 175, 185, 195, 205, 215, 225, 235, 245, 255, 265, 275, 285, 295, 305, 315, 325, 335, 345, 355, 365, 375, 385, 395, 405, 415, 425, 435, 445, 455, 465, 475, 485, 495])
type(a),type(b),a.dtype,b.dtype
(numpy.ndarray, numpy.ndarray, dtype('int32'), dtype('int32'))
NumPy’s main object is the homogeneous multidimensional array. It is a table of elements (usually numbers), all of the same type, indexed by a tuple of non-negative integers. In NumPy, dimensions are called axes.
NumPy’s array class is called ndarray. It is also known by the alias array. Note that numpy.array is not the same as the Standard Python Library class array.array, which only handles one-dimensional arrays and offers less functionality.
NumPy's arrays are more compact than Python lists (about 3-5x difference)
Faster access for reading/writing items / less Python "bookkeeping"
Convenience operations: matrix operations/Fourier Transforms/histograms/statistics/filtering/etc
Spatial locality in memory access patterns results in performance gains notably due to the CPU cache
Items stored contiguously in memory allow NumPy to take advantage of vectorized instructions of modern CPUs, such as Intel's SSE and AVX, AMD's XOP
Restriction(uniform data types)
d=a*2
d
array([[ 0, 2, 4, 6, 8], [10, 12, 14, 16, 18], [20, 22, 24, 26, 28], [30, 32, 34, 36, 38]])
c=a**2
c
array([[ 0, 1, 4, 9, 16], [ 25, 36, 49, 64, 81], [100, 121, 144, 169, 196], [225, 256, 289, 324, 361]], dtype=int32)
# individual elements
c[1,1]
36
c[1]
array([25, 36, 49, 64, 81], dtype=int32)
c[:2]
array([[ 0, 1, 4, 9, 16], [25, 36, 49, 64, 81]], dtype=int32)
c[:,1]
array([ 1, 36, 121, 256], dtype=int32)
c[::-1]
array([[225, 256, 289, 324, 361], [100, 121, 144, 169, 196], [ 25, 36, 49, 64, 81], [ 0, 1, 4, 9, 16]], dtype=int32)
c[::-1,::-1]
array([[361, 324, 289, 256, 225], [196, 169, 144, 121, 100], [ 81, 64, 49, 36, 25], [ 16, 9, 4, 1, 0]], dtype=int32)
c
array([[ 0, 1, 4, 9, 16], [ 25, 36, 49, 64, 81], [100, 121, 144, 169, 196], [225, 256, 289, 324, 361]], dtype=int32)
c[:, 1], c[0:2, 1]
(array([ 1, 36, 121, 256], dtype=int32), array([ 1, 36], dtype=int32))
# Loading files
my_csv = np.genfromtxt('./resources/random4x9.csv', delimiter=',')
my_csv
array([[0.72809242, 0.43954956, 0.96609444, 0.95103408], [0.43789427, 0.56524801, 0.26548715, 0.87307645], [0.68820884, 0.41444608, 0.33806432, 0.56520271], [0.14000647, 0.955224 , 0.07058813, 0.09764705], [0.03317341, 0.77677129, 0.08200467, 0.19588284], [0.32389879, 0.60277379, 0.77265652, 0.53549964], [0.78382241, 0.44545018, 0.09123232, 0.24865343], [0.07532197, 0.13528559, 0.59399663, 0.31171427], [0.31212006, 0.70050059, 0.44611673, 0.85198123]])
list(my_csv.reshape(1,36))
[array([0.72809242, 0.43954956, 0.96609444, 0.95103408, 0.43789427, 0.56524801, 0.26548715, 0.87307645, 0.68820884, 0.41444608, 0.33806432, 0.56520271, 0.14000647, 0.955224 , 0.07058813, 0.09764705, 0.03317341, 0.77677129, 0.08200467, 0.19588284, 0.32389879, 0.60277379, 0.77265652, 0.53549964, 0.78382241, 0.44545018, 0.09123232, 0.24865343, 0.07532197, 0.13528559, 0.59399663, 0.31171427, 0.31212006, 0.70050059, 0.44611673, 0.85198123])]
# going back to Python lists....
myl=list(my_csv)
myl
[array([0.72809242, 0.43954956, 0.96609444, 0.95103408]), array([0.43789427, 0.56524801, 0.26548715, 0.87307645]), array([0.68820884, 0.41444608, 0.33806432, 0.56520271]), array([0.14000647, 0.955224 , 0.07058813, 0.09764705]), array([0.03317341, 0.77677129, 0.08200467, 0.19588284]), array([0.32389879, 0.60277379, 0.77265652, 0.53549964]), array([0.78382241, 0.44545018, 0.09123232, 0.24865343]), array([0.07532197, 0.13528559, 0.59399663, 0.31171427]), array([0.31212006, 0.70050059, 0.44611673, 0.85198123])]
# Converting individual ndarrays in our regular list to regular inner lists
myll = [list(el) for el in myl]
myll
[[0.728092422, 0.439549556, 0.966094444, 0.951034076], [0.437894272, 0.565248006, 0.265487155, 0.873076449], [0.688208844, 0.414446083, 0.338064325, 0.565202708], [0.140006466, 0.955223997, 0.070588134, 0.097647052], [0.033173413, 0.776771295, 0.082004671, 0.195882839], [0.323898787, 0.602773787, 0.77265652, 0.535499641], [0.783822409, 0.44545018, 0.091232322, 0.24865343], [0.075321971, 0.13528559, 0.59399663, 0.311714268], [0.312120058, 0.700500595, 0.446116732, 0.851981232]]
## Litmus test: use NumPy for operations on whole groups of elements; if you mostly perform many individual element operations, a regular list might suffice
## NOTE: For multiple data types better to use Pandas library(another lecture)
dir(a)
['T', '__abs__', '__add__', '__and__', '__array__', '__array_finalize__', '__array_interface__', '__array_prepare__', '__array_priority__', '__array_struct__', '__array_ufunc__', '__array_wrap__', '__bool__', '__class__', '__complex__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__divmod__', '__doc__', '__eq__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce__', '__reduce_ex__', '__repr__', '__rfloordiv__', '__rlshift__', '__rmatmul__', '__rmod__', '__rmul__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__', '__rxor__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__', '__xor__', 'all', 'any', 'argmax', 'argmin', 'argpartition', 'argsort', 'astype', 'base', 'byteswap', 'choose', 'clip', 'compress', 'conj', 'conjugate', 'copy', 'ctypes', 'cumprod', 'cumsum', 'data', 'diagonal', 'dot', 'dtype', 'dump', 'dumps', 'fill', 'flags', 'flat', 'flatten', 'getfield', 'imag', 'item', 'itemset', 'itemsize', 'max', 'mean', 'min', 'nbytes', 'ndim', 'newbyteorder', 'nonzero', 'partition', 'prod', 'ptp', 'put', 'ravel', 'real', 'repeat', 'reshape', 'resize', 'round', 'searchsorted', 'setfield', 'setflags', 'shape', 'size', 'sort', 'squeeze', 'std', 'strides', 'sum', 'swapaxes', 'take', 'tobytes', 'tofile', 'tolist', 'tostring', 'trace', 'transpose', 'var', 'view']
my_csv.ndim
2
my_csv
array([[0.72809242, 0.43954956, 0.96609444, 0.95103408], [0.43789427, 0.56524801, 0.26548715, 0.87307645], [0.68820884, 0.41444608, 0.33806432, 0.56520271], [0.14000647, 0.955224 , 0.07058813, 0.09764705], [0.03317341, 0.77677129, 0.08200467, 0.19588284], [0.32389879, 0.60277379, 0.77265652, 0.53549964], [0.78382241, 0.44545018, 0.09123232, 0.24865343], [0.07532197, 0.13528559, 0.59399663, 0.31171427], [0.31212006, 0.70050059, 0.44611673, 0.85198123]])
?my_csv.T
my_csv.shape
(9, 4)
my_csv.dtype
dtype('float64')
my_csv.itemsize
8
my_csv.data
<memory at 0x00000000056D48B8>
ndarray.ndim: the number of axes (dimensions) of the array.
ndarray.shape: the dimensions of the array. This is a tuple of integers indicating the size of the array in each dimension. For a matrix with n rows and m columns, shape will be (n,m). The length of the shape tuple is therefore the number of axes, ndim.
ndarray.size: the total number of elements of the array. This is equal to the product of the elements of shape.
ndarray.dtype: an object describing the type of the elements in the array. One can create or specify dtype’s using standard Python types. Additionally NumPy provides types of its own. numpy.int32, numpy.int16, and numpy.float64 are some examples.
ndarray.itemsize: the size in bytes of each element of the array. For example, an array of elements of type float64 has itemsize 8 (=64/8), while one of type complex32 has itemsize 4 (=32/8). It is equivalent to ndarray.dtype.itemsize.
ndarray.data: the buffer containing the actual elements of the array. Normally, we won’t need to use this attribute because we will access the elements in an array using indexing facilities.
a.ndim
2
a.shape
(4, 5)
a.size
20
a.dtype
dtype('int32')
a.itemsize
4
a.data
<memory at 0x00000000081FDC18>
a.data
<memory at 0x00000000081FD558>
a.data
<memory at 0x00000000081FD630>
## NumPy ndarray advantage over normal Python data structures:
* efficient and fast
my_csv.mean()
0.46707556552777774
my_csv.median()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-45-9875fbb1cc5e> in <module>() ----> 1 my_csv.median() AttributeError: 'numpy.ndarray' object has no attribute 'median'
a
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
a[2,0]=20
a
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [20, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
np.median(a, axis=1)
array([ 2., 7., 13., 17.])
np.median([32,3,7,10,20])
10.0
np.median([5,6,3,8,111,90])
7.0
sorted([5,6,3,8,111,90])
[3, 5, 6, 8, 90, 111]
b
array([ 5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 105, 115, 125, 135, 145, 155, 165, 175, 185, 195, 205, 215, 225, 235, 245, 255, 265, 275, 285, 295, 305, 315, 325, 335, 345, 355, 365, 375, 385, 395, 405, 415, 425, 435, 445, 455, 465, 475, 485, 495])
b.mean()
250.0
np.median(b)
250.0
b.std() ## standard deviation
144.30869689661813
a
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [20, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
np.median(a, axis=0)
array([10. , 8.5, 9.5, 10.5, 11.5])
np.median(a, axis=1)
array([ 2., 7., 13., 17.])
np.mean(a, axis=0)
array([10. , 8.5, 9.5, 10.5, 11.5])
np.mean(a, axis=1)
array([ 2., 7., 14., 17.])
a.mean(axis=0) ## same as above
array([10. , 8.5, 9.5, 10.5, 11.5])
np.median(a, axis=1)
array([ 2., 7., 13., 17.])
np.median(a, axis=2)
--------------------------------------------------------------------------- AxisError Traceback (most recent call last) <ipython-input-62-3bab11b7d1c6> in <module>() ----> 1 np.median(a, axis=2) C:\ProgramData\Anaconda3\lib\site-packages\numpy\lib\function_base.py in median(a, axis, out, overwrite_input, keepdims) 4117 """ 4118 r, k = _ureduce(a, func=_median, axis=axis, out=out, -> 4119 overwrite_input=overwrite_input) 4120 if keepdims: 4121 return r.reshape(k) C:\ProgramData\Anaconda3\lib\site-packages\numpy\lib\function_base.py in _ureduce(a, func, **kwargs) 4011 keepdim = list(a.shape) 4012 nd = a.ndim -> 4013 axis = _nx.normalize_axis_tuple(axis, nd) 4014 4015 for ax in axis: C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\numeric.py in normalize_axis_tuple(axis, ndim, argname, allow_duplicate) 1504 except TypeError: 1505 axis = tuple(axis) -> 1506 axis = tuple(normalize_axis_index(ax, ndim, argname) for ax in axis) 1507 if not allow_duplicate and len(set(axis)) != len(axis): 1508 if argname: C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\numeric.py in <genexpr>(.0) 1504 except TypeError: 1505 axis = tuple(axis) -> 1506 axis = tuple(normalize_axis_index(ax, ndim, argname) for ax in axis) 1507 if not allow_duplicate and len(set(axis)) != len(axis): 1508 if argname: AxisError: axis 2 is out of bounds for array of dimension 2
d=np.arange(90).reshape(3,3,10)
d
array([[[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]], [[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]], [[60, 61, 62, 63, 64, 65, 66, 67, 68, 69], [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], [80, 81, 82, 83, 84, 85, 86, 87, 88, 89]]])
d[2,1,5]
75
d.reshape(3,3,2,5)
array([[[[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9]], [[10, 11, 12, 13, 14], [15, 16, 17, 18, 19]], [[20, 21, 22, 23, 24], [25, 26, 27, 28, 29]]], [[[30, 31, 32, 33, 34], [35, 36, 37, 38, 39]], [[40, 41, 42, 43, 44], [45, 46, 47, 48, 49]], [[50, 51, 52, 53, 54], [55, 56, 57, 58, 59]]], [[[60, 61, 62, 63, 64], [65, 66, 67, 68, 69]], [[70, 71, 72, 73, 74], [75, 76, 77, 78, 79]], [[80, 81, 82, 83, 84], [85, 86, 87, 88, 89]]]])
# The AxisError from np.median(a, axis=2) is expected: a is 2-D (axes 0 and 1), and we attempted to use a 3rd dimension ! :)
A frequent error consists in calling array with multiple numeric arguments, rather than providing a single list of numbers as an argument.
c = np.array(2,3,4,5,6,7) # WRONG
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-66-6548e3003d99> in <module>() ----> 1 c = np.array(2,3,4,5,6,7) # WRONG ValueError: only 2 non-keyword arguments accepted
# CORRECT would be
c = np.array([2,3,4,6,7,9,44])
c
array([ 2, 3, 4, 6, 7, 9, 44])
t = np.array([4000000000000,"R"])
t
array(['4000000000000', 'R'], dtype='<U21')
t = np.array((5,6,7,9,1111111111112,0.4))
t
array([5.00000000e+00, 6.00000000e+00, 7.00000000e+00, 9.00000000e+00, 1.11111111e+12, 4.00000000e-01])
s = np.array(["Hello",'World','''Aha something strange'''])
s
array(['Hello', 'World', 'Aha something strange'], dtype='<U21')
s.itemsize
84
s.dtype
dtype('<U21')
## Can specify array type at creation
c = np.array([[3,4,5], [3,1,6]], dtype=complex)
c
array([[3.+0.j, 4.+0.j, 5.+0.j], [3.+0.j, 1.+0.j, 6.+0.j]])
c[0,0].real
3.0
c[0,0].imag
0.0
c[1,1].imag = 5
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-93-11ea9d4ba07d> in <module>() ----> 1 c[1,1].imag = 5 AttributeError: attribute 'imag' of 'numpy.generic' objects is not writable
# the above does not work because .imag on a single array element is read-only, i.e. a getter, not a setter
c[1,1] = 2 + 3j #so use j for imaginary part
c
array([[3.+0.j, 4.+0.j, 5.+0.j], [3.+0.j, 2.+3.j, 6.+0.j]])
c.dtype,c.size,c.shape
(dtype('complex128'), 6, (2, 3))
d = np.array([[3+4j,4,5+2.1j], [3,1-3j,6]], dtype=complex)
d
array([[3.+4. j, 4.+0. j, 5.+2.1j], [3.+0. j, 1.-3. j, 6.+0. j]])
d[0,0].real,d[0,0].imag
(3.0, 4.0)
z = np.zeros((5,6))
z[3,3] = 42
print(type(z))
print(z.dtype)
z
<class 'numpy.ndarray'> float64
array([[ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 42., 0., 0.], [ 0., 0., 0., 0., 0., 0.]])
## Data type conversion: astype returns a converted copy (it is NOT in place), so we rebind the name to the result
z = z.astype(np.float32, copy=True)
print(z.dtype)
float32
z
array([[ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 42., 0., 0.], [ 0., 0., 0., 0., 0., 0.]], dtype=float32)
z = z.astype(np.int32, copy=True)
print(z.dtype)
z
int32
array([[ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 42, 0, 0], [ 0, 0, 0, 0, 0, 0]])
zz = z.astype(np.float32, copy=True)
zz
array([[ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 42., 0., 0.], [ 0., 0., 0., 0., 0., 0.]], dtype=float32)
zz[1,1]=5000000000
zz
array([[0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00], [0.0e+00, 5.0e+09, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00], [0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00], [0.0e+00, 0.0e+00, 0.0e+00, 4.2e+01, 0.0e+00, 0.0e+00], [0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00]], dtype=float32)
2**64
18446744073709551616
2**128
340282366920938463463374607431768211456
2**32
4294967296
2**31
2147483648
zzz = zz.astype(np.int64, copy=True)
zzz
array([[ 0, 0, 0, 0, 0, 0], [ 0, 5000000000, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 42, 0, 0], [ 0, 0, 0, 0, 0, 0]], dtype=int64)
ones = np.full((5,6), 1, dtype=np.int32)
ones
array([[1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1]])
ones=ones+z
ones
array([[ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 43, 1, 1], [ 1, 1, 1, 1, 1, 1]])
ones = ones.astype(np.int32, copy=True)
ones
array([[ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 43, 1, 1], [ 1, 1, 1, 1, 1, 1]])
ones*z
array([[ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 1806, 0, 0], [ 0, 0, 0, 0, 0, 0]])
twos = np.full((5,6), 2, dtype=np.int32)
twos
array([[2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2]])
ones*twos
array([[ 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 86, 2, 2], [ 2, 2, 2, 2, 2, 2]])
np.matmul(ones, twos)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-121-c47a4b300c6e> in <module>() ----> 1 np.matmul(ones, twos) ValueError: shapes (5,6) and (5,6) not aligned: 6 (dim 1) != 5 (dim 0)
## We have mismatched dimensions for matrix multiplication: (5,6) x (5,6), the inner dimensions 6 and 5 differ
twos.transpose()
array([[2, 2, 2, 2, 2], [2, 2, 2, 2, 2], [2, 2, 2, 2, 2], [2, 2, 2, 2, 2], [2, 2, 2, 2, 2], [2, 2, 2, 2, 2]])
mult = np.matmul(ones, twos.transpose())
mult
array([[12, 12, 12, 12, 12], [12, 12, 12, 12, 12], [12, 12, 12, 12, 12], [96, 96, 96, 96, 96], [12, 12, 12, 12, 12]])
mult1 = np.matmul(ones.transpose(), twos)
mult1
array([[10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [94, 94, 94, 94, 94, 94], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10]])
mult.sum(),mult1.sum()
(720, 864)
# Anyone remembers the algorithm for Matrix Multiplication, I don't but numpy lets us not worry about it :)
np.sqrt(mult)
array([[3.46410162, 3.46410162, 3.46410162, 3.46410162, 3.46410162], [3.46410162, 3.46410162, 3.46410162, 3.46410162, 3.46410162], [3.46410162, 3.46410162, 3.46410162, 3.46410162, 3.46410162], [9.79795897, 9.79795897, 9.79795897, 9.79795897, 9.79795897], [3.46410162, 3.46410162, 3.46410162, 3.46410162, 3.46410162]])
mult
array([[12, 12, 12, 12, 12], [12, 12, 12, 12, 12], [12, 12, 12, 12, 12], [96, 96, 96, 96, 96], [12, 12, 12, 12, 12]])
mult**2
array([[ 144, 144, 144, 144, 144], [ 144, 144, 144, 144, 144], [ 144, 144, 144, 144, 144], [9216, 9216, 9216, 9216, 9216], [ 144, 144, 144, 144, 144]], dtype=int32)
# np.sin, np.cos, np.exp(power), and so on
[1,3,65]+[4,6,7]
[1, 3, 65, 4, 6, 7]
vstack = np.vstack((ones,twos,ones)) # notice the double parenthesis, ie we supply a tuple as an argument!
vstack
array([[ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 43, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 43, 1, 1], [ 1, 1, 1, 1, 1, 1]])
hstack = np.hstack((twos,ones, twos))
hstack
array([[ 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2, 1, 1, 1, 43, 1, 1, 2, 2, 2, 2, 2, 2], [ 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]])
hsplit = np.hsplit(hstack, 3)
print(type(hsplit))
hsplit
<class 'list'>
[array([[2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2]]), array([[ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 1, 1, 1], [ 1, 1, 1, 43, 1, 1], [ 1, 1, 1, 1, 1, 1]]), array([[2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2]])]
h2 = np.hsplit(hstack, 6)
print(type(h2))
h2
<class 'list'>
[array([[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]]), array([[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]]), array([[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]]), array([[ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 1], [43, 1, 1], [ 1, 1, 1]]), array([[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]]), array([[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]])]
## vsplit is similar along vertical axis and array_split lets you specify axis
v = a.view()
v
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [20, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
v is a
False
v == a
array([[ True, True, True, True, True], [ True, True, True, True, True], [ True, True, True, True, True], [ True, True, True, True, True]])
v.base is a
False
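## v.base is a gives False here because a is itself a view (it was created with reshape), so v and a share the same underlying base array; the comparison of bases should be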
v.base is a.base
True
v[2,2]=999
v
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [ 20, 11, 999, 13, 14], [ 15, 16, 17, 18, 19]])
a
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [ 20, 11, 999, 13, 14], [ 15, 16, 17, 18, 19]])
s = a[:,1:3]
s
array([[ 1, 2], [ 6, 7], [ 11, 999], [ 16, 17]])
s.base is a.base
True
s[:] = 55
s
array([[55, 55], [55, 55], [55, 55], [55, 55]])
a
array([[ 0, 55, 55, 3, 4], [ 5, 55, 55, 8, 9], [20, 55, 55, 13, 14], [15, 55, 55, 18, 19]])
b = a
b.base is a.base
True
d = a.copy()
d
array([[ 0, 55, 55, 3, 4], [ 5, 55, 55, 8, 9], [20, 55, 55, 13, 14], [15, 55, 55, 18, 19]])
d.base is a.base
False
d[1,1]=777
d
array([[ 0, 55, 55, 3, 4], [ 5, 777, 55, 8, 9], [ 20, 55, 55, 13, 14], [ 15, 55, 55, 18, 19]])
a
array([[ 0, 55, 55, 3, 4], [ 5, 55, 55, 8, 9], [20, 55, 55, 13, 14], [15, 55, 55, 18, 19]])
a[:1]
array([[ 0, 55, 55, 3, 4]])
a[0,1]
55
f = d == 55
f
array([[False, True, True, False, False], [False, False, True, False, False], [False, True, True, False, False], [False, True, True, False, False]])
e = d == a[0,1]
e
array([[False, True, True, False, False], [False, False, True, False, False], [False, True, True, False, False], [False, True, True, False, False]])
e.dtype
dtype('bool')
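## More on copy vs slicing: https://stackoverflow.com/questions/47181092/numpy-views-vs-copy-by-slicing
# key depends on how you index
# basic slicing (by rows or by columns, e.g. a[:2] or a[:,1:3]) -> view (shares data; modifying s = a[:,1:3] above changed a)
# advanced indexing (with integer or boolean arrays, e.g. a[[0,2]] or a[a > 17]) -> copy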
f=np.ones((5,6), dtype='int32')
f[2,3]=0
f
array([[1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1], [1, 1, 1, 0, 1, 1], [1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1]])
f.nonzero()
(array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4], dtype=int64), array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], dtype=int64))
d.nonzero()
(array([0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3], dtype=int64), array([1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int64))
arange, array, copy, empty, empty_like, eye, fromfile, fromfunction, identity, linspace, logspace, mgrid, ogrid, ones, ones_like, r, zeros, zeros_like
ndarray.astype, atleast_1d, atleast_2d, atleast_3d, mat
array_split, column_stack, concatenate, diagonal, dsplit, dstack, hsplit, hstack, ndarray.item, newaxis, ravel, repeat, reshape, resize, squeeze, swapaxes, take, transpose, vsplit, vstack
all, any, nonzero, where
argmax, argmin, argsort, max, min, ptp, searchsorted, sort
choose, compress, cumprod, cumsum, inner, ndarray.fill, imag, prod, put, putmask, real, sum
cov, mean, std, var
cross, dot, outer, linalg.svd, vdot
a
array([[ 0, 55, 55, 3, 4], [ 5, 55, 55, 8, 9], [20, 55, 55, 13, 14], [15, 55, 55, 18, 19]])
b = a > 17
b
array([[False, True, True, False, False], [False, True, True, False, False], [ True, True, True, False, False], [False, True, True, True, True]])
## Useful in assignments
a[b] = 500
a
array([[ 0, 500, 500, 3, 4], [ 5, 500, 500, 8, 9], [500, 500, 500, 13, 14], [ 15, 500, 500, 500, 500]])
a[b]
array([500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500])
a = np.arange(24)
a
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23])
a.shape = 6, -1
a
array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]])
a.shape = -1, 2
a
array([[ 0, 1], [ 2, 3], [ 4, 5], [ 6, 7], [ 8, 9], [10, 11], [12, 13], [14, 15], [16, 17], [18, 19], [20, 21], [22, 23]])
The term broadcasting describes how numpy treats arrays with different shapes during arithmetic operations. Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes.
Broadcasting provides a means of vectorizing array operations, which usually leads to efficient algorithm implementations.
e = np.arange(6)
f = np.arange(3)*10 #same as np.arange(0,30,10)
e,f
(array([0, 1, 2, 3, 4, 5]), array([ 0, 10, 20]))
np.arange(0,30,10)
array([ 0, 10, 20])
e + f
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-182-079705e5a008> in <module>() ----> 1 e + f ValueError: operands could not be broadcast together with shapes (6,) (3,)
e.reshape(6,1) + f.reshape(1,3)
array([[ 0, 10, 20], [ 1, 11, 21], [ 2, 12, 22], [ 3, 13, 23], [ 4, 14, 24], [ 5, 15, 25]])
e.reshape(6,1) * f.reshape(1,3)
array([[ 0, 0, 0], [ 0, 10, 20], [ 0, 20, 40], [ 0, 30, 60], [ 0, 40, 80], [ 0, 50, 100]])
mtab = np.arange(10).reshape(10,1) * np.arange(10).reshape(1,10)
mtab
array([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18], [ 0, 3, 6, 9, 12, 15, 18, 21, 24, 27], [ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36], [ 0, 5, 10, 15, 20, 25, 30, 35, 40, 45], [ 0, 6, 12, 18, 24, 30, 36, 42, 48, 54], [ 0, 7, 14, 21, 28, 35, 42, 49, 56, 63], [ 0, 8, 16, 24, 32, 40, 48, 56, 64, 72], [ 0, 9, 18, 27, 36, 45, 54, 63, 72, 81]])
g=np.arange(12)
h=np.arange(4)*100
g,h
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), array([ 0, 100, 200, 300]))
g.reshape(4,3)
array([[ 0, 1, 2], [ 3, 4, 5], [ 6, 7, 8], [ 9, 10, 11]])
g.reshape(4,3)+h.reshape(4,1)
array([[ 0, 1, 2], [103, 104, 105], [206, 207, 208], [309, 310, 311]])
g.reshape(4,3)+50
array([[50, 51, 52], [53, 54, 55], [56, 57, 58], [59, 60, 61]])
## We can do this with more dimensions as well! But our 3D thinking heads will hurt!
## Basic image processing with NumPy (PIL is another option!)
url='https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'
save_file='lena.png'
#Original: November 1972, photographed by Dwight Hooker.
#This 512x512 electronic/mechanical scan of a section of the full portrait: Alexander Sawchuk (1972) and two others
#Permission
#Use of this 512x512 scan is "overlooked" and by implication permitted by Playboy
import urllib.request
import shutil
# Current recommended way of reading a file from url and then saving it immediately and closing the file:
# the response is streamed into the local file, and the with-statement closes
# both the HTTP response and the output file when the block exits.
with urllib.request.urlopen(url) as response, open(save_file, 'wb') as out_file:
    shutil.copyfileobj(response, out_file)
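Imageio is a Python library that provides an easy interface to read and write a wide range of image data, including support for animated images, volumetric data, and scientific formats. It is cross-platform, runs on Python 2.7 and 3.4+, and is easy to install.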
Main website: http://imageio.github.io
import imageio
lena = imageio.imread('lena.png')
l2 = imageio.imread('https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png')
type(lena)
imageio.core.util.Image
We can find its parent class with myclass.__bases__
imageio.core.util.Image.__bases__
(numpy.ndarray,)
import matplotlib.pyplot as plt
plt.imshow(lena)
<matplotlib.image.AxesImage at 0x8a40da0>
lena.shape
(512, 512, 3)
lena.ndim
3
lena.size
786432
lena = imageio.imread('lena.png')
#lena[60:160,60:160,]=90
lena[30:60,30:60,0]= 10
lena[60:160,60:160,1]=180
lena[160:260,160:260,2]=180
plt.imshow(lena)
<matplotlib.image.AxesImage at 0x8aeac88>
plt.imshow(lena)
<matplotlib.image.AxesImage at 0x8b6a7f0>
# working with URL files directly
import requests
from io import BytesIO
import matplotlib.image as mpimg
lena2 = mpimg.imread('lena.png')
type(lena2)
numpy.ndarray
plt.imshow(lena2)
<matplotlib.image.AxesImage at 0x904eac8>
response = requests.get(url)
lena3 = mpimg.imread(BytesIO(response.content))
type(lena3)
# this method copies the image directly from the stream into our img
numpy.ndarray
plt.imshow(lena3)
<matplotlib.image.AxesImage at 0xc0f51d0>
from PIL import Image
response = requests.get(url)
img = Image.open(BytesIO(response.content))
# this method does not copy the image directly from the stream into our img
# img.show() # in external viewer not in Jupyter!
plt.imshow(img)
<matplotlib.image.AxesImage at 0x99c79e8>
img.size,img.format_description
((512, 512), 'Portable network graphics')
dir(img)
['_Image__transformer', '_PngImageFile__idat', '__array_interface__', '__class__', '__copy__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_close_exclusive_fp_after_loading', '_copy', '_crop', '_dump', '_ensure_mutable', '_exclusive_fp', '_expand', '_min_frame', '_new', '_open', '_repr_png_', '_seek_check', 'alpha_composite', 'category', 'close', 'convert', 'copy', 'crop', 'decoderconfig', 'decodermaxblock', 'draft', 'effect_spread', 'encoderconfig', 'encoderinfo', 'filename', 'filter', 'format', 'format_description', 'fp', 'frombytes', 'fromstring', 'getbands', 'getbbox', 'getchannel', 'getcolors', 'getdata', 'getextrema', 'getim', 'getpalette', 'getpixel', 'getprojection', 'height', 'histogram', 'im', 'info', 'load', 'load_end', 'load_prepare', 'load_read', 'map', 'mode', 'offset', 'palette', 'paste', 'png', 'point', 'putalpha', 'putdata', 'putpalette', 'putpixel', 'pyaccess', 'quantize', 'readonly', 'remap_palette', 'resize', 'rotate', 'save', 'seek', 'show', 'size', 'split', 'tell', 'text', 'thumbnail', 'tile', 'tobitmap', 'tobytes', 'toqimage', 'toqpixmap', 'tostring', 'transform', 'transpose', 'verify', 'width']
type(img)
PIL.PngImagePlugin.PngImageFile
import matplotlib.pyplot as plt
# Build a vector of 20000 normal deviates with variance 0.5^2 and mean 2
mu, sigma = 2, 0.5
v = np.random.normal(mu,sigma,20000)
v[:20], v.mean(),v.max(),v.min()
(array([2.31632739, 2.219109 , 1.53796432, 2.46312243, 1.90145972, 1.47151823, 2.21456619, 2.05749909, 1.91774699, 1.94418668, 2.51501025, 2.43988414, 2.47336132, 2.54512566, 0.95851055, 0.96754106, 3.31049021, 1.72561352, 1.58489223, 1.71679022]), 1.9930709054847882, 3.9389775779957246, 0.2489672552480482)
# Plot a normalized histogram with 500 bins
# density=True replaces the old `normed` keyword, which current Matplotlib no longer accepts
plt.hist(v, bins=500, density=True) # matplotlib version (plot)
plt.show()
# Compute the histogram with numpy and then plot it
# density=True replaces the old `normed` keyword, which current NumPy no longer accepts
n, bins = np.histogram(v, bins=500, density=True) # NumPy version (no plot)
# bins holds the bin edges (one more than the counts), so plot against the midpoints of consecutive edges
plt.plot(.5*(bins[1:]+bins[:-1]), n)
plt.show()
Python for Data Analysis # excellent, by the creator of Pandas
NumPy Cookbook # some good recipes but uneven, this author has many books on NumPy
From Python to Numpy # nice free resource