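NumPy之于数值计算特别重要的原因之一,是因为它可以高效处理大数组的数据。这是因为:NumPy在一块连续的内存中存储数据,独立于其他Python内置对象;它可以对整个数组执行运算,而不需要Python的for循环。下面以一个包含一百万个整数的数组和一个等价的Python列表为例,比较二者各乘以2所需的时间: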
import numpy as np
my_arr = np.arange(1000000)
my_list = list(range(1000000))
%time for n in range(10):my_arr2 = my_arr * 2
Wall time: 24.9 ms
my_arr2[:10]
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
%time for m in range(10): my_list2 = [x * 2 for x in my_list]
Wall time: 1.2 s
my_list2[:10]
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
import numpy as np
# 产生2*3的数组
data = np.random.randn(2,3)
data
array([[ 1.46500606, 0.38171158, 0.20194511], [-1.21663118, -0.65103605, 2.03941173]])
#数组的每个元素乘以一个实数
data * 10
array([[ 14.65006062, 3.81711579, 2.01945106], [-12.1663118 , -6.51036045, 20.39411734]])
data + data
array([[ 2.93001212, 0.76342316, 0.40389021], [-2.43326236, -1.30207209, 4.07882347]])
data.shape
(2, 3)
data.dtype
dtype('float64')
data1 = [1,2,3,6.5,0]
arr1 = np.array(data1)
arr1
array([1. , 2. , 3. , 6.5, 0. ])
data2 = [[1,2,3],[4,5,6]]
arr2 = np.array(data2)
arr2
array([[1, 2, 3], [4, 5, 6]])
arr2.ndim #维度
2
arr2.shape
(2, 3)
arr2.dtype
dtype('int32')
np.zeros((2,3))
array([[0., 0., 0.], [0., 0., 0.]])
arr3 = np.ones((2,2,2))
arr3
array([[[1., 1.], [1., 1.]], [[1., 1.], [1., 1.]]])
arr3.ndim
3
arr3.shape
(2, 2, 2)
np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# Request a float dtype explicitly. The old alias np.float (the builtin float)
# was deprecated in NumPy 1.20 and removed in 1.24, so use np.float64 instead.
arr4 = np.array([1,2,3],dtype = np.float64)
arr4
array([1., 2., 3.])
arr4.dtype
dtype('float64')
arr5 = np.array([1,2,3],dtype=np.int32)
arr5
array([1, 2, 3])
arr5.dtype
dtype('int32')
float_arr5 = arr5.astype(np.float64)
float_arr5.dtype
dtype('float64')
f_arr = np.array([1.22, 33.211, 20.01])
f_arr
array([ 1.22 , 33.211, 20.01 ])
f_arr.dtype
dtype('float64')
i_arr = f_arr.astype(np.int32)
i_arr
array([ 1, 33, 20])
i_arr.dtype
dtype('int32')
s_arr = np.array(['1','2.22','3.01'])
s_arr.dtype
dtype('<U4')
s_arr.astype(np.float64)
array([1. , 2.22, 3.01])
arr = np.array([[1,2,3.0],[4.,5.,6.]])
arr
array([[1., 2., 3.], [4., 5., 6.]])
arr **2
array([[ 1., 4., 9.], [16., 25., 36.]])
arr * arr
array([[ 1., 4., 9.], [16., 25., 36.]])
#数组与标量的算术运算会将标量值传播到各个元素
#不同大小的数组之间的运算叫做广播(broadcasting)
arr / 2
array([[0.5, 1. , 1.5], [2. , 2.5, 3. ]])
arr // 2
array([[0., 1., 1.], [2., 2., 3.]])
arr - 1
array([[0., 1., 2.], [3., 4., 5.]])
# shape相同的数组比较会产生一个布尔数组
arr2 = np.array([[4,1,1],[7,5,5]])
arr > arr2
array([[False, True, True], [False, False, True]])
arr = np.arange(10)
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[5]
5
arr[:6]
array([0, 1, 2, 3, 4, 5])
arr[2:5] = 10
arr
array([ 0, 1, 10, 10, 10, 5, 6, 7, 8, 9])
arr_slice = arr[2:5]
arr_slice
array([10, 10, 10])
#当修改arr_slice时,arr数组也会被修改
arr_slice[1] = 100
arr_slice
array([ 10, 100, 10])
arr
array([ 0, 1, 10, 100, 10, 5, 6, 7, 8, 9])
arr_copy = arr[:3].copy()
arr_copy
array([ 0, 1, 10])
arr_copy[0] = 100
arr_copy
array([100, 1, 10])
arr
array([ 0, 1, 10, 100, 10, 5, 6, 7, 8, 9])
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d[1]
array([4, 5, 6])
arr2d[1,1]
5
arr2d[1][1]
5
arr2d.ndim
2
# 三维数组
arr3d = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
arr3d
array([[[ 1, 2, 3], [ 4, 5, 6]], [[ 7, 8, 9], [10, 11, 12]]])
arr3d.shape
(2, 2, 3)
arr3d[0]
array([[1, 2, 3], [4, 5, 6]])
arr3d[0,1]
array([4, 5, 6])
arr3d[0,1,2]
6
# 标量值可以直接赋值给数组的某个元素或子数组,赋值时标量会被广播到所选的全部位置
arr3d[0] = 0
arr3d
array([[[ 0, 0, 0], [ 0, 0, 0]], [[ 7, 8, 9], [10, 11, 12]]])
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d
array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[:2]
array([[1, 2, 3], [4, 5, 6]])
arr2d[:2,:2]
array([[1, 2], [4, 5]])
arr2d[1,:2]
array([4, 5])
arr2d[:2,:2] = 1
arr2d
array([[1, 1, 3], [1, 1, 6], [7, 8, 9]])
names = np.array(['Bob','Joe','Will','Bob','will','Joe','Joe'])
data = np.random.randn(7,4)
names
array(['Bob', 'Joe', 'Will', 'Bob', 'will', 'Joe', 'Joe'], dtype='<U4')
data
array([[-0.22069039, -0.3274228 , -1.16742577, -0.73831674], [-0.17419963, -1.86709013, 1.07920976, -1.10985636], [ 1.55001149, -2.08476377, -1.14224182, -1.15043735], [ 0.98140083, 0.96417792, 3.68246684, -1.21193783], [-0.49201841, 0.93379201, -2.06124303, 1.73694261], [ 0.76176287, 0.59960082, -0.72839576, -1.22476123], [ 0.03756529, 2.76069372, 1.33604503, -1.47566927]])
names == 'Bob'
array([ True, False, False, True, False, False, False])
data[names == 'Bob']
array([[-0.22069039, -0.3274228 , -1.16742577, -0.73831674], [ 0.98140083, 0.96417792, 3.68246684, -1.21193783]])
data[names == 'Bob',2:]
array([[-1.16742577, -0.73831674], [ 3.68246684, -1.21193783]])
data[names == 'Bob',2] #索引列
array([-1.16742577, 3.68246684])
names != 'Bob'
array([False, True, True, False, True, True, True])
data[~(names == 'Bob')]
array([[-0.17419963, -1.86709013, 1.07920976, -1.10985636], [ 1.55001149, -2.08476377, -1.14224182, -1.15043735], [-0.49201841, 0.93379201, -2.06124303, 1.73694261], [ 0.76176287, 0.59960082, -0.72839576, -1.22476123], [ 0.03756529, 2.76069372, 1.33604503, -1.47566927]])
mask = (names == 'Bob') |(names == 'Will')
mask
array([ True, False, True, True, False, False, False])
data[mask]
array([[-0.22069039, -0.3274228 , -1.16742577, -0.73831674], [ 1.55001149, -2.08476377, -1.14224182, -1.15043735], [ 0.98140083, 0.96417792, 3.68246684, -1.21193783]])
# 将data中小于零的元素设置为0
data[data < 0] = 0
data
array([[0. , 0. , 0. , 0. ], [0. , 0. , 1.07920976, 0. ], [1.55001149, 0. , 0. , 0. ], [0.98140083, 0.96417792, 3.68246684, 0. ], [0. , 0.93379201, 0. , 1.73694261], [0.76176287, 0.59960082, 0. , 0. ], [0.03756529, 2.76069372, 1.33604503, 0. ]])
# 通过一维布尔数组设置整行或列的值
data[names != 'Joe'] = 7
data
array([[7. , 7. , 7. , 7. ], [0. , 0. , 1.07920976, 0. ], [7. , 7. , 7. , 7. ], [7. , 7. , 7. , 7. ], [7. , 7. , 7. , 7. ], [0.76176287, 0.59960082, 0. , 0. ], [0.03756529, 2.76069372, 1.33604503, 0. ]])
# Build an 8x4 array whose row i is filled with the value i.
# np.empty leaves the contents uninitialised, so every row is overwritten below;
# assigning a scalar to arr[i] broadcasts it across the whole row.
arr = np.empty((8,4))
for i in range(8):
    arr[i] = i
arr
array([[0., 0., 0., 0.], [1., 1., 1., 1.], [2., 2., 2., 2.], [3., 3., 3., 3.], [4., 4., 4., 4.], [5., 5., 5., 5.], [6., 6., 6., 6.], [7., 7., 7., 7.]])
# 为了以特定的顺序选取行子集,只需要传入一个用于指定顺序的整数列表或数组即可
arr[[4,3,0,6]]
array([[4., 4., 4., 4.], [3., 3., 3., 3.], [0., 0., 0., 0.], [6., 6., 6., 6.]])
# 也可以使用负数索引,会从末尾开始选取行
arr[[-1,-3,-5]]
array([[7., 7., 7., 7.], [5., 5., 5., 5.], [3., 3., 3., 3.]])
arr = np.arange(32).reshape((8,4))
arr
array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23], [24, 25, 26, 27], [28, 29, 30, 31]])
#同时传入两个整数数组时,最终选出的是元素(1,0)、(5,3)、(7,1)和(2,2)。无论数组是多少维的,只要为每个轴都传入一个一维整数数组,这种花式索引的结果总是一维的。
arr[[1,5,7,2],[0,3,1,2]]
array([ 4, 23, 29, 10])
arr[[1,5,7,2]]
array([[ 4, 5, 6, 7], [20, 21, 22, 23], [28, 29, 30, 31], [ 8, 9, 10, 11]])
arr[[1,5,7,2]][:,[0,3,1,2]]
array([[ 4, 7, 5, 6], [20, 23, 21, 22], [28, 31, 29, 30], [ 8, 11, 9, 10]])