![48b895bb969782650089d1c1fcaef75a.png](https://i-blog.csdnimg.cn/blog_migrate/2101c0c4335d00ec04d34606d8e3cfd1.jpeg)
3 numpy.array 基本操作
import numpy as np
np.random.seed(0)
x = np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
X = np.arange(15).reshape((3, 5))
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
3.1 numpy.array 的基本属性
numpy.array 的基本属性如下表所示
![57554d31b835860a7cc5d1c3a1cf32ba.png](https://i-blog.csdnimg.cn/blog_migrate/c1962d1bd46494e0aa638ee4bd24a9a0.jpeg)
x.ndim
1
X.ndim
2
x.shape
(10,)
X.shape
(3, 5)
x.size
10
X.size
15
x.dtype
dtype('int32')
X.dtype
dtype('int32')
3.2 数组索引与切片
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[0] # 取第一个数
0
x[-1] # 取倒数第一个数
9
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
X[0][0] # 不建议!一般使用下面这种方式
0
X[0, 0]
0
X[0, -1] # -1表示访问最后一个位置元素
4
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[0:5]
array([0, 1, 2, 3, 4])
x[:5]
array([0, 1, 2, 3, 4])
x[4:7]
array([4, 5, 6])
x[::2]
array([0, 2, 4, 6, 8])
x[::-1]
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
X[:2, :3]
array([[0, 1, 2],
[5, 6, 7]])
X[:2][:3] # 结果不一样,在numpy中使用","做多维索引
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
X[:2, ::2] # 取前两行,步长为2
array([[0, 2, 4],
[5, 7, 9]])
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
X[::-1, ::-1] # 行和列都倒着写
array([[14, 13, 12, 11, 10],
[ 9, 8, 7, 6, 5],
[ 4, 3, 2, 1, 0]])
X[0, :]
array([0, 1, 2, 3, 4])
X[:, 0]
array([ 0, 5, 10])
3.3 子数组
subX = X[:2, :3]
subX
array([[0, 1, 2],
[5, 6, 7]])
subX[0, 0] = 100
subX
array([[100, 1, 2],
[ 5, 6, 7]])
X
array([[100, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[ 10, 11, 12, 13, 14]])
numpy中对切片得到的子矩阵进行修改会同时修改原矩阵,因为切片返回的是原数组的视图(view),与原数组共享同一块内存,而不是一份副本。
X[0, 0] = 0
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
subX
array([[0, 1, 2],
[5, 6, 7]])
subX = X[:2, :3].copy() # copy复制一份出来,防止被修改
subX[0, 0] = 100
subX
array([[100, 1, 2],
[ 5, 6, 7]])
X
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
3.4 数组变换
数组变换常见函数如下:
![70f6d5fceae1983c535ec15fa5405fe8.png](https://i-blog.csdnimg.cn/blog_migrate/e404e9946c2c340c5f010a9c53789413.jpeg)
x.shape
(10,)
x.ndim
1
x.reshape(2, 5)
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
x.reshape(2, 5)
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
x.astype(dtype = float) # 变换数组的数据类型
array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
A = x.reshape(2, 5)
A
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
x # 不改变原来的数组
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
B = x.reshape(1, 10)
B
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
B.ndim
2
B.shape
(1, 10)
注意维度(ndim)和形状(shape)的区别:即使数据只有一行,只要reshape时指定形状为(1, 10),得到的仍然是一个二维数组。
x.reshape(-1, 10)
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
x.reshape(10, -1)
array([[0],
[1],
[2],
[3],
[4],
[5],
[6],
[7],
[8],
[9]])
-1表示该维度的大小由numpy自动推算:例如reshape(10, -1)只指定要十行,列数则根据元素总数自动计算得出。
x.reshape(2, -1)
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
x.reshape(-1,2)
array([[0, 1],
[2, 3],
[4, 5],
[6, 7],
[8, 9]])
x.reshape(3,-1) # 无法满足将报错
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-116-686a4c1cf8de> in <module>
----> 1 x.reshape(3,-1) # 无法满足将报错
ValueError: cannot reshape array of size 10 into shape (3,newaxis)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x.resize(2, 5) # resize与reshape功能几乎一致,只是resize无法使用-1,而且是原地修改
x
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
x.flatten()
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x.swapaxes(0, 1)
array([[0, 5],
[1, 6],
[2, 7],
[3, 8],
[4, 9]])
4 数组的合并与分割
4.1 数组拼接
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])
array([1, 2, 3, 3, 2, 1])
z = np.array([666, 666, 666])
np.concatenate([x, y, z])
array([ 1, 2, 3, 3, 2, 1, 666, 666, 666])
A = np.array([[1, 2, 3],
[4, 5, 6]])
np.concatenate([A, A])
array([[1, 2, 3],
[4, 5, 6],
[1, 2, 3],
[4, 5, 6]])
# 二维数组默认被按行拼接,如果需要按列拼接,则需要指定axis为1
np.concatenate([A, A], axis=1)
array([[1, 2, 3, 1, 2, 3],
[4, 5, 6, 4, 5, 6]])
np.concatenate([A, z]) # 维数不同,无法拼接,所以报错
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-136-951715408506> in <module>
----> 1 np.concatenate([A, z]) # 维数不同,无法拼接,所以报错
<__array_function__ internals> in concatenate(*args, **kwargs)
ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
np.concatenate([A, z.reshape(1, -1)]) # 改变成二维,维数相同即可进行拼接
array([[ 1, 2, 3],
[ 4, 5, 6],
[666, 666, 666]])
# 垂直方向拼接
np.vstack([A, z])
array([[ 1, 2, 3],
[ 4, 5, 6],
[666, 666, 666]])
B = np.full((2,2), 100)
# 水平方向拼接
np.hstack([A, B])
array([[ 1, 2, 3, 100, 100],
[ 4, 5, 6, 100, 100]])
4.2 数组分割
x = np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# 两个分割点产生三个数组
x1, x2, x3 = np.split(x, [3, 7])
print(x1, x2, x3, sep = '\n')
[0 1 2]
[3 4 5 6]
[7 8 9]
x1, x2 = np.split(x, [5])
print(x1, x2, sep = '\n')
[0 1 2 3 4]
[5 6 7 8 9]
A = np.arange(16).reshape((4, 4))
A
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
# 默认按行分割
A1, A2 = np.split(A, [2])
print(A1, A2, sep = '\n\n')
[[0 1 2 3]
[4 5 6 7]]
[[ 8 9 10 11]
[12 13 14 15]]
# 指定axis为1,按列进行分割
A1, A2 = np.split(A, [3], axis=1)
print(A1, A2, sep = '\n\n')
[[ 0 1 2]
[ 4 5 6]
[ 8 9 10]
[12 13 14]]
[[ 3]
[ 7]
[11]
[15]]
# 垂直分割,也就是按行分割
upper, lower = np.vsplit(A, [2])
print(upper, lower, sep = '\n\n')
[[0 1 2 3]
[4 5 6 7]]
[[ 8 9 10 11]
[12 13 14 15]]
# 水平分割,也就是按列分割
left, right = np.hsplit(A, [2])
print(left, right, sep = '\n\n')
[[ 0 1]
[ 4 5]
[ 8 9]
[12 13]]
[[ 2 3]
[ 6 7]
[10 11]
[14 15]]
data = np.arange(16).reshape((4, 4))
data
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
# 水平分割,分割列。将最后一列标签分割出来,一般分割特征和标签都是利用这个函数
X, y = np.hsplit(data, [-1])
print(X, y, sep = '\n\n')
[[ 0 1 2]
[ 4 5 6]
[ 8 9 10]
[12 13 14]]
[[ 3]
[ 7]
[11]
[15]]
5 数组运算
相较于Python的列表运算或者是自定义函数,numpy中数组的运算效率要远远高于前两者。
5.1 一元函数与二元函数
- 一元函数
![07e729efaa3787f86d4e3007b62f77a9.png](https://i-blog.csdnimg.cn/blog_migrate/f49e9c97fcf4e9c39bc5289fbd38b3c2.jpeg)
![354c68ed49ad8b8e6e0f41ecfc20ba04.png](https://i-blog.csdnimg.cn/blog_migrate/1d51ec3d83022a7d6a6a33230b97b071.jpeg)
- 二元函数
![8bb4748a3bece54b36934e7c5d0c4ae0.png](https://i-blog.csdnimg.cn/blog_migrate/2a62714862bfd851ef927c95bb1b8979.jpeg)
X = np.arange(1, 16).reshape((3, 5))
X
array([[ 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10],
[11, 12, 13, 14, 15]])
X + 1
array([[ 2, 3, 4, 5, 6],
[ 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16]])
X - 1
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
X * 2
array([[ 2, 4, 6, 8, 10],
[12, 14, 16, 18, 20],
[22, 24, 26, 28, 30]])
X / 2
array([[0.5, 1. , 1.5, 2. , 2.5],
[3. , 3.5, 4. , 4.5, 5. ],
[5.5, 6. , 6.5, 7. , 7.5]])
X // 2
array([[0, 1, 1, 2, 2],
[3, 3, 4, 4, 5],
[5, 6, 6, 7, 7]], dtype=int32)
X ** 2
array([[ 1, 4, 9, 16, 25],
[ 36, 49, 64, 81, 100],
[121, 144, 169, 196, 225]], dtype=int32)
X % 2
array([[1, 0, 1, 0, 1],
[0, 1, 0, 1, 0],
[1, 0, 1, 0, 1]], dtype=int32)
np.abs(X)
array([[ 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10],
[11, 12, 13, 14, 15]])
np.sin(X)
array([[ 0.84147098, 0.90929743, 0.14112001, -0.7568025 , -0.95892427],
[-0.2794155 , 0.6569866 , 0.98935825, 0.41211849, -0.54402111],
[-0.99999021, -0.53657292, 0.42016704, 0.99060736, 0.65028784]])
np.cos(X)
array([[ 0.54030231, -0.41614684, -0.9899925 , -0.65364362, 0.28366219],
[ 0.96017029, 0.75390225, -0.14550003, -0.91113026, -0.83907153],
[ 0.0044257 , 0.84385396, 0.90744678, 0.13673722, -0.75968791]])
np.tan(X)
array([[ 1.55740772e+00, -2.18503986e+00, -1.42546543e-01,
1.15782128e+00, -3.38051501e+00],
[-2.91006191e-01, 8.71447983e-01, -6.79971146e+00,
-4.52315659e-01, 6.48360827e-01],
[-2.25950846e+02, -6.35859929e-01, 4.63021133e-01,
7.24460662e+00, -8.55993401e-01]])
np.arctan(X)
array([[0.78539816, 1.10714872, 1.24904577, 1.32581766, 1.37340077],
[1.40564765, 1.42889927, 1.44644133, 1.46013911, 1.47112767],
[1.48013644, 1.48765509, 1.49402444, 1.49948886, 1.50422816]])
np.exp(X)
array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01,
1.48413159e+02],
[4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03,
2.20264658e+04],
[5.98741417e+04, 1.62754791e+05, 4.42413392e+05, 1.20260428e+06,
3.26901737e+06]])
# 表示2的x次方
a = [1,2,3,4,5]
np.exp2(a)
array([ 2., 4., 8., 16., 32.])
np.power(3, X)
array([[ 3, 9, 27, 81, 243],
[ 729, 2187, 6561, 19683, 59049],
[ 177147, 531441, 1594323, 4782969, 14348907]], dtype=int32)
np.log(X)
array([[0. , 0.69314718, 1.09861229, 1.38629436, 1.60943791],
[1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509],
[2.39789527, 2.48490665, 2.56494936, 2.63905733, 2.7080502 ]])
np.log2(X)
array([[0. , 1. , 1.5849625 , 2. , 2.32192809],
[2.5849625 , 2.80735492, 3. , 3.169925 , 3.32192809],
[3.45943162, 3.5849625 , 3.70043972, 3.80735492, 3.9068906 ]])
np.log10(X)
array([[0. , 0.30103 , 0.47712125, 0.60205999, 0.69897 ],
[0.77815125, 0.84509804, 0.90308999, 0.95424251, 1. ],
[1.04139269, 1.07918125, 1.11394335, 1.14612804, 1.17609126]])
5.2 矩阵运算
A = np.arange(4).reshape(2, 2)
A
array([[0, 1],
[2, 3]])
B = np.full((2, 2), 10)
B
array([[10, 10],
[10, 10]])
A + B
array([[10, 11],
[12, 13]])
A - B
array([[-10, -9],
[ -8, -7]])
A * B
array([[ 0, 10],
[20, 30]])
numpy中所有运算加减乘除都是对应元素进行计算。
# 矩阵乘法
A.dot(B)
array([[10, 10],
[50, 50]])
# 矩阵转置,行变列;列变行
A.T
array([[0, 2],
[1, 3]])
C = np.full((3, 3), 666)
A + C # 无法广播运算,因为行和列都不相等
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-198-29ea2e7ca8f8> in <module>
----> 1 A + C # 无法广播运算,因为行和列都不相等
ValueError: operands could not be broadcast together with shapes (2,2) (3,3)
5.3 向量和矩阵的运算
5.3.1 加法
v = np.array([1, 2])
v
array([1, 2])
A
array([[0, 1],
[2, 3]])
v + A # 广播运算
array([[1, 3],
[3, 5]])
np.vstack([v] * A.shape[0])
array([[1, 2],
[1, 2]])
np.vstack([v] * A.shape[0]) + A
array([[1, 3],
[3, 5]])
# tile 指定这个向量在行方向和列方向上分别重复堆叠的次数
np.tile(v, (2, 1))
array([[1, 2],
[1, 2]])
np.tile(v, (2, 1)) + A
array([[1, 3],
[3, 5]])
np.tile(v, (2, 2))
array([[1, 2, 1, 2],
[1, 2, 1, 2]])
5.3.2 乘法
v * A
array([[0, 2],
[2, 6]])
v.dot(A)
array([4, 7])
A.dot(v)
array([2, 8])
向量和矩阵做乘法运算时,numpy会自动地帮我们判断应把这个一维向量当作行向量还是列向量,然后取合适的形式进行计算。
5.3.3 矩阵的逆
linalg 是numpy的线性代数模块,其中的inv函数用于求矩阵的逆。
np.linalg.inv(A)
array([[-1.5, 0.5],
[ 1. , 0. ]])
invA = np.linalg.inv(A)
A.dot(invA)
array([[1., 0.],
[0., 1.]])
invA.dot(A)
array([[1., 0.],
[0., 1.]])
# 只有方阵才有逆矩阵,对非方阵求逆会报错
X = np.arange(16).reshape((2, 8))
invX = np.linalg.inv(X)
---------------------------------------------------------------------------
LinAlgError Traceback (most recent call last)
<ipython-input-220-aea614a92c0f> in <module>
----> 1 invX = np.linalg.inv(X)
<__array_function__ internals> in inv(*args, **kwargs)
D:\Anaconda3\lib\site-packages\numpy\linalg\linalg.py in inv(a)
540 a, wrap = _makearray(a)
541 _assert_stacked_2d(a)
--> 542 _assert_stacked_square(a)
543 t, result_t = _commonType(a)
544
D:\Anaconda3\lib\site-packages\numpy\linalg\linalg.py in _assert_stacked_square(*arrays)
211 m, n = a.shape[-2:]
212 if m != n:
--> 213 raise LinAlgError('Last 2 dimensions of the array must be square')
214
215 def _assert_finite(*arrays):
LinAlgError: Last 2 dimensions of the array must be square
# 矩阵的伪逆
pinvX = np.linalg.pinv(X)
pinvX
array([[-1.35416667e-01, 5.20833333e-02],
[-1.01190476e-01, 4.16666667e-02],
[-6.69642857e-02, 3.12500000e-02],
[-3.27380952e-02, 2.08333333e-02],
[ 1.48809524e-03, 1.04166667e-02],
[ 3.57142857e-02, -7.30583920e-18],
[ 6.99404762e-02, -1.04166667e-02],
[ 1.04166667e-01, -2.08333333e-02]])
X.dot(pinvX)
array([[ 1.00000000e+00, -2.77555756e-16],
[ 1.69309011e-15, 1.00000000e+00]])