一、什么时候用numpy
numpy数组内的数据类型是统一的、连续存储在内存中的。
在数据处理的过程中,当需要用for循环逐元素地实现向量、矩阵运算时,改用numpy的向量化、矩阵化操作通常更高效。
二、数组的创建
1、从列表开始创建
(1)一般设置
import numpy as np
x = np.array([1,23,4,5])
print(x) # [ 1 23 4 5]
print(type(x)) # <class 'numpy.ndarray'>
print(x.shape) # (4,) 表示数组的形状 一维数组,4个元素
(2)设置数组的数据类型
x = np.array([1,23,4,5],dtype="float32")
print(x) # [ 1. 23. 4. 5.]
print(type(x[0])) # <class 'numpy.float32'>
(3)二维数组
x = np.array([[1,2,3],[4,5,6]])
print(x)
'''
[[1 2 3]
[4 5 6]]
'''
print(x.shape) # (2, 3)
2、从头创建数组
(1)创建长度为5的数组,值都为0
x = np.zeros(5,dtype=int)
print(x) # [0 0 0 0 0]
(2)创建一个2*4的浮点型数组,值都为1
x = np.ones((2,4),dtype=float)
print(x)
'''
[[1. 1. 1. 1.]
[1. 1. 1. 1.]]
'''
(3)创建一个2*3的数组,值都为8.8
x = np.full((2,3),8.8)
print(x)
'''
[[8.8 8.8 8.8]
[8.8 8.8 8.8]]
'''
(4)创建一个3*3的单位矩阵
x = np.eye(3)
print(x)
'''
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
'''
(5)创建一个线性序列数组,从1开始,到15结束(不包含15),步长为2
x = np.arange(1,15,2)
print(x) # [ 1 3 5 7 9 11 13]
(6)创建一个4个元素的数组,这4个数均匀地分布在0到1之间(包含两个端点)
x = np.linspace(0,1,4)
print(x) # [0. 0.33333333 0.66666667 1. ]
(7)创建一个5个元素的数组,形成1~10^8的等比数列
x = np.logspace(0,8,5)
print(x) # [1.e+00 1.e+02 1.e+04 1.e+06 1.e+08]
(8)创建一个3*3的,在[0,1)之间均匀分布的随机数构成的数组
x = np.random.random((3,3))
print(x)
'''
[[0.18070848 0.53711516 0.20083225]
[0.48639276 0.161681 0.87641141]
[0.96573141 0.0585122 0.57736042]]
'''
(9)创建一个3*3的,均值为0,标准差为1的正态分布的随机数构成的数组
x = np.random.normal(0,1,(3,3))
print(x)
'''
[[ 0.10673871 1.37583368 -0.72173478]
[ 0.49479055 0.04891739 0.35865035]
[ 1.48031143 0.07684527 -0.00875594]]
'''
(10)创建一个3*3的,在[0,10)之间的离散均匀分布的随机整数构成的数组
x = np.random.randint(0,10,(3,3))
print(x)
'''
[[7 8 2]
[3 0 5]
[0 5 5]]
'''
(11)随机重排列
x = np.array([10,20,30,40])
y = np.random.permutation(x) # 生成新数组,原数组不变
print(y) # [40 20 10 30]
x = np.array([10,20,30,40])
np.random.shuffle(x) # 原地打乱,修改原数组
print(x) # [30 20 10 40]
(12)随机采样
按指定形状采样
x = np.arange(10,20,dtype=float)
y = np.random.choice(x,size=(2,3))
print(y)
'''
[[11. 13. 18.]
[12. 14. 16.]]
'''
按概率采样
x = np.arange(10,20,dtype=float)
y = np.random.choice(x,size=(2,3),p=x/np.sum(x))
print(y)
'''
[[14. 17. 15.]
[15. 17. 18.]]
'''
三、数组的性质
1、 数组的属性
(1)数组的形状shape
x = np.random.randint(10,size=(3,4))
print(x)
'''
[[0 0 0 9]
[1 8 6 7]
[0 9 1 3]]
'''
print(x.shape) # (3, 4)
(2)数组的维度ndim
print(x.ndim) # 2
(3)数组的大小size
print(x.size) # 12
(4)数组的数据类型dtype
print(x.dtype) # int32
2、数组的索引
(1)一维数组的索引
x1 = np.arange(10)
print(x1) # [0 1 2 3 4 5 6 7 8 9]
print(x1[0],x1[5],x1[-1]) # 0 5 9
(2)多维数组的索引(以二维为例)
x2 = np.random.randint(0,10,(2,3))
print(x2)
'''
[[5 3 5]
[2 9 2]]
'''
print(x2[0,1]) # 3
# 也可以写成如下形式
print(x2[0][1]) # 3
(3)注意事项
numpy数组的数据类型是固定的,向一个整型数组插入一个浮点值时,浮点值的小数部分会被直接截断(向零取整,而不是四舍五入)。
x2[0][1] = 1.9
print(x2[0][1]) # 1
3、数组的切片
(1)一维数组——跟列表一样
x1 = np.arange(10)
print(x1) # [0 1 2 3 4 5 6 7 8 9]
print(x1[:3]) # [0 1 2]
print(x1[3:]) # [3 4 5 6 7 8 9]
print(x1[::-1]) # [9 8 7 6 5 4 3 2 1 0]
(2)多维数组——以二维为例
x2 = np.random.randint(20,size=(3,4))
print(x2)
'''
[[14 15 16 1]
[19 8 0 11]
[ 8 10 2 5]]
'''
print(x2[:2, :3]) # 前两行,前三列
print(x2[:2, 0:3:2]) # 前两行,前三列(每隔一列)
print(x2[::-1, ::-1])
'''
[[ 5 2 10 8]
[11 0 8 19]
[ 1 16 15 14]]
'''
(3)获取数组的行和列
x3 = np.random.randint(20,size=(3,4))
print(x3)
'''
[[ 8 13 0 13]
[17 6 12 7]
[14 14 1 2]]
'''
print(x3[1,:]) # 第一行,从0开始计数 [17 6 12 7]
print(x3[1]) # 第一行简写
print(x3[:, 2]) # 第二列,从0开始计数 [ 0 12 1]
(4)切片获取的是视图,而非副本
①视图元素发生修改,则原数组亦发生相应修改
x4 = np.random.randint(20,size=(3,4))
print(x4)
'''
[[ 2 19 8 13]
[ 5 4 3 16]
[ 7 0 9 18]]
'''
x5 = x4[:2, :2]
print(x5)
'''
[[ 2 19]
[ 5 4]]
'''
x5[0,0] = 0
print(x4)
'''
[[ 0 19 8 13]
[ 5 4 3 16]
[ 7 0 9 18]]
'''
②切片时,生成一个副本:copy
x4 = np.random.randint(20,size=(3,4))
print(x4)
'''
[[ 1 8 4 6]
[13 14 12 13]
[10 13 1 18]]
'''
x5 = x4[:2, :2].copy()
print(x5)
'''
[[ 1 8]
[13 14]]
'''
x5[0,0] = 0
print(x4)
'''
[[ 1 8 4 6]
[13 14 12 13]
[10 13 1 18]]
'''
4、数组的变形
x1 = np.random.randint(0,10,(12,))
print(x1) # [3 3 0 5 8 5 8 1 6 6 9 6]
print(x1.shape) # (12,)
x2 = x1.reshape(3,4)
print(x2)
'''
[[3 3 0 5]
[8 5 8 1]
[6 6 9 6]]
'''
注:reshape在可能的情况下返回的是视图,不是副本(当数据在内存中的布局无法直接满足新形状时,才会返回副本)
(1)一维向量转行向量
x1 = np.random.randint(0,10,(12,))
print(x1) # [5 2 0 5 8 0 1 9 1 8 5 5]
print(x1.shape) # (12,)
x2 = x1.reshape(1,x1.shape[0])
print(x2) # [[5 2 0 5 8 0 1 9 1 8 5 5]]
# 也可以如下写法
x3 = x1[np.newaxis, :]
print(x3) # [[5 2 0 5 8 0 1 9 1 8 5 5]]
(2)一维向量转列向量
x1 = np.random.randint(0,10,(12,))
print(x1) # [6 9 9 3 5 7 2 8 2 4 8 4]
print(x1.shape) # (12,)
x2 = x1.reshape(x1.shape[0],1)
print(x2)
x3 = x1[:,np.newaxis]
'''
[[6]
[9]
[9]
[3]
[5]
[7]
[2]
[8]
[2]
[4]
[8]
[4]]
'''
(3)多维向量转一维向量
注:flatten返回的是副本
x1 = np.random.randint(0,10,(3,4))
print(x1)
'''
[[6 3 8 3]
[6 9 2 2]
[8 0 5 9]]
'''
x2 = x1.flatten()
print(x2) # [6 3 8 3 6 9 2 2 8 0 5 9]
注:ravel在可能的情况下返回的是视图(否则返回副本)
x1 = np.random.randint(0,10,(3,4))
print(x1)
'''
[[7 2 2 6]
[4 3 1 0]
[5 8 6 4]]
'''
x2 = x1.ravel()
print(x2) # [7 2 2 6 4 3 1 0 5 8 6 4]
注:reshape在可能的情况下返回的是视图(否则返回副本)
x1 = np.random.randint(0,10,(3,4))
print(x1)
'''
[[7 2 2 6]
[4 3 1 0]
[5 8 6 4]]
'''
x2 = x1.reshape(-1)
print(x2) # [7 2 2 6 4 3 1 0 5 8 6 4]
5、数组的拼接
(1)水平拼接——非视图
import numpy as np
x1 = np.array([[1, 2, 3], [4, 5, 6]])
x2 = np.array([[7,8],[9,0]])
x3 = np.hstack([x1,x2])
print(x3)
'''
[[1 2 3 7 8]
[4 5 6 9 0]]
'''
# 也可以如下形式
x4 = np.c_[x1,x2]
print(x4)
(2)垂直拼接——非视图
import numpy as np
x1 = np.array([[1, 2, 3], [4, 5, 6]])
x2 = np.array([[7,8,9],[10,11,12]])
x3 = np.vstack([x1,x2])
print(x3)
'''
[[ 1 2 3]
[ 4 5 6]
[ 7 8 9]
[10 11 12]]
'''
# 也可以如下形式
x4 = np.r_[x1,x2]
print(x4)
6、数组的分裂
(1)split的用法
import numpy as np
x = np.arange(10)
print(x) # [0 1 2 3 4 5 6 7 8 9]
x1, x2, x3 = np.split(x, [2, 7])
print(x1, x2, x3) # [0 1] [2 3 4 5 6] [7 8 9]
(2)水平分割:hsplit的用法
import numpy as np
x = np.arange(25).reshape(5,5)
print(x)
'''
[[ 0 1 2 3 4]
[ 5 6 7 8 9]
[10 11 12 13 14]
[15 16 17 18 19]
[20 21 22 23 24]]
'''
left,middle,right = np.hsplit(x,[2,4])
print("left:\n",left)
print("middle:\n",middle)
print("right:\n",right)
'''
left:
[[ 0 1]
[ 5 6]
[10 11]
[15 16]
[20 21]]
middle:
[[ 2 3]
[ 7 8]
[12 13]
[17 18]
[22 23]]
right:
[[ 4]
[ 9]
[14]
[19]
[24]]
'''
(3)垂直分割:vsplit的用法
import numpy as np
x = np.arange(25).reshape(5,5)
print(x)
'''
[[ 0 1 2 3 4]
[ 5 6 7 8 9]
[10 11 12 13 14]
[15 16 17 18 19]
[20 21 22 23 24]]
'''
upper,middle,lower = np.vsplit(x,[2,4])
print("upper:\n",upper)
print("middle:\n",middle)
print("lower:\n",lower)
'''
upper:
[[0 1 2 3 4]
[5 6 7 8 9]]
middle:
[[10 11 12 13 14]
[15 16 17 18 19]]
lower:
[[20 21 22 23 24]]
'''
四、numpy四大运算
1、向量化运算
(1)与数字的加减乘除等运算
import numpy as np
x1 = np.arange(1,6)
print(x1) # [1 2 3 4 5]
print("x1+2",x1+2) # x1+2 [3 4 5 6 7]
print("x1-2",x1-2) # x1-2 [-1 0 1 2 3]
print("x1*2",x1*2) # x1*2 [ 2 4 6 8 10]
print("x1/2",x1/2) # x1/2 [0.5 1. 1.5 2. 2.5]
print("-x1",-x1) # -x1 [-1 -2 -3 -4 -5]
print("x1**2",x1**2) # x1**2 [ 1 4 9 16 25]
print("x1//2",x1//2) # x1//2 [0 1 1 2 2]
print("x1%2",x1%2) # x1%2 [1 0 1 0 1]
(2)绝对值、三角函数、指数、对数
x1 = np.array([0,-1,-2,4])
print(x1) # [ 0 -1 -2 4]
print(np.abs(x1)) # [0 1 2 4]
print(abs(x1)) # [0 1 2 4]
t = np.linspace(0,np.pi,3)
print(t) # [0. 1.57079633 3.14159265]
print("sin:",np.sin(t))
print("cos:",np.cos(t))
print("tan:",np.tan(t))
'''
sin: [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos: [ 1.000000e+00 6.123234e-17 -1.000000e+00]
tan: [ 0.00000000e+00 1.63312394e+16 -1.22464680e-16]
'''
x2 = np.array([1,0,-1])
print("arcsin:",np.arcsin(x2))
print("arccos:",np.arccos(x2))
print("arctan:",np.arctan(x2))
'''
arcsin: [ 1.57079633 0. -1.57079633]
arccos: [0. 1.57079633 3.14159265]
arctan: [ 0.78539816 0. -0.78539816]
'''
x2 = np.arange(3)
print(x2) # [0 1 2]
print(np.exp(x2)) # [1. 2.71828183 7.3890561 ]
x2 = np.array([1,2,4,8,10])
print("ln:",np.log(x2)) # ln: [0. 0.69314718 1.38629436 2.07944154 2.30258509]
print("log2:",np.log2(x2)) # log2: [0. 1. 2. 3. 3.32192809]
print("log10",np.log10(x2)) # log10 [0. 0.30103 0.60205999 0.90308999 1. ]
(3)两个数组的运算
x1 = np.arange(1,6)
print(x1) # [1 2 3 4 5]
x2 = np.arange(6,11)
print(x2) # [ 6 7 8 9 10]
print("x1+x2:",x1+x2) # x1+x2: [ 7 9 11 13 15]
print("x1*x2:",x1*x2) # x1*x2: [ 6 14 24 36 50]
print("x1/x2:",x1/x2) # x1/x2: [0.16666667 0.28571429 0.375 0.44444444 0.5 ]
2、矩阵运算
(1)矩阵的转置
x1 = np.arange(9).reshape(3,3)
print(x1)
'''
[[0 1 2]
[3 4 5]
[6 7 8]]
'''
x2 = x1.T
print(x2)
'''
[[0 3 6]
[1 4 7]
[2 5 8]]
'''
(2)矩阵乘法
x1 = np.array([[1,0],
[1,1]])
x2 = np.array([[0,1],
[1,1]])
print(x1.dot(x2))
print(np.dot(x1,x2)) #结果同上
'''
[[0 1]
[1 2]]
'''
3、广播运算
规则:如果两个数组的形状不一致,numpy会从最后一个维度开始逐维比较:维度数较少的数组会在形状前面补1;长度为1的维度会被扩展(复制),以匹配另一个数组的形状;如果某一维的长度不相等且都不为1,则无法广播,会报错。
x1 = np.arange(3).reshape(1,3)
print(x1) # [[0 1 2]]
print(x1 + 5) # [[5 6 7]]
4、比较运算和掩码
(1)比较运算
x1 = np.random.randint(50,size=(5,5))
print(x1)
'''
[[46 42 35 7 33]
[34 16 7 33 26]
[20 17 13 34 29]
[23 15 14 47 30]
[37 47 23 5 25]]
'''
print(x1>25)
'''
[[ True True True False True]
[ True False False True True]
[False False False True True]
[False False False True True]
[ True True False False False]]
'''
(2)操作布尔数组
x1 = np.random.randint(10,size=(3,4))
print(x1)
'''
[[4 3 9 9]
[3 8 1 1]
[4 0 9 0]]
'''
print(x1>5)
'''
[[False False True True]
[False True False False]
[False False True False]]
'''
print(np.sum(x1>5)) # 4 返回True相当于1
x1 = np.random.randint(10,size=(3,4))
print(x1)
print(x1>5)
print(np.all(x1>0)) # 判断所有元素是否均大于0
print(np.any(x1==6)) # 判断是否有一个元素等于6
print(np.all(x1<9,axis=1)) # 按行进行判断
print(np.sum((x1<9) & (x1>5)))
(3)将布尔数组作为掩码
x1 = np.random.randint(10,size=(3,4))
print(x1)
'''
[[0 7 0 6]
[1 5 6 2]
[0 6 5 4]]
'''
print(x1[x1>5]) # [7 6 6 6]
5、其他的索引方式
(1)一维数组
x1 = np.random.randint(100,size=5)
print(x1) # [88 59 18 70 6]
ind = np.array([[4,2],
[2,3]])
print(x1[ind]) # 数组形状与索引数组ind相同
'''
[[ 6 18]
[18 70]]
'''
(2)多维数组
x1 = np.arange(12).reshape(3,4)
print(x1)
'''
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
'''
row = np.array([0,1,2])
col = np.array([1,3,0])
print(x1[row,col]) # [1 7 8]
print(row[:,np.newaxis])
'''
[[0]
[1]
[2]]
'''
print(x1[row[:,np.newaxis],col])
'''
[[ 1 3 0]
[ 5 7 4]
[ 9 11 8]]
'''
五、numpy的其他通用函数
1、数值排序
x = np.random.randint(20,50,size=5)
print(x) # [39 39 33 35 28]
print(np.sort(x)) # [28 33 35 39 39] 产生新的排序数组
print(x) # [39 39 33 35 28]
x.sort() # 替换原数组
print(x) # [28 33 35 39 39]
获得排序索引
x = np.random.randint(20,50,size=5)
print(x) # [21 36 31 49 35]
i = np.argsort(x)
print(i) # [0 2 4 1 3]
2、最大值最小值
x = np.random.randint(20,50,size=5)
print(x) # [49 28 37 49 49]
print("max:",np.max(x))
print("min:",np.min(x))
print("max_index:",np.argmax(x))
print("min_index:",np.argmin(x))
'''
max: 49
min: 28
max_index: 0
min_index: 1
'''
3、数值求和求积
x = np.arange(1,6)
print(x) # [1 2 3 4 5]
print(x.sum(),np.sum(x)) # 15 15
x = np.arange(6).reshape(2,3)
print(x)
'''
[[0 1 2]
[3 4 5]]
'''
print("按行求和:",np.sum(x,axis=1)) # [ 3 12]
print("按列求和:",np.sum(x,axis=0)) # [3 5 7]
x = np.arange(1,6) # [1 2 3 4 5]
print(x)
print("求积:",x.prod(),np.prod(x)) # 求积: 120 120
4、中位数、均值、方差、标准差
import numpy as np
import matplotlib.pyplot as plt
x = np.random.normal(0,1,size=10000)
plt.hist(x,bins=50)
plt.show()
print("中位数:",np.median(x))
print("均值:",np.mean(x),x.mean())
print("方差:",np.var(x),x.var())
print("标准差:",np.std(x),x.std())
'''
中位数: 0.0009870773154921067
均值: -0.003600455719446233 -0.003600455719446233
方差: 0.9775646593314912 0.9775646593314912
标准差: 0.9887186957529888 0.9887186957529888
'''