前言
- numpy模块的核心就是基于数组的运算,相比于列表和其他数据结构,数组的运算效率是最高的。在数据分析中,经常会使用到numpy模块的函数,本文将介绍常用的数学函数和逻辑函数。
一、向量化和广播
- 两个数组的维度不同,但是它们的后缘维度相等。
x = np.arange(4)
print(x) # [0 1 2 3]
print(x.shape) # (4,)
y = np.ones((3, 4))
print(y)
# [[1. 1. 1. 1.]
# [1. 1. 1. 1.]
# [1. 1. 1. 1.]]
print(y.shape) # (3, 4)
# y的shape为(3, 4),x的shape为(4,)。可以说前者是二维的,而后者是一维的。但是它们的后缘维度相等,y的第二维长度为4,和x的长度相同,因此可以相加减。
print(x + y)
# [[1. 2. 3. 4.]
# [1. 2. 3. 4.]
# [1. 2. 3. 4.]]
print((x + y).shape) # (3, 4)
- 两个数组维度不同,但是其中有维度是1。
x = np.arange(4).reshape(4, 1)
print(x)
# [[0]
# [1]
# [2]
# [3]]
print(x.shape) # (4, 1)
y = np.ones(5)
print(y)
print(y.shape) # (5,)
print(x + y) # 4行1列 + 1行5列 = 4行5列
# [[1. 1. 1. 1. 1.]
# [2. 2. 2. 2. 2.]
# [3. 3. 3. 3. 3.]
# [4. 4. 4. 4. 4.]]
x = np.array([0.0, 10.0, 20.0, 30.0])
y = np.array([1.0, 2.0, 3.0])
print(x[:, np.newaxis]) # 增加一个维度
# [[ 0.]
# [10.]
# [20.]
# [30.]]
z = x[:, np.newaxis] + y
print(z) # 4行1列 + 1行3列 = 4行3列
# [[ 1. 2. 3.]
# [11. 12. 13.]
# [21. 22. 23.]
# [31. 32. 33.]]
- 两个数组对应的后缘维度长度不相等,且其中没有任何一个维度是1,则无法广播,匹配引发错误。
x = np.arange(4)
print(x) # [0 1 2 3]
print(x.shape) # (4,)
y = np.ones(5)
print(y) # [1. 1. 1. 1. 1.]
print(y.shape) # (5,)
print(x + y)
# ValueError: operands could not be broadcast together with shapes (4,) (5,)
二、数学函数
1.ndarray与数字运算
- 加
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = x + 1
print(y)
print(np.add(x, 1))
# [2 3 4 5 6 7 8 9]
- 减
y = x - 1
print(y)
print(np.subtract(x, 1))
# [0 1 2 3 4 5 6 7]
- 乘
y = x * 2
print(y)
print(np.multiply(x, 2))
# [ 2 4 6 8 10 12 14 16]
- 除
y = x / 2
print(y)
print(np.divide(x, 2))
# [0.5 1. 1.5 2. 2.5 3. 3.5 4. ]
- 整除
y = x // 2
print(y)
print(np.floor_divide(x, 2))
# [0 1 1 2 2 3 3 4]
- 平方
y = x ** 2
print(y)
print(np.power(x, 2))
# [ 1 4 9 16 25 36 49 64]
2.ndarray与ndarray运算
(1)一维数组与多维数组
- 数组
x = np.array([[11, 12, 13, 14, 15],
[16, 17, 18, 19, 20],
[21, 22, 23, 24, 25],
[26, 27, 28, 29, 30],
[31, 32, 33, 34, 35]])
y = np.arange(1, 6)
print(y)
# [1 2 3 4 5]
- 加
z = x + y
print(z)
print(np.add(x, y))
# [[12 14 16 18 20]
# [17 19 21 23 25]
# [22 24 26 28 30]
# [27 29 31 33 35]
# [32 34 36 38 40]]
- 减
z = x - y
print(z)
print(np.subtract(x, y))
# [[10 10 10 10 10]
# [15 15 15 15 15]
# [20 20 20 20 20]
# [25 25 25 25 25]
# [30 30 30 30 30]]
- 乘
z = x * y
print(z)
print(np.multiply(x, y))
# [[ 11 24 39 56 75]
# [ 16 34 54 76 100]
# [ 21 44 69 96 125]
# [ 26 54 84 116 150]
# [ 31 64 99 136 175]]
- 除
z = x / y
print(z)
print(np.divide(x, y))
# [[11. 6. 4.33333333 3.5 3. ]
# [16. 8.5 6. 4.75 4. ]
# [21. 11. 7.66666667 6. 5. ]
# [26. 13.5 9.33333333 7.25 6. ]
# [31. 16. 11. 8.5 7. ]]
- 整除
z = x // y
print(z)
print(np.floor_divide(x, y))
# [[11 6 4 3 3]
# [16 8 6 4 4]
# [21 11 7 6 5]
# [26 13 9 7 6]
# [31 16 11 8 7]]
- 平方
y = x**2
print(y)
z = x ** np.full([1, 5], 2)
print(z)
print(np.power(x, np.full([5, 5], 2)))
# [[ 121 144 169 196 225]
# [ 256 289 324 361 400]
# [ 441 484 529 576 625]
# [ 676 729 784 841 900]
# [ 961 1024 1089 1156 1225]]
(2)多维数组与多维数组
- 数组
x = np.array([[11, 12, 13, 14, 15],
[16, 17, 18, 19, 20],
[21, 22, 23, 24, 25],
[26, 27, 28, 29, 30],
[31, 32, 33, 34, 35]])
y = np.arange(1, 26).reshape([5, 5])
print(y)
# [[ 1 2 3 4 5]
# [ 6 7 8 9 10]
# [11 12 13 14 15]
# [16 17 18 19 20]
# [21 22 23 24 25]]
- 加
z = x + y
print(z)
print(np.add(x, y))
# [[12 14 16 18 20]
# [22 24 26 28 30]
# [32 34 36 38 40]
# [42 44 46 48 50]
# [52 54 56 58 60]]
- 减
z = x - y
print(z)
print(np.subtract(x, y))
# [[10 10 10 10 10]
# [10 10 10 10 10]
# [10 10 10 10 10]
# [10 10 10 10 10]
# [10 10 10 10 10]]
- 乘
z = x * y
print(z)
print(np.multiply(x, y))
# [[ 11 24 39 56 75]
# [ 96 119 144 171 200]
# [231 264 299 336 375]
# [416 459 504 551 600]
# [651 704 759 816 875]]
- 除
z = x / y
print(z)
print(np.divide(x, y))
# [[11. 6. 4.33333333 3.5 3. ]
# [ 2.66666667 2.42857143 2.25 2.11111111 2. ]
# [ 1.90909091 1.83333333 1.76923077 1.71428571 1.66666667]
# [ 1.625 1.58823529 1.55555556 1.52631579 1.5 ]
# [ 1.47619048 1.45454545 1.43478261 1.41666667 1.4 ]]
- 整除
z = x // y
print(z)
print(np.floor_divide(x, y))
# [[11 6 4 3 3]
# [ 2 2 2 2 2]
# [ 1 1 1 1 1]
# [ 1 1 1 1 1]
# [ 1 1 1 1 1]]
3.功能函数
(1)开根、平方
- numpy.sqrt()
x = np.arange(1, 5)
print(x) # [1 2 3 4]
y = np.sqrt(x)
print(y)
# [1. 1.41421356 1.73205081 2. ]
print(np.power(x, 0.5))
# [1. 1.41421356 1.73205081 2. ]
- numpy.square()
y = np.square(x)
print(y)
# [ 1 4 9 16]
print(np.power(x, 2))
# [ 1 4 9 16]
(2)三角函数
- numpy.sin()
x = np.linspace(start=0, stop=np.pi / 2, num=10)
print(x)
# [0. 0.17453293 0.34906585 0.52359878 0.6981317 0.87266463
# 1.04719755 1.22173048 1.3962634 1.57079633]
y = np.sin(x)
print(y)
# [0. 0.17364818 0.34202014 0.5 0.64278761 0.76604444
# 0.8660254 0.93969262 0.98480775 1. ]
- numpy.arcsin()
z = np.arcsin(y)
print(z)
# [0. 0.17453293 0.34906585 0.52359878 0.6981317 0.87266463
# 1.04719755 1.22173048 1.3962634 1.57079633]
- numpy.cos()
y = np.cos(x)
print(y)
# [1.00000000e+00 9.84807753e-01 9.39692621e-01 8.66025404e-01
# 7.66044443e-01 6.42787610e-01 5.00000000e-01 3.42020143e-01
# 1.73648178e-01 6.12323400e-17]
- numpy.arccos()
z = np.arccos(y)
print(z)
# [0. 0.17453293 0.34906585 0.52359878 0.6981317 0.87266463
# 1.04719755 1.22173048 1.3962634 1.57079633]
- numpy.tan()
y = np.tan(x)
print(y)
# [0.00000000e+00 1.76326981e-01 3.63970234e-01 5.77350269e-01
# 8.39099631e-01 1.19175359e+00 1.73205081e+00 2.74747742e+00
# 5.67128182e+00 1.63312394e+16]
- numpy.arctan()
z = np.arctan(y)
print(z)
# [0. 0.17453293 0.34906585 0.52359878 0.6981317 0.87266463
# 1.04719755 1.22173048 1.3962634 1.57079633]
(3)指数、对数
- 底数为e
x = np.arange(1, 5)
print(x)
# [1 2 3 4]
y = np.exp(x)
print(y)
# [ 2.71828183 7.3890561 20.08553692 54.59815003]
z = np.log(y)
print(z)
# [1. 2. 3. 4.]
- 底数为2
y1 = np.exp2(x)
print(y1)
# [ 2. 4. 8. 16.]
z1 = np.log2(y1)
print(z1)
# [1. 2. 3. 4.]
- 底数为10
x2 = np.array([10,100,1000])
z2 = np.log10(x2)
print(z2)
# [1. 2. 3.]
(4)加法、乘法
- numpy.sum()
x = np.array([[11, 12, 13, 14, 15],
[16, 17, 18, 19, 20],
[21, 22, 23, 24, 25],
[26, 27, 28, 29, 30],
[31, 32, 33, 34, 35]])
y = np.sum(x) # 整体求和
print(y) # 575
y = np.sum(x, axis=0) # 每列求和
print(y) # [105 110 115 120 125]
y = np.sum(x, axis=1) # 每行求和
print(y) # [ 65 90 115 140 165]
- numpy.cumsum()
x = np.array([[11, 12, 13, 14, 15],
[16, 17, 18, 19, 20],
[21, 22, 23, 24, 25],
[26, 27, 28, 29, 30],
[31, 32, 33, 34, 35]])
y = np.cumsum(x)
print(y)
# [ 11 23 36 50 65 81 98 116 135 155 176 198 221 245 270 296 323 351
# 380 410 441 473 506 540 575]
y = np.cumsum(x, axis=0) # 沿行方向累加(每列自上而下逐行累加)
print(y)
# [[ 11 12 13 14 15]
# [ 27 29 31 33 35]
# [ 48 51 54 57 60]
# [ 74 78 82 86 90]
# [105 110 115 120 125]]
y = np.cumsum(x, axis=1) # 沿列方向累加(每行自左向右逐列累加)
print(y)
# [[ 11 23 36 50 65]
# [ 16 33 51 70 90]
# [ 21 43 66 90 115]
# [ 26 53 81 110 140]
# [ 31 63 96 130 165]]
- numpy.prod()
x = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(x)
# [[1 2 3]
# [4 5 6]
# [7 8 9]]
y = np.prod(x) # 数组所有元素乘积
print(y) # 362880
y = np.prod(x, axis=0) # 整列乘积
print(y)
# [ 28 80 162]
y = np.prod(x, axis=1) # 整行乘积
print(y)
# [ 6 120 504]
- numpy.cumprod()
x = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(x)
# [[1 2 3]
# [4 5 6]
# [7 8 9]]
y = np.cumprod(x)
print(y) # [ 1 2 6 24 120 720 5040 40320 362880]
y = np.cumprod(x, axis=0) # 沿行累乘
print(y)
# [[ 1 2 3]
# [ 4 10 18]
# [ 28 80 162]]
y = np.cumprod(x, axis=1) # 沿列累乘
print(y)
# [[ 1 2 6]
# [ 4 20 120]
# [ 7 56 504]]
- numpy.diff()
A = np.arange(1, 13).reshape((3, 4))
A[1, 1] = 8
print(A)
# [[ 1 2 3 4]
# [ 5 8 7 8]
# [ 9 10 11 12]]
print(np.diff(A, axis=0)) # 沿行作差,返回每行减上一行的值
# [[4 6 4 4]
# [4 2 4 4]]
print(np.diff(A, axis=1)) # 沿列作差,返回每列减上一列的值
# [[ 1 1 1]
# [ 3 -1 1]
# [ 1 1 1]]
(5)四舍五入
- numpy.around()
x = np.random.rand(3, 3) * 10
print(x)
# [[8.12571261 9.51953169 4.48948222]
# [5.75500584 4.08345588 1.35425003]
# [6.13208871 5.40958798 1.83365735]]
y = np.around(x) # 四舍五入取整(注意:恰好为.5时向最近的偶数舍入)
print(y)
# [[ 8. 10. 4.]
# [ 6. 4. 1.]
# [ 6. 5. 2.]]
z = np.around(x, 2) # 四舍五入保留两位小数
print(z)
# [[8.13 9.52 4.49]
# [5.76 4.08 1.35]
# [6.13 5.41 1.83]]
- numpy.ceil() & numpy.floor()
x = np.random.rand(3, 3) * 10
print(x)
# [[0.61906644 7.94810517 6.38299531]
# [6.30105525 1.57071531 4.07112856]
# [2.2610421 6.42085375 4.240439 ]]
y = np.ceil(x) # 向上取整
print(y)
# [[1. 8. 7.]
# [7. 2. 5.]
# [3. 7. 5.]]
z = np.floor(x) # 向下取整
print(z)
# [[0. 7. 6.]
# [6. 1. 4.]
# [2. 6. 4.]]
(6)杂项
- numpy.clip()
x = np.array([[11, 12, 13, 14, 15],
[16, 17, 18, 19, 20],
[21, 22, 23, 24, 25],
[26, 27, 28, 29, 30],
[31, 32, 33, 34, 35]])
y = np.clip(x, a_min=15, a_max=30) # 裁剪小于15和大于30的数
print(y)
# [[15 15 15 15 15]
# [16 17 18 19 20]
# [21 22 23 24 25]
# [26 27 28 29 30]
# [30 30 30 30 30]]
- numpy.abs() & numpy.sign()
x = np.arange(-5, 5)
print(x)
# [-5 -4 -3 -2 -1 0 1 2 3 4]
y = np.abs(x) # 求绝对值
print(y)
# [5 4 3 2 1 0 1 2 3 4]
z = np.sign(x) # 取各元素的符号:负数为-1,零为0,正数为1
print(z)
# [-1 -1 -1 -1 -1 0 1 1 1 1]
三、逻辑函数
1.真值测试
- numpy.all() & numpy.any()
a = np.array([0, 4, 5])
b = np.copy(a)
print(np.all(a == b)) # True
print(np.any(a == b)) # True
b[0] = 1
print(np.all(a == b)) # False
print(np.any(a == b)) # True
- numpy.isnan()
x = np.array([1, 2, np.nan])
print(np.isnan(x))
# [False False True]
2.逻辑运算
- 非 —— numpy.logical_not()
print(np.logical_not([True, False, 0, 1]))
# [False True True False]
x = np.arange(5)
print(np.logical_not(x < 3))
# [False False False True True]
- 与 —— np.logical_and()
print(np.logical_and(True, False))
# False
print(np.logical_and([True, False], [True, False]))
# [ True False]
x = np.arange(5)
print(np.logical_and(x > 1, x < 4))
# [False False True True False]
- 或 —— np.logical_or()
print(np.logical_or(True, False))
# True
print(np.logical_or([True, False], [False, False]))
# [ True False]
x = np.arange(5)
print(np.logical_or(x < 1, x > 3))
# [ True False False False True]
- 异或 —— np.logical_xor()
print(np.logical_xor(True, False))
# True
print(np.logical_xor([True, True, False, False], [True, False, True, False]))
# [False True True False]
print(np.eye(2))
# [[1. 0.]
# [0. 1.]]
print(np.logical_xor(0, np.eye(2)))
# [[ True False]
# [False True]]
3.比较
- numpy.greater()
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = x > 2
print(y)
print(np.greater(x, 2))
# [False False True True True True True True]
- np.greater_equal()
y = x >= 2
print(y)
print(np.greater_equal(x, 2))
# [False True True True True True True True]
- np.equal()
y = x == 2
print(y)
print(np.equal(x, 2))
# [False True False False False False False False]
- np.not_equal()
y = x != 2
print(y)
print(np.not_equal(x, 2))
# [ True False True True True True True True]
- np.less()
y = x < 2
print(y)
print(np.less(x, 2))
# [ True False False False False False False False]
- np.less_equal()
y = x <= 2
print(y)
print(np.less_equal(x, 2))
# [ True True False False False False False False]
- numpy.isclose() & numpy.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False)
- 如果两个数组在给定的公差范围内相等,则返回True;否则返回False。其中isclose逐元素比较并返回布尔数组,allclose仅在所有元素都满足条件时才返回True。当以下式子结果为True时,对应元素被视为相等:
absolute(a - b) <= (atol + rtol * absolute(b))
x = np.isclose([1e10, 1e-7], [1.00001e10, 1e-8])
print(x) # [ True False]
x = np.allclose([1e10, 1e-7], [1.00001e10, 1e-8])
print(x) # False
x = np.isclose([1.0, np.nan], [1.0, np.nan])
print(x) # [ True False]
x = np.allclose([1.0, np.nan], [1.0, np.nan])
print(x) # False
# 若 np.nan 在两个ndarray的同一位置,且 equal_nan=True,则认为二者相等
x = np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True)
print(x) # [ True True]
x = np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True)
print(x) # True