2 numpy

1 numpy的优势

1.1 介绍

开源的python科学计算库,用于快速处理任意维度的数组

是一个快速而灵活的大数据容器

1.2 ndarray介绍

n维数组类型

  • ndarray存储数据
import numpy as np

score = np.array([[80, 89, 86, 67, 79],
[78, 97, 89, 67, 81],
[90, 94, 78, 67, 74],
[91, 91, 90, 67, 69],
[76, 87, 75, 67, 86],
[70, 79, 84, 67, 84],
[94, 92, 93, 67, 64],
[86, 85, 83, 67, 80]])

score
array([[80, 89, 86, 67, 79],
       [78, 97, 89, 67, 81],
       [90, 94, 78, 67, 74],
       [91, 91, 90, 67, 69],
       [76, 87, 75, 67, 86],
       [70, 79, 84, 67, 84],
       [94, 92, 93, 67, 64],
       [86, 85, 83, 67, 80]])

1.3 ndarray与python原生list运算效率对比

import random
import time
import numpy as np

a = []
for i in range(10000000):
    a.append(random.random())

%time sum1 = sum(a)

b = np.array(a)

%time sum2 = np.sum(b)
CPU times: user 199 ms, sys: 396 ms, total: 595 ms
Wall time: 667 ms
CPU times: user 16.7 ms, sys: 736 µs, total: 17.4 ms
Wall time: 16.5 ms

1.4 ndarray的优势

  • 内存块风格

ndarray在存储数据的时候,数据与数据地址都是连续的

而python原生list只能通过寻址方式找到下一个元素

  • ndarray支持并行化运算
  • 底层使用C语言,内部解除了GIL

2 N维数组-ndarray

2.1 ndarray的属性

a = np.array([[1,2,3],[4,5,6]])
b = np.array([1,2,3,4])
c = np.array([[[1,2,3],[4,5,6]],[[1,2,3],[4,5,6]]])
  • 数组维度的元组
print(a.shape)
print(b.shape)
print(c.shape)
(2, 3)
(4,)
(2, 2, 3)
  • 数组维度
print(a.ndim)
print(b.ndim)
print(c.ndim)
2
1
3
  • 数组中的元素数量
print(a.size)
print(b.size)
print(c.size)
6
4
12
  • 数组元素的长度(字节)
print(a.itemsize)
print(b.itemsize)
print(c.itemsize)
8
8
8
  • 数组元素的类型
print(a.dtype)
print(b.dtype)
print(c.dtype)
int64
int64
int64

2.2 ndarray的类型

np.bool

np.int8-64

np.uint8-64

np.float16-64

np.complex64-128

np.object_

np.bytes_(旧别名np.string_,已在NumPy 2.0中移除)

np.str_(旧别名np.unicode_,已在NumPy 2.0中移除)

# 创建数组时指定类型
a = np.array([[1,2,3], [4,5,6]], dtype=np.float32)
print(a)
a.dtype
[[1. 2. 3.]
 [4. 5. 6.]]

dtype('float32')

3 基本操作

3.1 生成0和1的数组

zero = np.zeros([3,4])
zero
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])
ones = np.ones_like(zero)
ones
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

3.2 从现有数组生成

a = np.array([[1,2,3],[4,5,6]])
# 深拷贝
a1 = np.array(a)
# 不拷贝:a已是ndarray时,asarray直接返回a本身(与a共享数据)
a2 = np.asarray(a)
a
array([[1, 2, 3],
       [4, 5, 6]])
a[1] = 0
a
array([[1, 2, 3],
       [0, 0, 0]])
a1
array([[1, 2, 3],
       [4, 5, 6]])
a2
array([[1, 2, 3],
       [0, 0, 0]])
a = np.array([[1,2,3],[4,5,6]])
# 深拷贝
a1 = np.array(a)
# 不拷贝:a已是ndarray时,asarray直接返回a本身(与a共享数据)
a2 = np.asarray(a)
a[1][0] = 1
a
array([[1, 2, 3],
       [1, 5, 6]])
a1
array([[1, 2, 3],
       [4, 5, 6]])
a2
array([[1, 2, 3],
       [1, 5, 6]])

3.3 生成固定范围的数组

# 生成等间隔的序列
np.linspace(0, 100, 11)
array([  0.,  10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])
# 类似range
np.arange(10, 50, 2)
array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,
       44, 46, 48])
# 生成10^x
np.logspace(0, 2, 3)
array([  1.,  10., 100.])

3.4 生成随机数组

# 均匀分布
x1 = np.random.uniform(-1, 1, 1000000)
x1
array([-0.64396627,  0.45418466,  0.18690437, ...,  0.24564599,
       -0.60393121,  0.42554518])
import matplotlib.pyplot as plt

plt.figure()
plt.hist(x=x1, bins=1000)
plt.show()

在这里插入图片描述

# 正态分布
x2 = np.random.normal(1.75, 1, 100000000)

plt.figure()
plt.hist(x2, 1000)

plt.show()

在这里插入图片描述

# 随机生成8只股票2周的交易日涨幅数据
stock_change = np.random.normal(0, 1, (8, 10))
stock_change
array([[-0.39505242,  0.90826077, -0.21782191, -2.579644  , -1.80536267,
         0.0761092 ,  0.22959365, -0.27668752, -0.61130237, -1.32768396],
       [ 0.55786366, -0.52183062, -0.58764324, -0.83781052,  1.44944257,
         0.30971043, -1.70501328,  0.11655112, -0.78857542,  0.15397265],
       [-0.87916493,  0.16126835, -0.37054848,  0.20157712,  0.05141507,
         0.84445257,  0.19210461, -0.12467897,  0.15308413, -0.45694432],
       [ 1.25422764, -0.50528106, -1.74411687, -0.45714352,  0.19273938,
        -0.27121733,  1.41678988,  1.77644812,  0.34032522, -0.29910002],
       [-0.79216497, -0.98200844,  0.53838896, -1.53033656, -0.9257245 ,
        -1.72211499,  1.38017619,  0.41968168, -1.7403562 , -0.72412437],
       [-0.28304912, -1.64657941, -0.95650248,  0.64653616, -1.07762432,
        -1.17113291, -0.71637017,  0.28357741,  0.40984885, -0.20729397],
       [-2.13329804, -0.46327641, -0.66551884,  0.37347056,  0.95767147,
         0.29573325,  1.72222028,  0.89640279,  0.66437657,  0.09138068],
       [ 2.10734775, -0.67210195,  1.43512674,  2.28827819, -0.2605319 ,
        -2.26863744, -0.84161926, -0.07581769, -1.65233412,  0.03896541]])

3.5 数组的索引、切片

# 二维的数组
stock_change[0, 0:3]
array([-0.39505242,  0.90826077, -0.21782191])
stock_change[3:, 3:]
array([[-0.45714352,  0.19273938, -0.27121733,  1.41678988,  1.77644812,
         0.34032522, -0.29910002],
       [-1.53033656, -0.9257245 , -1.72211499,  1.38017619,  0.41968168,
        -1.7403562 , -0.72412437],
       [ 0.64653616, -1.07762432, -1.17113291, -0.71637017,  0.28357741,
         0.40984885, -0.20729397],
       [ 0.37347056,  0.95767147,  0.29573325,  1.72222028,  0.89640279,
         0.66437657,  0.09138068],
       [ 2.28827819, -0.2605319 , -2.26863744, -0.84161926, -0.07581769,
        -1.65233412,  0.03896541]])
stock_change[:-3, :-3]
array([[-0.39505242,  0.90826077, -0.21782191, -2.579644  , -1.80536267,
         0.0761092 ,  0.22959365],
       [ 0.55786366, -0.52183062, -0.58764324, -0.83781052,  1.44944257,
         0.30971043, -1.70501328],
       [-0.87916493,  0.16126835, -0.37054848,  0.20157712,  0.05141507,
         0.84445257,  0.19210461],
       [ 1.25422764, -0.50528106, -1.74411687, -0.45714352,  0.19273938,
        -0.27121733,  1.41678988],
       [-0.79216497, -0.98200844,  0.53838896, -1.53033656, -0.9257245 ,
        -1.72211499,  1.38017619]])

3.6 形状修改

stock_change.reshape([10, 8])
array([[-0.39505242,  0.90826077, -0.21782191, -2.579644  , -1.80536267,
         0.0761092 ,  0.22959365, -0.27668752],
       [-0.61130237, -1.32768396,  0.55786366, -0.52183062, -0.58764324,
        -0.83781052,  1.44944257,  0.30971043],
       [-1.70501328,  0.11655112, -0.78857542,  0.15397265, -0.87916493,
         0.16126835, -0.37054848,  0.20157712],
       [ 0.05141507,  0.84445257,  0.19210461, -0.12467897,  0.15308413,
        -0.45694432,  1.25422764, -0.50528106],
       [-1.74411687, -0.45714352,  0.19273938, -0.27121733,  1.41678988,
         1.77644812,  0.34032522, -0.29910002],
       [-0.79216497, -0.98200844,  0.53838896, -1.53033656, -0.9257245 ,
        -1.72211499,  1.38017619,  0.41968168],
       [-1.7403562 , -0.72412437, -0.28304912, -1.64657941, -0.95650248,
         0.64653616, -1.07762432, -1.17113291],
       [-0.71637017,  0.28357741,  0.40984885, -0.20729397, -2.13329804,
        -0.46327641, -0.66551884,  0.37347056],
       [ 0.95767147,  0.29573325,  1.72222028,  0.89640279,  0.66437657,
         0.09138068,  2.10734775, -0.67210195],
       [ 1.43512674,  2.28827819, -0.2605319 , -2.26863744, -0.84161926,
        -0.07581769, -1.65233412,  0.03896541]])
# -1表示待计算
stock_change.reshape([-1, 20])
array([[-0.39505242,  0.90826077, -0.21782191, -2.579644  , -1.80536267,
         0.0761092 ,  0.22959365, -0.27668752, -0.61130237, -1.32768396,
         0.55786366, -0.52183062, -0.58764324, -0.83781052,  1.44944257,
         0.30971043, -1.70501328,  0.11655112, -0.78857542,  0.15397265],
       [-0.87916493,  0.16126835, -0.37054848,  0.20157712,  0.05141507,
         0.84445257,  0.19210461, -0.12467897,  0.15308413, -0.45694432,
         1.25422764, -0.50528106, -1.74411687, -0.45714352,  0.19273938,
        -0.27121733,  1.41678988,  1.77644812,  0.34032522, -0.29910002],
       [-0.79216497, -0.98200844,  0.53838896, -1.53033656, -0.9257245 ,
        -1.72211499,  1.38017619,  0.41968168, -1.7403562 , -0.72412437,
        -0.28304912, -1.64657941, -0.95650248,  0.64653616, -1.07762432,
        -1.17113291, -0.71637017,  0.28357741,  0.40984885, -0.20729397],
       [-2.13329804, -0.46327641, -0.66551884,  0.37347056,  0.95767147,
         0.29573325,  1.72222028,  0.89640279,  0.66437657,  0.09138068,
         2.10734775, -0.67210195,  1.43512674,  2.28827819, -0.2605319 ,
        -2.26863744, -0.84161926, -0.07581769, -1.65233412,  0.03896541]])
# resize会就地修改原数组的形状(reshape只返回新形状的结果,不改变原数组)
stock_change.resize([10, 8])
stock_change
array([[-0.39505242,  0.90826077, -0.21782191, -2.579644  , -1.80536267,
         0.0761092 ,  0.22959365, -0.27668752],
       [-0.61130237, -1.32768396,  0.55786366, -0.52183062, -0.58764324,
        -0.83781052,  1.44944257,  0.30971043],
       [-1.70501328,  0.11655112, -0.78857542,  0.15397265, -0.87916493,
         0.16126835, -0.37054848,  0.20157712],
       [ 0.05141507,  0.84445257,  0.19210461, -0.12467897,  0.15308413,
        -0.45694432,  1.25422764, -0.50528106],
       [-1.74411687, -0.45714352,  0.19273938, -0.27121733,  1.41678988,
         1.77644812,  0.34032522, -0.29910002],
       [-0.79216497, -0.98200844,  0.53838896, -1.53033656, -0.9257245 ,
        -1.72211499,  1.38017619,  0.41968168],
       [-1.7403562 , -0.72412437, -0.28304912, -1.64657941, -0.95650248,
         0.64653616, -1.07762432, -1.17113291],
       [-0.71637017,  0.28357741,  0.40984885, -0.20729397, -2.13329804,
        -0.46327641, -0.66551884,  0.37347056],
       [ 0.95767147,  0.29573325,  1.72222028,  0.89640279,  0.66437657,
         0.09138068,  2.10734775, -0.67210195],
       [ 1.43512674,  2.28827819, -0.2605319 , -2.26863744, -0.84161926,
        -0.07581769, -1.65233412,  0.03896541]])
# 转置
stock_change.shape
(10, 8)
stock_change.T.shape
(8, 10)

3.7 类型修改

stock_change.astype(np.int32)
array([[ 0,  0,  0, -2, -1,  0,  0,  0],
       [ 0, -1,  0,  0,  0,  0,  1,  0],
       [-1,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  1,  0],
       [-1,  0,  0,  0,  1,  1,  0,  0],
       [ 0,  0,  0, -1,  0, -1,  1,  0],
       [-1,  0,  0, -1,  0,  0, -1, -1],
       [ 0,  0,  0,  0, -2,  0,  0,  0],
       [ 0,  0,  1,  0,  0,  0,  2,  0],
       [ 1,  2,  0, -2,  0,  0, -1,  0]], dtype=int32)
# 转换为bytes(tostring()已弃用,新版本NumPy请使用tobytes())
arr = np.array([[[1, 2, 3], [4, 5, 6]], [[12, 3, 34], [5, 6, 7]]])
arr.tostring()
b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00"\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00'

3.8 数组的去重

temp = np.array([[1, 2, 3, 4], [3, 4, 5, 6]])
np.unique(temp)
array([1, 2, 3, 4, 5, 6])

4 ndarray运算

4.1 逻辑运算

stock_change = np.random.normal(0, 1, (8, 10))
tmp = stock_change[0:5, 0:5]
tmp
array([[-0.09452151,  0.21494329, -0.88702803,  1.27200298,  1.2467966 ],
       [ 0.22267303,  0.2302336 ,  0.07200738,  0.03890387,  0.3672513 ],
       [ 1.16974078,  0.61063169,  0.7513086 , -2.85986462,  0.96363815],
       [-0.89594979, -0.31032916,  1.75412849, -0.89782976, -2.49554601],
       [-0.11609947, -0.45015486,  0.86779026,  0.99905792,  1.14721481]])
tmp > 0
array([[False,  True, False,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True, False,  True],
       [False, False,  True, False, False],
       [False, False,  True,  True,  True]])
tmp[tmp > 0.5] = 1
tmp
array([[-0.09452151,  0.21494329, -0.88702803,  1.        ,  1.        ],
       [ 0.22267303,  0.2302336 ,  0.07200738,  0.03890387,  0.3672513 ],
       [ 1.        ,  1.        ,  1.        , -2.85986462,  1.        ],
       [-0.89594979, -0.31032916,  1.        , -0.89782976, -2.49554601],
       [-0.11609947, -0.45015486,  1.        ,  1.        ,  1.        ]])

4.2 通用判断函数

# 是否全大于0
np.all(stock_change[0:2, 0:5] > 0)
False
# 判断是否有大于0的
np.any(stock_change[0:2, 0:5] > 0)
True

4.3 三元运算符

# 大于0的置1,否则为0
temp = stock_change[:4, :4]
np.where(temp > 0, 1, 0)
array([[0, 1, 0, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 0],
       [0, 0, 1, 0]])
# 逻辑与
np.where(np.logical_and(temp > 0, temp < 0.5), 1, 0)
array([[0, 1, 0, 0],
       [1, 1, 1, 1],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])
# 逻辑或
np.where(np.logical_or(temp > 0.5, temp < - 0.5), 1, 0)
array([[0, 0, 1, 1],
       [0, 0, 0, 0],
       [1, 1, 1, 1],
       [1, 0, 1, 1]])

4.4 统计运算

temp
array([[-0.09452151,  0.21494329, -0.88702803,  1.        ],
       [ 0.22267303,  0.2302336 ,  0.07200738,  0.03890387],
       [ 1.        ,  1.        ,  1.        , -2.85986462],
       [-0.89594979, -0.31032916,  1.        , -0.89782976]])
# 最大值, axis=1表示沿列方向计算,即求每一行的最大值
np.max(temp, axis=1)
array([1.       , 0.2302336, 1.       , 1.       ])
# 最小值
np.min(temp, axis=1)
array([-0.88702803,  0.03890387, -2.85986462, -0.89782976])
# 中位数
np.median(temp, axis=1)
array([ 0.06021089,  0.1473402 ,  1.        , -0.60313948])
# 平均数
np.mean(temp, axis=1)
array([ 0.05834844,  0.14095447,  0.03503384, -0.27602718])
# 标准差
np.std(temp, axis=1)
array([0.67607978, 0.08633758, 1.67137041, 0.77465548])
# 方差
np.var(temp, axis=1)
array([0.45708387, 0.00745418, 2.79347904, 0.60009112])
# 返回最大索引
np.argmax(temp, axis=1)
array([3, 1, 0, 2])
# 返回最小索引
np.argmin(temp, axis=1)
array([2, 3, 3, 3])

5 数组间的运算

5.1 数组与数的运算

arr = np.array([[1,2,3,2,1,4], [5,6,1,2,3,1]])
arr
array([[1, 2, 3, 2, 1, 4],
       [5, 6, 1, 2, 3, 1]])
arr + 1
array([[2, 3, 4, 3, 2, 5],
       [6, 7, 2, 3, 4, 2]])
arr - 1
array([[0, 1, 2, 1, 0, 3],
       [4, 5, 0, 1, 2, 0]])
arr * 2
array([[ 2,  4,  6,  4,  2,  8],
       [10, 12,  2,  4,  6,  2]])
arr / 2
array([[0.5, 1. , 1.5, 1. , 0.5, 2. ],
       [2.5, 3. , 0.5, 1. , 1.5, 0.5]])

5.2 数组与数组运算

# 长度为1的一维数组能与任意形状的数组运算(广播)
a = np.array([[1,2,3],[3,4,5]])
a
array([[1, 2, 3],
       [3, 4, 5]])
b = np.array([2])
a + b
array([[3, 4, 5],
       [5, 6, 7]])
print(a.shape)
print(b.shape)
(2, 3)
(1,)
# 末尾维度的长度一致也可运算(广播)
c = np.array([2,2,2])
print(c.shape)
c
(3,)
array([2, 2, 2])
a + c
array([[3, 4, 5],
       [5, 6, 7]])
d = np.array([[1], [2]])
print(d.shape)
(2, 1)
a + d
array([[2, 3, 4],
       [5, 6, 7]])
e = np.array([[1], [2], [3]])
e.shape
(3, 1)
a + e
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-138-dba895c55d37> in <module>
----> 1 a + e
ValueError: operands could not be broadcast together with shapes (2,3) (3,1) 

6 矩阵运算

6.1 矩阵乘法

a = np.array([[80, 86],
[82, 80],
[85, 78],
[90, 90],
[86, 82],
[82, 90],
[78, 80],
[92, 94]])
b = np.array(([0.7], [0.3]))
np.matmul(a, b)
array([[81.8],
       [81.4],
       [82.9],
       [90. ],
       [84.8],
       [84.4],
       [78.6],
       [92.6]])
80 * 0.7 + 86 * 0.3
81.8
np.dot(a, b)
array([[81.8],
       [81.4],
       [82.9],
       [90. ],
       [84.8],
       [84.4],
       [78.6],
       [92.6]])
# np.matmul不支持标量与数组的乘法(np.dot支持)
np.matmul(a, 3)
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-144-f843891049e9> in <module>
----> 1 np.matmul(a, 3)
ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)
np.dot(a, 3)
array([[240, 258],
       [246, 240],
       [255, 234],
       [270, 270],
       [258, 246],
       [246, 270],
       [234, 240],
       [276, 282]])
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值