Numpy 学习专题（五）—— 排序、搜索、计数、集合

最新推荐文章于 2023-11-15 14:49:41 发布

怡颜悦色

最新推荐文章于 2023-11-15 14:49:41 发布

阅读量312

点赞数

分类专栏： Numpy学习

本文链接：https://blog.csdn.net/qq_42967904/article/details/109346504

版权

Numpy学习专栏收录该内容

10 篇文章 0 订阅

订阅专栏

前言

一、排序

`numpy.sort(a, axis=-1, kind='quicksort', order=None)`

参数 axis

# Build a reproducible 3x3 matrix of values in [0, 10), rounded to 2 decimals.
np.random.seed(20201028)
x = np.around(10 * np.random.rand(3, 3), decimals=2)
print(x)
# [[3.55 1.02 8.3 ]
#  [5.16 3.54 0.77]
#  [0.86 6.71 5.53]]

# axis=0: every column is sorted independently.
y = np.sort(x, axis=0)
print(y)
# [[0.86 1.02 0.77]
#  [3.55 3.54 5.53]
#  [5.16 6.71 8.3 ]]

# axis=1: every row is sorted independently.
y = np.sort(x, axis=1)
print(y)
# [[1.02 3.55 8.3 ]
#  [0.77 3.54 5.16]
#  [0.86 5.53 6.71]]

# No axis given: the last axis is used, so a 2-D array is sorted row by row.
y = np.sort(x)
print(y)
# [[1.02 3.55 8.3 ]
#  [0.77 3.54 5.16]
#  [0.86 5.53 6.71]]

参数 order

# Structured dtype with a 10-byte string field and an integer field.
# The builtin `int` is used because the `np.int` alias was removed in NumPy 1.24.
tp = np.dtype([('name', 'S10'), ('age', int)])

a = np.array([("dawang", 21), ("yiyi", 18)], dtype=tp)

b = np.sort(a, order='name')  # sort the records by the 'name' field
print(b)
# [(b'dawang', 21) (b'yiyi', 18)]

b = np.sort(a, order='age')  # sort the records by the 'age' field
print(b)
# [(b'yiyi', 18) (b'dawang', 21)]

`numpy.argsort()`

用元素的索引位置替代排序后的实际结果
一维数组

np.random.seed(20201028)

# Ten random integers drawn from [0, 10).
x = np.random.randint(0, 10, size=10)
print(x)
# [5 5 1 8 9 1 7 9 5 6]

# Ascending: argsort returns the positions that would sort x.
y = np.argsort(x)
print(y)
# [2 5 0 1 8 9 6 3 4 7]
print(x[y])
# [1 1 5 5 5 6 7 8 9 9]

# Descending: sort the negated values instead.
y = np.argsort(-x)
print(y)
# [4 7 3 6 9 0 1 8 2 5]
print(x[y])
# [9 9 8 7 6 5 5 5 1 1]

二维数组

np.random.seed(20201028)

x = np.random.rand(3, 3) * 10
x = np.around(x, 2)
print(x)
# [[3.55 1.02 8.3 ]
#  [5.16 3.54 0.77]
#  [0.86 6.71 5.53]]

y = np.argsort(x, axis=0)  # axis=0: indices that sort each column
print(y)
# [[2 0 1]
#  [0 1 2]
#  [1 2 0]]

y = np.argsort(x, axis=1)  # axis=1: indices that sort each row
print(y)
# [[1 0 2]
#  [2 1 0]
#  [0 2 1]]

y = np.argsort(x)  # default is the last axis, i.e. row-wise for a 2-D array
print(y)
# [[1 0 2]
#  [2 1 0]
#  [0 2 1]]

# Apply the per-row index arrays to x to obtain the row-wise sorted values.
z = np.take_along_axis(x, y, axis=1)
print(z)
# [[1.02 3.55 8.3 ]
#  [0.77 3.54 5.16]
#  [0.86 5.53 6.71]]

`numpy.lexsort()`

指定排序指标

np.random.seed(20201029)

x = np.random.rand(3, 3) * 10
x = np.around(x, 2)
print(x)
# [[9.37 8.65 5.74]
#  [3.78 3.62 9.97]
#  [5.7  4.8  5.44]]

# Key = first column: reorder the rows so that column 0 is ascending.
index = np.lexsort([x[:, 0]])
print(index)  # [1 2 0]
y = x[index]
print(y)
# [[3.78 3.62 9.97]
#  [5.7  4.8  5.44]
#  [9.37 8.65 5.74]]

# Negating the key reorders the rows so that column 0 is descending.
index = np.lexsort([-1 * x[:, 0]])
y = x[index]
print(y)
# [[9.37 8.65 5.74]
#  [5.7  4.8  5.44]
#  [3.78 3.62 9.97]]

# Key = second column: row order that makes column 1 ascending.
index_2 = np.lexsort([x[:, 1]])
print(index_2)  # [1 2 0]

# Key = second row: this is a COLUMN order (use x[:, index_] to apply it).
index_ = np.lexsort([x[1, :]])
print(index_)  # [1 0 2]

`numpy.partition()`

以排序后位于索引 kth 处的元素为基准，将元素分成两部分：小于该元素的放在其前面，大于等于该元素的放在其后面（两部分内部的顺序不作保证）。
一维

np.random.seed(100)

# Ten random integers drawn from [1, 30).
x = np.random.randint(1, 30, size=10)
print(x)  # [ 9 25  4  8 24 16 17 11 21  3]

# Put the 3rd smallest value at index 2; smaller values end up before it,
# larger ones after it (the order inside each side is unspecified).
y = np.partition(x, 2)
print(y)  # [ 3  4  8 25 24 16 17 11 21  9]

二维

np.random.seed(100)

# 8x3 matrix of random integers drawn from [1, 30).
x = np.random.randint(1, 30, size=(8, 3))
print(x)
# [[ 9 25  4]
#  [ 8 24 16]
#  [17 11 21]
#  [ 3 22  3]
#  [ 3 15  3]
#  [18 17 25]
#  [16  5 12]
#  [29 27 17]]

# Per column: row 2 receives the 3rd smallest value of that column.
z = np.partition(x, 2, axis=0)
print(z)
# [[ 3  5  3]
#  [ 3 11  3]
#  [ 8 15  4]
#  [ 9 22 21]
#  [17 24 16]
#  [18 17 25]
#  [16 25 12]
#  [29 27 17]]

# Per column: row -3 receives the 3rd largest value of that column.
z = np.partition(x, -3, axis=0)
print(z)
# [[ 8  5  3]
#  [ 3 11  3]
#  [ 3 15  4]
#  [ 9 17 12]
#  [16 22 16]
#  [17 24 17]
#  [18 25 21]
#  [29 27 25]]

二、搜索

`numpy.argmax() & numpy.argmin()`

numpy.argmax()是返回数组最大值的索引，numpy.argmin()与之相反。
一维数组

np.random.seed(20201028)

# Five random values in [0, 10), rounded to 2 decimals.
x = np.around(10 * np.random.rand(5), decimals=2)
print(x)  # [3.55 1.02 8.3  5.16 3.54]

# Position of the largest element, then the element itself.
y = np.argmax(x)
print(y)  # 2
print(x[y])  # 8.3

二维数组

np.random.seed(20201028)

# 3x3 matrix of random values in [0, 10), rounded to 2 decimals.
x = np.around(10 * np.random.rand(3, 3), decimals=2)
print(x)
# [[3.55 1.02 8.3 ]
#  [5.16 3.54 0.77]
#  [0.86 6.71 5.53]]

# Without an axis the result is an index into the flattened array.
y = np.argmax(x)
print(y)  # 2

# axis=0: row index of the maximum of every column.
y = np.argmax(x, axis=0)
print(y)  # [1 2 0]

# axis=1: column index of the maximum of every row.
y = np.argmax(x, axis=1)
print(y)  # [2 0 1]

`numpy.nonzero()`

返回非零元素的索引值
一维数组

x = np.array([0, 2, 3, 1])
print(x)  # [0 2 3 1]
print(x.shape)  # (4,)
print(x.ndim)  # 1

# nonzero returns a tuple with one index array per dimension.
y = np.nonzero(x)
print(y)  # (array([1, 2, 3], dtype=int64),)

# Transposing gives one row of coordinates per non-zero element.
print(np.transpose(y))
# [[1]
#  [2]
#  [3]]

# Indexing with the tuple extracts the non-zero elements themselves.
print(x[y])
# [2 3 1]

二维数组

x = np.array([
    [3, 0, 0],
    [0, 4, 0],
    [5, 6, 0],
])
print(x)
# [[3 0 0]
#  [0 4 0]
#  [5 6 0]]
print(x.shape)  # (3, 3)
print(x.ndim)  # 2

# One index array per dimension: (row indices, column indices).
y = np.nonzero(x)
print(y)
# (array([0, 1, 2, 2], dtype=int64), array([0, 1, 0, 1], dtype=int64))

# Transposed: one (row, column) pair per non-zero element.
y = np.transpose(np.nonzero(x))
print(y)
# [[0 0]
#  [1 1]
#  [2 0]
#  [2 1]]

# Indexing with the tuple returns the non-zero values.
y = x[np.nonzero(x)]
print(y)  # [3 4 5 6]

将布尔数组转换成整数数组

x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(x)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]

y = x > 5  # element-wise comparison yields a boolean array
print(y)
# [[False False False]
#  [False False  True]
#  [ True  True  True]]

y = np.nonzero(x > 5)  # (row indices, column indices) of the True entries
print(y)
# (array([1, 2, 2, 2], dtype=int64), array([2, 0, 1, 2], dtype=int64))

y = x[np.nonzero(x > 5)]  # the elements at those positions
print(y)
# [6 7 8 9]

# Boolean-mask indexing selects the same elements directly.
# The threshold is 5 (not 3) so that it matches the examples above.
y = x[x > 5]
print(y)
# [6 7 8 9]

`numpy.where(condition, x, y)`

满足条件condition，输出x，不满足输出y。

x = np.arange(10)
print(x)
# [0 1 2 3 4 5 6 7 8 9]

# Keep values below 5, multiply the others by ten.
y = np.where(x < 5, x, x * 10)
print(y)  # [ 0  1  2  3  4 50 60 70 80 90]

x = np.array([[0, 1, 2], [0, 2, 4], [0, 3, 6]])

# Keep values below 4, replace the others with the scalar -1.
y = np.where(x < 4, x, -1)
print(y)
# [[ 0  1  2]
#  [ 0  2 -1]
#  [ 0  3 -1]]

只有condition，没有x和y，则输出满足条件元素的坐标(等价于numpy.nonzero)。
一维数组

x = np.arange(1, 9)  # [1 2 3 4 5 6 7 8]

# With only a condition, where returns the indices of the True entries.
y = np.where(x > 5)
print(y)
# (array([5, 6, 7], dtype=int64),)
print(x[y])
# [6 7 8]

# nonzero on the same boolean mask gives the identical result.
y = np.nonzero(x > 5)
print(y)
# (array([5, 6, 7], dtype=int64),)
print(x[y])  # [6 7 8]

二维数组

# 5x5 matrix holding 11..35 in row-major order.
x = np.arange(11, 36).reshape(5, 5)

# With only a condition, where returns (row indices, column indices).
y = np.where(x > 25)
print(y)
# (array([3, 3, 3, 3, 3, 4, 4, 4, 4, 4], dtype=int64), array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int64))
print(x[y])
# [26 27 28 29 30 31 32 33 34 35]

# nonzero on the same boolean mask gives the identical result.
y = np.nonzero(x > 25)
print(y)
# (array([3, 3, 3, 3, 3, 4, 4, 4, 4, 4], dtype=int64), array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int64))
print(x[y])
# [26 27 28 29 30 31 32 33 34 35]

`numpy.searchsorted(a, v, side='left')`

a：一维输入数组。
v：插入a数组的值，可以为单个元素，list或者ndarray。
side：查询方向。当为left时，返回第一个合适的插入位置 i，满足 a[i-1] < v <= a[i]；当为right时，返回最后一个合适的插入位置 i，满足 a[i-1] <= v < a[i]。
此处仅考虑数组a为升序数组的情况。

x = np.array([0, 1, 5, 9, 11, 18, 26, 33])

# side='left' (the default): insertion point before any equal entries,
# i.e. the index of the first element >= 11.
y = np.searchsorted(x, 11, side='left')
print(y)  # 4

# side='right': insertion point after any equal entries,
# i.e. the index of the first element > 11.
y = np.searchsorted(x, 11, side='right')
print(y)  # 5

三、计数

`numpy.count_nonzero()`

返回数组中的非0元素个数。

# 1-D: total number of non-zero entries.
x = np.array([0, 1, 5, 9, 0, 18, 26, 33])
y = np.count_nonzero(x)
print(y)  # 6

# 2-D: total count first, then counts along each axis.
x = np.array([[0, 1, 2], [2, 1, 3], [0, 3, 4]])
y = np.count_nonzero(x)
print(y)  # 7
y_1 = np.count_nonzero(x, axis=0)  # non-zero entries per column
print(y_1)  # [1 3 3]
y_2 = np.count_nonzero(x, axis=1)  # non-zero entries per row
print(y_2)  # [2 3 2]

四、集合

1.构造集合

# unique returns the sorted distinct values of its input.
x = np.unique([1, 1, 3, 2, 3, 3])
print(x)  # [1 2 3]

# A multi-dimensional input is flattened when no axis is given.
x = np.array([[1, 1], [2, 3]])
u = np.unique(x)
print(u)  # [1 2 3]

# axis=0 treats every row as one element and drops duplicate rows.
x = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
y = np.unique(x, axis=0)
print(y)
# [[1 0 0]
#  [2 3 4]]

# return_index: position of the first occurrence of each unique value in x.
x = np.array(['a', 'b', 'b', 'c', 'a'])
u, index = np.unique(x, return_index=True)
print(u)  # ['a' 'b' 'c']
print(index)  # [0 1 3]
print(x[index])  # ['a' 'b' 'c']

x = np.array([1, 2, 6, 4, 2, 3, 2])

# return_inverse: position of each element of x inside u, so u[index] rebuilds x.
u, index = np.unique(x, return_inverse=True)
print(u)  # [1 2 3 4 6]
print(index)  # [0 1 4 3 1 2 1]
print(u[index])  # [1 2 6 4 2 3 2]

# return_counts: how many times each unique value occurs in x.
u, count = np.unique(x, return_counts=True)
print(u)  # [1 2 3 4 6]
print(count)  # [1 3 1 1 1]

2.布尔运算

numpy.in1d(ar1, ar2, assume_unique=False, invert=False)
判断前面数组的每个元素是否存在于后面的数组中，返回与前面数组等长的布尔数组。（注：自 NumPy 2.0 起 numpy.in1d 已被弃用，推荐改用 numpy.isin。）

test = np.array([0, 1, 2, 5, 0])
states = [0, 2]

# np.isin replaces np.in1d, which is deprecated since NumPy 2.0;
# for 1-D input both produce the same boolean mask.
mask = np.isin(test, states)
print(mask)  # [ True False  True False  True]
print(test[mask])  # [0 2 0]: the elements of test that also occur in states

mask = np.isin(test, states, invert=True)  # invert defaults to False
print(mask)  # [False  True False  True False]
print(test[mask])  # [1 5]: the elements of test that do not occur in states

3.求两个集合的交集、并集和差集

交集 numpy.intersect1d(ar1, ar2)

# Sorted, de-duplicated values present in both inputs.
x = np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
print(x)  # [1 3]

x = np.array([1, 1, 2, 3, 4])
y = np.array([2, 1, 4, 6])
# return_indices=True also reports where each common value first occurs in x and in y.
xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
print(x_ind)  # [0 2 4]
print(y_ind)  # [1 0 2]
print(xy)  # [1 2 4]
# x[[0, 2, 4]] and y[[1, 0, 2]] are the same three values: 1, 2, 4.

并集 numpy.union1d(ar1, ar2)

# Union of both inputs, returned sorted in ascending order without duplicates.
x = np.union1d(
    [-1, 0, 1],
    [-2, 0, 2, 5],
)
print(x)  # [-2 -1  0  1  2  5]

差集 numpy.setdiff1d(ar1, ar2)，集合的差，即元素存在于第一个数组而不存在于第二个数组中。

a = np.array([1, 2, 3, 2, 4, 1])
b = np.array([3, 4, 5, 6])
# Sorted unique values of a that do not appear in b.
x = np.setdiff1d(a, b)
print(x)  # [1 2]

4.求两个集合的异或

setxor1d(ar1, ar2)，异或，即两个数组中各自独自拥有的元素的集合。

a = np.array([1, 2, 3, 2, 4, 1])
b = np.array([3, 4, 5, 6])
# Sorted unique values that occur in exactly one of the two arrays.
x = np.setxor1d(a, b)
print(x)  # [1 2 5 6]

怡颜悦色

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录