NumPy题目

evolto0313
于 2024-06-22 21:45:31 发布
阅读量134
点赞数 3
文章标签： numpy
本文链接：https://blog.csdn.net/weixin_58577791/article/details/139888656
版权
# 1. Extract every element of np.arange(15) that lies between 5 and 10
#    (both endpoints included).
import numpy as np  # NumPy is referred to as `np` by all the exercises

a = np.arange(15)
# The values of `a` equal their own indices, so a plain slice and a boolean
# value mask select the same elements.
print(a[5:11])  # [ 5  6  7  8  9 10]
print(a[(a >= 5) & (a <= 10)])  # [ 5  6  7  8  9 10]

# 2. Turn np.arange(20) into a 4-row, 5-column matrix, then swap rows 1 and 2
#    and swap columns 1 and 2 (1-based, i.e. indices 0 and 1).
b = np.arange(20)  # flat vector 0 .. 19
# resize() modifies the array in place, whereas reshape() leaves the original
# array unchanged.
b.resize(4, 5)
# b is now
# [[ 0  1  2  3  4]
#  [ 5  6  7  8  9]
#  [10 11 12 13 14]
#  [15 16 17 18 19]]
first_two = [0, 1]
first_two_swapped = first_two[::-1]  # [1, 0]

# Swap rows 1 and 2. The index list on the right-hand side produces a copy,
# so the assignment does not read rows it has already overwritten.
b[first_two] = b[first_two_swapped]
# [[ 5  6  7  8  9]
#  [ 0  1  2  3  4]
#  [10 11 12 13 14]
#  [15 16 17 18 19]]

# Swap columns 1 and 2 in the same way.
b[:, first_two] = b[:, first_two_swapped]
# [[ 6  5  7  8  9]
#  [ 1  0  2  3  4]
#  [11 10 12 13 14]
#  [16 15 17 18 19]]

# 3. Replace every odd entry of np.random.randint(1, 10, size=(5, 5)) with 0.
np.random.seed(10)  # fixed seed so the matrix below is reproducible
c = np.random.randint(1, 10, size=(5, 5))
# [[5 1 2 1 2]
#  [9 1 9 7 5]
#  [4 1 5 7 9]
#  [2 9 5 2 4]
#  [7 6 4 7 2]]
odd_entries = (c % 2) == 1  # boolean mask marking the odd values
c[odd_entries] = 0
print(c)
# [[0 0 2 0 2]
#  [0 0 0 0 0]
#  [4 0 0 0 0]
#  [2 0 0 2 4]
#  [0 6 4 0 2]]

# Loop-based alternative: visit every (i, j) with i, j in range(5) and set
# c[i][j] = 0 whenever c[i][j] % 2 == 1. The mask assignment above does the
# same thing without an explicit Python loop.

# 4. Draw 20 numbers uniformly from the range 1-50 into d, replace every value
#    that is >= 30 with 0, then report the positions of the 5 largest values
#    left in d.
np.random.seed(10)  # fixed seed so the draw is reproducible
d = np.random.uniform(1, 50, 20)
too_large = d >= 30
d[too_large] = 0  # zero out everything that is 30 or more
# d after the replacement:
# [ 0.          2.01684552  0.          0.         25.4268436  12.01503563
#  10.70508037  0.          9.28643099  5.32865089  0.          0.
#   1.19346505 26.09742091  0.          0.          0.         15.30192734
#   0.          0.        ]
ascending = np.argsort(d)  # indices that would sort d from small to large
print(ascending)
# e.g. [ 0 16 15 14 11 10 18 19  3  2  7 12  1  9  8  6  5 17  4 13]
# Reverse to get largest-first, then keep the first five positions.
print(ascending[::-1][:5])
# [13  4 17  5  6]


# 5. Use NumPy arrays to compute the perimeter of the figure formed by the
#    five points (1,9), (5,12), (8,20), (4,10), (2,8).
gra = np.array([[1, 9], [5, 12], [8, 20], [4, 10], [2, 8]])
# Order the vertices by their x-coordinate (first column), ascending.
# NOTE(review): this ordering decides which points are joined by an edge;
# confirm that visiting the points left-to-right is the intended outline.
gra = gra[gra[:, 0].argsort()]
print(gra)
# [[ 1  9]
#  [ 2  8]
#  [ 4 10]
#  [ 5 12]
#  [ 8 20]]
# Build a second array holding, for each vertex, the vertex that follows it:
# every row moves up by one and the first vertex wraps around to the end.
gra1 = np.roll(gra, -1, axis=0)
print(gra1)
# [[ 2  8]
#  [ 4 10]
#  [ 5 12]
#  [ 8 20]
#  [ 1  9]]
# Euclidean length of each edge gra[i] -> gra1[i].
edge_vectors = gra - gra1
dis = np.sqrt((edge_vectors ** 2).sum(axis=1))
# The perimeter is the total of all edge lengths.
print(dis.sum())   # 28.061117220341906



# 6. Iris data set
# (1) Obtain the array iris_2d in the following way: read the comma-separated
#     file at `url` with every field parsed as a float.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
)
iris_2d = np.genfromtxt(url, dtype=float, delimiter=",")
print(iris_2d)
# Sample output (the last column prints as nan):
# [[5.1 3.5 1.4 0.2 nan]
#  [4.9 3.  1.4 0.2 nan]
#  [4.7 3.2 1.3 0.2 nan]
# ...

# (2) Overwrite 20 randomly chosen cells of iris_2d with np.nan, never picking
#     a cell that is already NaN.
np.random.seed(10)  # fixed seed so the chosen cells are reproducible
# Sample from the flat indices of the non-NaN cells, without replacement.
# This yields distinct cells (so exactly 20 new NaNs when at least 20 cells
# are available), lets every non-NaN column be hit, and does not hard-code the
# number of rows. Drawing row and column indices independently with
# randint(0, 150) / randint(0, 3) could repeat a cell and could never select
# column index 3, because randint's upper bound is exclusive.
nan_candidates = np.flatnonzero(~np.isnan(iris_2d))
nan_targets = np.random.choice(
    nan_candidates, size=min(20, nan_candidates.size), replace=False
)
iris_2d.flat[nan_targets] = np.nan
print(iris_2d)
# The printed array now contains nan in 20 additional, randomly placed cells.

# (3) Count and locate the missing values in sepallength (column 1) of iris_2d.
sepal_length_missing = np.isnan(iris_2d[:, 0])  # True where column 1 is NaN
print(f"第一列缺失值数量为：{sepal_length_missing.sum()}")
# Sample output from one run: 第一列缺失值数量为：4
print(f"第一列缺失值位置为：{np.where(sepal_length_missing)}")
# Sample output from one run:
# 第一列缺失值位置为：(array([ 15,  16, 107, 125], dtype=int64),)

# (4) Select the rows of iris_2d with petallength (column 3) > 1.5 and
#     sepallength (column 1) < 5.0.
long_petal = iris_2d[:, 2] > 1.5
short_sepal = iris_2d[:, 0] < 5.0
# Both conditions must hold for a row to be kept.
print(iris_2d[np.logical_and(long_petal, short_sepal)])
# Sample output from one run:
# [[4.8 3.4 1.6 0.2 nan]
#  [4.8 3.4 1.9 0.2 nan]
#  [4.7 3.2 1.6 0.2 nan]
#  [4.8 3.1 1.6 0.2 nan]
#  [4.9 2.4 3.3 1.  nan]
#  [4.9 2.5 4.5 1.7 nan]]

# (5) Select the rows of iris_2d that contain no np.nan at all.
row_has_nan = np.isnan(iris_2d).any(axis=1)  # True if any cell in the row is NaN
print(iris_2d[~row_has_nan])
# Sample output recorded by the author: []

# (6) Replace every nan in the NumPy array with 0 (in place).
nan_cells = np.isnan(iris_2d)
iris_2d[nan_cells] = 0
print(iris_2d)
# Sample output from one run:
# [[5.1 0.  1.4 0.2 0. ]
#  [4.9 3.  1.4 0.2 0. ]
#  [4.7 3.2 1.3 0.2 0. ]
# ...

# (8) Turn the petal length (column 3 of iris_2d) into text labels:
#     below 3 -> 'small', from 3 up to (not including) 5 -> 'medium',
#     5 and above -> 'large'.
pe = iris_2d[:, 2]
print(pe)  # e.g. [1.4 1.4 1.3 ...]
# Only the two cut points 3 and 5 are used as bin edges, so the outer bins are
# open-ended: digitize() returns 0, 1 or 2 and the +1 shifts that onto the
# label_map keys 1..3. With closed outer edges such as [0, 3, 5, 10], a value
# outside [0, 10) lands in bin 0 or 4 and the lookup below raises KeyError,
# although the task puts no upper limit on 'large'.
petal_length_bin = np.digitize(pe.astype(float), [3, 5]) + 1
label_map = {1: 'small', 2: 'medium', 3: 'large'}
petal_length_cat = [label_map[x] for x in petal_length_bin]
print(petal_length_cat)
# e.g. ['small', 'small', 'small', ...]

# (9) Append a new `volume` column to iris_2d, where
#     volume = (pi * column 1 * column 3 ** 2) / 3.
# NOTE(review): the exercise text calls column 1 "petallength" and column 3
# "sepal_length", while exercises (3) and (4) name those columns the other way
# round. The code follows the column positions; confirm which column is meant
# to be squared.
first_col = iris_2d[:, 0]
third_col = iris_2d[:, 2]
numerator = np.pi * first_col * (third_col ** 2)
volume = numerator / 3
print(volume)
# Sample output from one run (1-D, one value per row of iris_2d):
# [ 10.46778672  10.05728528   8.31789015  10.83849465  10.262536
#   16.34256498   9.44153312  11.78097245   9.03103168  11.545353
#   12.72345025  12.86796351   9.85203456   5.44856886   8.74619395
# Reshape into a single column so it can be stacked next to iris_2d.
volume = volume.reshape(-1, 1)
print(volume)
# [[ 10.46778672]
#  [ 10.05728528]
#  [  8.31789015]
#  [ 10.83849465]
#  [ 10.262536  ]
#  ...
# Glue the new column onto the right-hand side of the table.
iris_2d = np.hstack((iris_2d, volume))
print(iris_2d)
# [[5.10000000e+00 0.00000000e+00 1.40000000e+00 2.00000000e-01
#   0.00000000e+00 1.04677867e+01]
#  [4.90000000e+00 3.00000000e+00 1.40000000e+00 2.00000000e-01
#   0.00000000e+00 1.00572853e+01]
# ...

# 10. Find the position of the first value greater than 1.0 in the petal width
#     (column 4) of the iris data.
petal_width = iris_2d[:, 3].astype(float)
above_one = np.argwhere(petal_width > 1.0)  # one row per matching position
r1 = above_one[0]  # first match; indexing fails if nothing exceeds 1.0
print(r1)  # sample output: [50]