import numpy as np  # every exercise below uses the `np` alias; nothing else in view imports it

# 1. Extract every element between 5 and 10 (both inclusive) from np.arange(15).
a = np.arange(15)
# Positional approach: in np.arange(15) each value equals its index, so the
# half-open slice 5:11 covers the values 5..10.
print(a[5:11])  # [ 5  6  7  8  9 10]
# Value-based approach: a boolean mask selects by content rather than position.
print(a[(a >= 5) & (a <= 10)])  # [ 5  6  7  8  9 10]
# 2. Turn np.arange(20) into a matrix with 4 rows and 5 columns, then swap
#    rows 1 and 2 and afterwards swap columns 1 and 2 (1-based numbering,
#    i.e. indices 0 and 1).
b = np.arange(20).reshape(4, 5)
# b is now
# [[ 0  1  2  3  4]
#  [ 5  6  7  8  9]
#  [10 11 12 13 14]
#  [15 16 17 18 19]]

# Swap the first two rows. Indexing with a list produces a copy on the
# right-hand side, so the assignment does not read half-overwritten data.
b[[1, 0]] = b[[0, 1]]
# [[ 5  6  7  8  9]
#  [ 0  1  2  3  4]
#  [10 11 12 13 14]
#  [15 16 17 18 19]]

# Swap the first two columns the same way.
b[:, [1, 0]] = b[:, [0, 1]]
# [[ 6  5  7  8  9]
#  [ 1  0  2  3  4]
#  [11 10 12 13 14]
#  [16 15 17 18 19]]
# 3. Replace every odd number in np.random.randint(1, 10, size=(5, 5)) with 0.
np.random.seed(10)  # fixed seed so the draw is repeatable
c = np.random.randint(1, 10, size=(5, 5))
# Matrix recorded by the original author for this draw:
# [[5 1 2 1 2]
#  [9 1 9 7 5]
#  [4 1 5 7 9]
#  [2 9 5 2 4]
#  [7 6 4 7 2]]

# Entries are integers in 1..9, so "remainder 1" is the same test as "not even".
odd_mask = c % 2 == 1
c[odd_mask] = 0
print(c)
# Recorded output:
# [[0 0 2 0 2]
#  [0 0 0 0 0]
#  [4 0 0 0 0]
#  [2 0 0 2 4]
#  [0 6 4 0 2]]
#
# The author also sketched an element-wise alternative: loop i and j over
# range(5) and set c[i][j] = 0 whenever c[i][j] % 2 == 1. The masked
# assignment above does the same thing without Python-level loops.
# 4. Draw 20 uniformly distributed random numbers between 1 and 50 into d,
#    replace every value >= 30 with 0, then report the positions of the five
#    largest remaining values.
np.random.seed(10)
d = np.random.uniform(low=1, high=50, size=20)
# The original author listed this sample for the raw draw:
# [31.27058194 26.14377389 32.86946191 30.45090872 40.45593664 26.56071047
#  45.52379516 16.64256836  5.43250811 15.73430278  6.58523373 41.60538499
#   3.29791965 31.68807027 27.83172164 41.14506279 10.74842944 42.98566482
#  18.23097933 37.97773688]
# NOTE(review): that listing does not line up with the post-replacement sample
# below (index 1 is 26.14 here but 2.02 there, and values < 30 are never
# modified), so it appears to come from a different run.

d[d >= 30] = 0  # zero out everything that is at least 30
# Recorded state after the replacement:
# [ 0.          2.01684552  0.          0.         25.4268436  12.01503563
#  10.70508037  0.          9.28643099  5.32865089  0.          0.
#   1.19346505 26.09742091  0.          0.          0.         15.30192734
#   0.          0.        ]

ascending_idx = np.argsort(d)  # indices that would sort d from smallest to largest
print(ascending_idx)
# [ 0 16 15 14 11 10 18 19  3  2  7 12  1  9  8  6  5 17  4 13]

# Walk the sorted indices backwards from the end to get the five largest first.
print(ascending_idx[:-6:-1])
# [13  4 17  5  6]
# 5. Use numpy arrays to compute the perimeter of the figure formed by the five
#    points (1,9), (5,12), (8,20), (4,10), (2,8).
gra = np.array([[1, 9], [5, 12], [8, 20], [4, 10], [2, 8]])
# Order the vertices by ascending x (first column); consecutive rows are then
# treated as neighbouring vertices of the figure.
x_order = gra[:, 0].argsort()
gra = gra[x_order]
print(gra)
# [[ 1  9]
#  [ 2  8]
#  [ 4 10]
#  [ 5 12]
#  [ 8 20]]

# Second array holding "the next vertex" for every row: shift all rows up by
# one so the first vertex wraps around to the end and closes the figure.
gra1 = np.roll(gra, -1, axis=0)
print(gra1)
# [[ 2  8]
#  [ 4 10]
#  [ 5 12]
#  [ 8 20]
#  [ 1  9]]

# Euclidean length of every edge gra[i] -> gra1[i].
edge_vectors = gra - gra1
dis = np.sqrt((edge_vectors ** 2).sum(axis=1))
# The perimeter is the total of all edge lengths.
print(dis.sum())  # 28.061117220341906
# 6. Iris data set
# (1) Load the data into the 2-D array iris_2d as follows.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# Every field is parsed as float. The recorded output below shows the fifth
# column coming back as nan in each row (presumably the textual species name,
# which cannot be converted to a number).
iris_2d = np.genfromtxt(url, dtype=float, delimiter=",")
print(iris_2d)
# Recorded output:
# [[5.1 3.5 1.4 0.2 nan]
#  [4.9 3.  1.4 0.2 nan]
#  [4.7 3.2 1.3 0.2 nan]
#  ...
# (2) Insert np.nan at 20 random positions of iris_2d (other than positions
#     that are already np.nan).
np.random.seed(10)
# Draw the row indices first and the column indices second, so the seeded
# random stream is consumed in the same order as a single indexing expression.
nan_rows = np.random.randint(0, 150, size=20)
# NOTE(review): the upper bound of randint is exclusive, so only column indices
# 0, 1 and 2 can be drawn and the fourth column (index 3) is never hit. Repeated
# (row, column) pairs are also possible, which would leave fewer than 20
# distinct cells. Confirm whether randint(0, 4) / sampling without replacement
# was intended before changing it, since that alters every recorded output.
nan_cols = np.random.randint(0, 3, size=20)
iris_2d[nan_rows, nan_cols] = np.nan
print(iris_2d)
# Recorded excerpt of the output:
# ...
# [5.7 nan 5.  2.  nan]
# [5.8 2.8 5.1 2.4 nan]
# [6.4 nan 5.3 2.3 nan]
# ...
# (3) Count the missing values in sepal length (first column) of iris_2d and
#     report where they are.
sepal_nan = np.isnan(iris_2d[:, 0])  # True where the first column is nan
print(f"第一列缺失值数量为:{sepal_nan.sum()}")  # recorded count: 4
print(f"第一列缺失值位置为:{np.where(sepal_nan)}")
# Recorded positions: (array([ 15,  16, 107, 125], dtype=int64),)
# (4) Select the rows of iris_2d whose petal length (third column) is > 1.5
#     and whose sepal length (first column) is < 5.0.
long_petal = iris_2d[:, 2] > 1.5
short_sepal = iris_2d[:, 0] < 5.0
# A nan in either column compares as False, so such rows are left out.
print(iris_2d[long_petal & short_sepal])
# Recorded output:
# [[4.8 3.4 1.6 0.2 nan]
#  [4.8 3.4 1.9 0.2 nan]
#  [4.7 3.2 1.6 0.2 nan]
#  [4.8 3.1 1.6 0.2 nan]
#  [4.9 2.4 3.3 1.  nan]
#  [4.9 2.5 4.5 1.7 nan]]
# (5) Select the rows of iris_2d that contain no np.nan.
# A column that is nan in every row (the fifth column, as loaded, is one)
# would disqualify all rows, so a check over all columns always yields an
# empty array; the original author recorded exactly that result, `[]`.
# Such all-nan columns carry no information about individual rows and are
# therefore excluded from the completeness check. The printed rows still
# contain every column.
nan_cells = np.isnan(iris_2d)
informative_cols = ~nan_cells.all(axis=0)  # columns with at least one real value
complete_rows = ~nan_cells[:, informative_cols].any(axis=1)
print(iris_2d[complete_rows])
# (6) Replace nan with 0 in the numpy array.
missing = np.isnan(iris_2d)  # boolean mask of every nan cell
iris_2d[missing] = 0  # in-place overwrite; iris_2d stays the same array object
print(iris_2d)
# Recorded output:
# [[5.1 0.  1.4 0.2 0. ]
#  [4.9 3.  1.4 0.2 0. ]
#  [4.7 3.2 1.3 0.2 0. ]
#  ...
# (8) Turn the petal length (third column) of iris_2d into a list of text
#     labels: "small" below 3, "medium" from 3 up to 5, "large" from 5 upwards.
pe = iris_2d[:, 2]
print(pe)  # [1.4 1.4 1.3 ...]
# np.digitize returns the 1-based bin number: 1 for [0, 3), 2 for [3, 5),
# 3 for [5, 10). A value below 0 or >= 10 would give 0 or 4, which label_map
# does not contain, so the lookup below would raise KeyError for it.
bin_edges = [0, 3, 5, 10]
petal_length_bin = np.digitize(pe.astype(float), bin_edges)
label_map = {1: 'small', 2: 'medium', 3: 'large'}
petal_length_cat = [label_map[code] for code in petal_length_bin.tolist()]
print(petal_length_cat)
# ['small', 'small', 'small',...]
# (9) Add a new "volume" column to iris_2d, computed as
#     (pi * first column * third column ** 2) / 3.
# NOTE(review): the exercise text calls the first column petal length and the
# third column sepal length, the reverse of how steps (3), (4) and (8) label
# them. The code follows the column positions; confirm which was intended.
first_col = iris_2d[:, 0]
third_col = iris_2d[:, 2]
volume = np.pi * first_col * third_col ** 2 / 3
print(volume)
# Recorded output (excerpt):
# [ 10.46778672  10.05728528   8.31789015  10.83849465  10.262536
#   16.34256498   9.44153312  11.78097245   9.03103168  11.545353
#   12.72345025  12.86796351   9.85203456   5.44856886   8.74619395

# Give volume a trailing axis so it becomes a single column that can be
# attached next to the existing columns.
volume = volume[:, None]
print(volume)
# [[ 10.46778672]
#  [ 10.05728528]
#  [  8.31789015]
#  [ 10.83849465]
#  [ 10.262536  ]
#  ...

# Append the column on the right; iris_2d is rebound to the widened array.
iris_2d = np.hstack((iris_2d, volume))
print(iris_2d)
# [[5.10000000e+00 0.00000000e+00 1.40000000e+00 2.00000000e-01
#   0.00000000e+00 1.04677867e+01]
#  [4.90000000e+00 3.00000000e+00 1.40000000e+00 2.00000000e-01
#   0.00000000e+00 1.00572853e+01]
#  ...
# (10) Find the position where petal width (fourth column of the iris data)
#      first exceeds 1.0.
petal_width = iris_2d[:, 3].astype(float)
wide_positions = np.argwhere(petal_width > 1.0)  # one row per matching index
r1 = wide_positions[0]  # first match; raises IndexError if nothing exceeds 1.0
print(r1)  # [50]
# 10-22
# 2744
# 01-25
# 5133
# 07-22
# 1078
# 08-06
# 1096