numpy下_大作业

大作业

导入数据

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris # 导入数据集
# Load the iris dataset, then pull out the measurements (X) and the integer
# class ids (y) in one step.
iris = load_iris()
X, y = iris.data, iris.target

# Lookup table from the integer class ids to the species names.
target_dict = {
    0: 'Iris-setosa',
    1: 'Iris-versicolor',
    2: 'Iris-virginica',
}


def target(entry):
    """Return the species name for ``entry``; unknown values come back unchanged."""
    return target_dict.get(entry, entry)


# Element-wise version of ``target`` so it can be applied to a whole array.
target_1 = np.vectorize(target)
# Replace every integer class id in y with its species name, element-wise.
y = target_1(y)
# Bare expression: in a notebook cell this displays the resulting array.
y
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica'], dtype='<U15')
# Append the species labels to X as one more column. Combining floats with
# strings produces a string array (the display that follows shows
# dtype='<U32'), so the measurements are stored as text from here on.
iris_data = np.c_[X, y]
# Bare expression: in a notebook cell this displays the combined array.
iris_data
array([['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'],
       ['4.9', '3.0', '1.4', '0.2', 'Iris-setosa'],
       ['4.7', '3.2', '1.3', '0.2', 'Iris-setosa'],
       ['4.6', '3.1', '1.5', '0.2', 'Iris-setosa'],
       ['5.0', '3.6', '1.4', '0.2', 'Iris-setosa'],
       ['5.4', '3.9', '1.7', '0.4', 'Iris-setosa'],
       ['4.6', '3.4', '1.4', '0.3', 'Iris-setosa'],
       ['5.0', '3.4', '1.5', '0.2', 'Iris-setosa'],
       ['4.4', '2.9', '1.4', '0.2', 'Iris-setosa'],
       ['4.9', '3.1', '1.5', '0.1', 'Iris-setosa'],
       ['5.4', '3.7', '1.5', '0.2', 'Iris-setosa'],
       ['4.8', '3.4', '1.6', '0.2', 'Iris-setosa'],
       ['4.8', '3.0', '1.4', '0.1', 'Iris-setosa'],
       ['4.3', '3.0', '1.1', '0.1', 'Iris-setosa'],
       ['5.8', '4.0', '1.2', '0.2', 'Iris-setosa'],
       ['5.7', '4.4', '1.5', '0.4', 'Iris-setosa'],
       ['5.4', '3.9', '1.3', '0.4', 'Iris-setosa'],
       ['5.1', '3.5', '1.4', '0.3', 'Iris-setosa'],
       ['5.7', '3.8', '1.7', '0.3', 'Iris-setosa'],
       ['5.1', '3.8', '1.5', '0.3', 'Iris-setosa'],
       ['5.4', '3.4', '1.7', '0.2', 'Iris-setosa'],
       ['5.1', '3.7', '1.5', '0.4', 'Iris-setosa'],
       ['4.6', '3.6', '1.0', '0.2', 'Iris-setosa'],
       ['5.1', '3.3', '1.7', '0.5', 'Iris-setosa'],
       ['4.8', '3.4', '1.9', '0.2', 'Iris-setosa'],
       ['5.0', '3.0', '1.6', '0.2', 'Iris-setosa'],
       ['5.0', '3.4', '1.6', '0.4', 'Iris-setosa'],
       ['5.2', '3.5', '1.5', '0.2', 'Iris-setosa'],
       ['5.2', '3.4', '1.4', '0.2', 'Iris-setosa'],
       ['4.7', '3.2', '1.6', '0.2', 'Iris-setosa'],
       ['4.8', '3.1', '1.6', '0.2', 'Iris-setosa'],
       ['5.4', '3.4', '1.5', '0.4', 'Iris-setosa'],
       ['5.2', '4.1', '1.5', '0.1', 'Iris-setosa'],
       ['5.5', '4.2', '1.4', '0.2', 'Iris-setosa'],
       ['4.9', '3.1', '1.5', '0.2', 'Iris-setosa'],
       ['5.0', '3.2', '1.2', '0.2', 'Iris-setosa'],
       ['5.5', '3.5', '1.3', '0.2', 'Iris-setosa'],
       ['4.9', '3.6', '1.4', '0.1', 'Iris-setosa'],
       ['4.4', '3.0', '1.3', '0.2', 'Iris-setosa'],
       ['5.1', '3.4', '1.5', '0.2', 'Iris-setosa'],
       ['5.0', '3.5', '1.3', '0.3', 'Iris-setosa'],
       ['4.5', '2.3', '1.3', '0.3', 'Iris-setosa'],
       ['4.4', '3.2', '1.3', '0.2', 'Iris-setosa'],
       ['5.0', '3.5', '1.6', '0.6', 'Iris-setosa'],
       ['5.1', '3.8', '1.9', '0.4', 'Iris-setosa'],
       ['4.8', '3.0', '1.4', '0.3', 'Iris-setosa'],
       ['5.1', '3.8', '1.6', '0.2', 'Iris-setosa'],
       ['4.6', '3.2', '1.4', '0.2', 'Iris-setosa'],
       ['5.3', '3.7', '1.5', '0.2', 'Iris-setosa'],
       ['5.0', '3.3', '1.4', '0.2', 'Iris-setosa'],
       ['7.0', '3.2', '4.7', '1.4', 'Iris-versicolor'],
       ['6.4', '3.2', '4.5', '1.5', 'Iris-versicolor'],
       ['6.9', '3.1', '4.9', '1.5', 'Iris-versicolor'],
       ['5.5', '2.3', '4.0', '1.3', 'Iris-versicolor'],
       ['6.5', '2.8', '4.6', '1.5', 'Iris-versicolor'],
       ['5.7', '2.8', '4.5', '1.3', 'Iris-versicolor'],
       ['6.3', '3.3', '4.7', '1.6', 'Iris-versicolor'],
       ['4.9', '2.4', '3.3', '1.0', 'Iris-versicolor'],
       ['6.6', '2.9', '4.6', '1.3', 'Iris-versicolor'],
       ['5.2', '2.7', '3.9', '1.4', 'Iris-versicolor'],
       ['5.0', '2.0', '3.5', '1.0', 'Iris-versicolor'],
       ['5.9', '3.0', '4.2', '1.5', 'Iris-versicolor'],
       ['6.0', '2.2', '4.0', '1.0', 'Iris-versicolor'],
       ['6.1', '2.9', '4.7', '1.4', 'Iris-versicolor'],
       ['5.6', '2.9', '3.6', '1.3', 'Iris-versicolor'],
       ['6.7', '3.1', '4.4', '1.4', 'Iris-versicolor'],
       ['5.6', '3.0', '4.5', '1.5', 'Iris-versicolor'],
       ['5.8', '2.7', '4.1', '1.0', 'Iris-versicolor'],
       ['6.2', '2.2', '4.5', '1.5', 'Iris-versicolor'],
       ['5.6', '2.5', '3.9', '1.1', 'Iris-versicolor'],
       ['5.9', '3.2', '4.8', '1.8', 'Iris-versicolor'],
       ['6.1', '2.8', '4.0', '1.3', 'Iris-versicolor'],
       ['6.3', '2.5', '4.9', '1.5', 'Iris-versicolor'],
       ['6.1', '2.8', '4.7', '1.2', 'Iris-versicolor'],
       ['6.4', '2.9', '4.3', '1.3', 'Iris-versicolor'],
       ['6.6', '3.0', '4.4', '1.4', 'Iris-versicolor'],
       ['6.8', '2.8', '4.8', '1.4', 'Iris-versicolor'],
       ['6.7', '3.0', '5.0', '1.7', 'Iris-versicolor'],
       ['6.0', '2.9', '4.5', '1.5', 'Iris-versicolor'],
       ['5.7', '2.6', '3.5', '1.0', 'Iris-versicolor'],
       ['5.5', '2.4', '3.8', '1.1', 'Iris-versicolor'],
       ['5.5', '2.4', '3.7', '1.0', 'Iris-versicolor'],
       ['5.8', '2.7', '3.9', '1.2', 'Iris-versicolor'],
       ['6.0', '2.7', '5.1', '1.6', 'Iris-versicolor'],
       ['5.4', '3.0', '4.5', '1.5', 'Iris-versicolor'],
       ['6.0', '3.4', '4.5', '1.6', 'Iris-versicolor'],
       ['6.7', '3.1', '4.7', '1.5', 'Iris-versicolor'],
       ['6.3', '2.3', '4.4', '1.3', 'Iris-versicolor'],
       ['5.6', '3.0', '4.1', '1.3', 'Iris-versicolor'],
       ['5.5', '2.5', '4.0', '1.3', 'Iris-versicolor'],
       ['5.5', '2.6', '4.4', '1.2', 'Iris-versicolor'],
       ['6.1', '3.0', '4.6', '1.4', 'Iris-versicolor'],
       ['5.8', '2.6', '4.0', '1.2', 'Iris-versicolor'],
       ['5.0', '2.3', '3.3', '1.0', 'Iris-versicolor'],
       ['5.6', '2.7', '4.2', '1.3', 'Iris-versicolor'],
       ['5.7', '3.0', '4.2', '1.2', 'Iris-versicolor'],
       ['5.7', '2.9', '4.2', '1.3', 'Iris-versicolor'],
       ['6.2', '2.9', '4.3', '1.3', 'Iris-versicolor'],
       ['5.1', '2.5', '3.0', '1.1', 'Iris-versicolor'],
       ['5.7', '2.8', '4.1', '1.3', 'Iris-versicolor'],
       ['6.3', '3.3', '6.0', '2.5', 'Iris-virginica'],
       ['5.8', '2.7', '5.1', '1.9', 'Iris-virginica'],
       ['7.1', '3.0', '5.9', '2.1', 'Iris-virginica'],
       ['6.3', '2.9', '5.6', '1.8', 'Iris-virginica'],
       ['6.5', '3.0', '5.8', '2.2', 'Iris-virginica'],
       ['7.6', '3.0', '6.6', '2.1', 'Iris-virginica'],
       ['4.9', '2.5', '4.5', '1.7', 'Iris-virginica'],
       ['7.3', '2.9', '6.3', '1.8', 'Iris-virginica'],
       ['6.7', '2.5', '5.8', '1.8', 'Iris-virginica'],
       ['7.2', '3.6', '6.1', '2.5', 'Iris-virginica'],
       ['6.5', '3.2', '5.1', '2.0', 'Iris-virginica'],
       ['6.4', '2.7', '5.3', '1.9', 'Iris-virginica'],
       ['6.8', '3.0', '5.5', '2.1', 'Iris-virginica'],
       ['5.7', '2.5', '5.0', '2.0', 'Iris-virginica'],
       ['5.8', '2.8', '5.1', '2.4', 'Iris-virginica'],
       ['6.4', '3.2', '5.3', '2.3', 'Iris-virginica'],
       ['6.5', '3.0', '5.5', '1.8', 'Iris-virginica'],
       ['7.7', '3.8', '6.7', '2.2', 'Iris-virginica'],
       ['7.7', '2.6', '6.9', '2.3', 'Iris-virginica'],
       ['6.0', '2.2', '5.0', '1.5', 'Iris-virginica'],
       ['6.9', '3.2', '5.7', '2.3', 'Iris-virginica'],
       ['5.6', '2.8', '4.9', '2.0', 'Iris-virginica'],
       ['7.7', '2.8', '6.7', '2.0', 'Iris-virginica'],
       ['6.3', '2.7', '4.9', '1.8', 'Iris-virginica'],
       ['6.7', '3.3', '5.7', '2.1', 'Iris-virginica'],
       ['7.2', '3.2', '6.0', '1.8', 'Iris-virginica'],
       ['6.2', '2.8', '4.8', '1.8', 'Iris-virginica'],
       ['6.1', '3.0', '4.9', '1.8', 'Iris-virginica'],
       ['6.4', '2.8', '5.6', '2.1', 'Iris-virginica'],
       ['7.2', '3.0', '5.8', '1.6', 'Iris-virginica'],
       ['7.4', '2.8', '6.1', '1.9', 'Iris-virginica'],
       ['7.9', '3.8', '6.4', '2.0', 'Iris-virginica'],
       ['6.4', '2.8', '5.6', '2.2', 'Iris-virginica'],
       ['6.3', '2.8', '5.1', '1.5', 'Iris-virginica'],
       ['6.1', '2.6', '5.6', '1.4', 'Iris-virginica'],
       ['7.7', '3.0', '6.1', '2.3', 'Iris-virginica'],
       ['6.3', '3.4', '5.6', '2.4', 'Iris-virginica'],
       ['6.4', '3.1', '5.5', '1.8', 'Iris-virginica'],
       ['6.0', '3.0', '4.8', '1.8', 'Iris-virginica'],
       ['6.9', '3.1', '5.4', '2.1', 'Iris-virginica'],
       ['6.7', '3.1', '5.6', '2.4', 'Iris-virginica'],
       ['6.9', '3.1', '5.1', '2.3', 'Iris-virginica'],
       ['5.8', '2.7', '5.1', '1.9', 'Iris-virginica'],
       ['6.8', '3.2', '5.9', '2.3', 'Iris-virginica'],
       ['6.7', '3.3', '5.7', '2.5', 'Iris-virginica'],
       ['6.7', '3.0', '5.2', '2.3', 'Iris-virginica'],
       ['6.3', '2.5', '5.0', '1.9', 'Iris-virginica'],
       ['6.5', '3.0', '5.2', '2.0', 'Iris-virginica'],
       ['6.2', '3.4', '5.4', '2.3', 'Iris-virginica'],
       ['5.9', '3.0', '5.1', '1.8', 'Iris-virginica']], dtype='<U32')
# Persist the combined array as CSV, then read it straight back.
outfile = r'.\iris.data'
frame = pd.DataFrame(iris_data)
frame.to_csv(outfile)
# to_csv writes a header row and a leading index column: skip the header
# while reading and slice the index column off afterwards.
iris_data = np.loadtxt(outfile, dtype=object, delimiter=',', skiprows=1)[:, 1:]

2. 求出鸢尾属植物萼片长度的平均值、中位数和标准差(第1列,sepallength)

【知识点:统计相关】

  • 如何计算numpy数组的均值,中位数,标准差?
# Read only file column 1 (sepal length) as floats, skipping the header row.
sepalLength = np.loadtxt(
    outfile,
    dtype=float,
    delimiter=',',
    skiprows=1,
    usecols=[1],
)
print(sepalLength)

[5.1 4.9 4.7 4.6 5.  5.4 4.6 5.  4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.  5.  5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.
 5.5 4.9 4.4 5.1 5.  4.5 4.4 5.  5.1 4.8 5.1 4.6 5.3 5.  7.  6.4 6.9 5.5
 6.5 5.7 6.3 4.9 6.6 5.2 5.  5.9 6.  6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
 6.3 6.1 6.4 6.6 6.8 6.7 6.  5.7 5.5 5.5 5.8 6.  5.4 6.  6.7 6.3 5.6 5.5
 5.5 6.1 5.8 5.  5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.  6.9 5.6 7.7 6.3 6.7 7.2
 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6.  6.9 6.7 6.9 5.8 6.8
 6.7 6.7 6.3 6.5 6.2 5.9]
# Print mean, median and standard deviation of sepal length, in that order.
# Expected: 5.843333333333334, 5.8, 0.8253012917851409
for stat in (np.mean, np.median, np.std):
    print(stat(sepalLength))
5.843333333333334
5.8
0.8253012917851409

3.创建一种标准化形式的鸢尾属植物萼片长度,其值正好介于0和1之间,这样最小值为0,最大值为1(第1列,sepallength)。

【知识点:统计相关】

  • 如何标准化数组?
# Min-max scaling: the smallest value maps to 0 and the largest to 1.
amin = sepalLength.min()
amax = sepalLength.max()
x = (sepalLength - amin) / (amax - amin)
print(x[:10])
[0.22222222 0.16666667 0.11111111 0.08333333 0.19444444 0.30555556
 0.08333333 0.19444444 0.02777778 0.16666667]
# Method 2: np.ptp ("peak to peak") returns max - min directly.
value_range = np.ptp(sepalLength)
x = (sepalLength - amin) / value_range
print(x[:10])
[0.22222222 0.16666667 0.11111111 0.08333333 0.19444444 0.30555556
 0.08333333 0.19444444 0.02777778 0.16666667]

4. 找到鸢尾属植物萼片长度的第5和第95百分位数(第1列,sepallength)。

【知识点:统计相关】

  • 如何找到numpy数组的百分位数?
# 5th and 95th percentiles of sepal length, returned as a two-element array.
x = np.percentile(sepalLength, q=[5, 95])
print(x)
[4.6   7.255]

5. 把iris_data数据集中的20个随机位置修改为np.nan值。

【知识点:随机抽样】

  • 如何在数组中的随机位置修改值?
import numpy as np
# Reload every column as strings and slice off the leading index column.
iris_data = np.loadtxt(outfile, dtype=object, delimiter=',', skiprows=1)[:, 1:]
# i = number of rows, j = number of columns.
i, j = iris_data.shape
print(i, j)
150 5
# Method 1: draw 20 row indices and 20 column indices with randint and pair
# them up. The two draws are independent and sample with replacement, so a
# (row, column) pair can repeat and fewer than 20 distinct cells may be hit.
np.random.seed(20200621)
row_idx = np.random.randint(i, size=20)
col_idx = np.random.randint(j, size=20)
iris_data[row_idx, col_idx] = np.nan
print(iris_data[:10])
[['5.1' '3.5' '1.4' '0.2' 'Iris-setosa']
 ['4.9' '3.0' '1.4' '0.2' 'Iris-setosa']
 ['4.7' '3.2' '1.3' '0.2' 'Iris-setosa']
 ['4.6' '3.1' '1.5' '0.2' 'Iris-setosa']
 ['5.0' '3.6' '1.4' '0.2' 'Iris-setosa']
 ['5.4' nan '1.7' '0.4' 'Iris-setosa']
 ['4.6' '3.4' '1.4' '0.3' 'Iris-setosa']
 ['5.0' '3.4' '1.5' '0.2' 'Iris-setosa']
 ['4.4' '2.9' '1.4' '0.2' nan]
 ['4.9' '3.1' '1.5' '0.1' 'Iris-setosa']]
# Method 2: the same idea with np.random.choice.
np.random.seed(20200620)
# choice(a, size, replace, p) draws `size` items from range(a); p=None means
# a uniform distribution. a1 is only a demonstration and is not used again,
# but the draw advances the RNG state and so affects the indices below.
a1 = np.random.choice(a=5, size=3, replace=False, p=None)
row_idx = np.random.choice(i, size=20)
col_idx = np.random.choice(j, size=20)
iris_data[row_idx, col_idx] = np.nan
print(iris_data[:10])
[['5.1' '3.5' nan '0.2' 'Iris-setosa']
 ['4.9' '3.0' '1.4' '0.2' 'Iris-setosa']
 ['4.7' '3.2' '1.3' '0.2' 'Iris-setosa']
 ['4.6' '3.1' '1.5' '0.2' 'Iris-setosa']
 ['5.0' '3.6' '1.4' nan 'Iris-setosa']
 ['5.4' nan '1.7' '0.4' 'Iris-setosa']
 ['4.6' '3.4' '1.4' '0.3' 'Iris-setosa']
 ['5.0' '3.4' '1.5' '0.2' 'Iris-setosa']
 ['4.4' '2.9' '1.4' '0.2' nan]
 ['4.9' '3.1' '1.5' '0.1' 'Iris-setosa']]

6. 在iris_data的sepallength中查找缺失值的个数和位置(第1列)。

【知识点:逻辑函数、搜索】

  • 如何在numpy数组中找到缺失值的位置?
# Count and locate the missing values in the sepal-length column (column 0).
sepallength = iris_data[:, 0]
# np.isnan raises TypeError on an object-dtype array (which is what loading
# with dtype=object yields), so test each element for NaN through
# self-inequality instead: NaN is the only value for which v != v.
# This works for float arrays as well.
a = np.array([v != v for v in sepallength], dtype=bool)
# The results depend on which cells were set to NaN beforehand.
print(np.sum(a))    # number of missing values
print(np.where(a))  # row indices of the missing values
0
(array([], dtype=int64),)

7. 筛选具有 sepallength(第1列)< 5.0 并且 petallength(第3列)> 1.5 的 iris_data行。

【知识点:搜索】

  • 如何根据两个或多个条件筛选numpy数组?
import numpy as np
outfile = r'.\iris.data'
# Load the four numeric columns; after usecols, column 0 is sepal length and
# column 2 is petal length.
iris_data = np.loadtxt(
    outfile, dtype='float', delimiter=',', skiprows=1, usecols=[1, 2, 3, 4]
)
sepallength = iris_data[:, 0]
petallength = iris_data[:, 2]
# Keep the rows with petal length > 1.5 and sepal length < 5.0.
mask = (petallength > 1.5) & (sepallength < 5.0)
index = np.where(mask)
print(iris_data[index])
[[4.8 3.4 1.6 0.2]
 [4.8 3.4 1.9 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [4.9 2.4 3.3 1. ]
 [4.9 2.5 4.5 1.7]]

8. 选择没有任何 nan 值的 iris_data 行。

【知识点:逻辑函数、搜索】

  • 如何从numpy数组中删除包含缺失值的行?
import numpy as np
outfile = r'.\iris.data'
# Load as float, not object: np.isnan only works on numeric arrays and raises
# "TypeError: ufunc 'isnan' not supported for the input types" on an
# object-dtype array.
iris_data = np.loadtxt(outfile, dtype=float, delimiter=',', skiprows=1, usecols=[1, 2, 3, 4])
i, j = iris_data.shape
# Seeded so the same cells are set to NaN on every run.
np.random.seed(20200621)
iris_data[np.random.randint(i, size=20), np.random.randint(j, size=20)] = np.nan
# b marks the NaN cells; keep only the rows in which no cell is NaN.
b = np.isnan(iris_data)
x = iris_data[~b.any(axis=1)]
print(x[0:10])
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-51-4ab136197bf5> in <module>
      5 np.random.seed(20200621)
      6 iris_data[np.random.randint(i, size= 20), np.random.randint(j, size = 20)] = np.nan
----> 7 b = np.isnan(iris_data)
      8 x = iris_data[np.sum(b, axis=1) == 0]
      9 print(x[0:10])


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

9. 计算 iris_data 中sepalLength(第1列)和petalLength(第3列)之间的相关系数。

【知识点:统计相关】

  • 如何计算numpy数组两列之间的相关系数?
# Correlation between sepal length (column 0) and petal length (column 2).
iris_data = np.loadtxt(
    outfile, dtype='float', delimiter=',', skiprows=1, usecols=[1, 2, 3, 4]
)
sepalLength, petalLength = iris_data[:, 0], iris_data[:, 2]
# np.corrcoef returns the 2x2 correlation matrix; the off-diagonal entry is
# the coefficient between the two columns.
x = np.corrcoef(sepalLength, petalLength)
x
array([[1.        , 0.87175378],
       [0.87175378, 1.        ]])

10. 找出iris_data是否有任何缺失值。

【知识点:逻辑函数】
  • 如何查找给定数组是否具有空值?
# x marks the NaN cells; .any() is True if at least one cell is NaN.
x = np.isnan(iris_data)
print(x.any())  # False for the freshly loaded data
False

11. 在numpy数组中将所有出现的nan替换为0。

【知识点:逻辑函数】

  • 如何在numpy数组中用0替换所有缺失值?
import numpy as np
outfile = r'.\iris.data'
iris_data = np.loadtxt(
    outfile, dtype=float, delimiter=',', skiprows=1, usecols=[1, 2, 3, 4]
)
i, j = iris_data.shape
# Seeded so the same cells are set to NaN on every run.
np.random.seed(20200621)
row_idx = np.random.randint(i, size=20)
col_idx = np.random.randint(j, size=20)
iris_data[row_idx, col_idx] = np.nan
# Overwrite every NaN cell with 0 through a boolean mask.
nan_mask = np.isnan(iris_data)
iris_data[nan_mask] = 0
print(iris_data[:10])
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 0.  1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 0.  0.2]
 [4.9 3.1 1.5 0.1]]

12. 找出鸢尾属植物物种中的唯一值和唯一值出现的数量。

【知识点:数组操作】

  • 如何在numpy数组中查找唯一值的计数?
import numpy as np
outfile = r'.\iris.data'
# Read only file column 5 (the species label) as strings.
iris_data = np.loadtxt(
    outfile, dtype=object, delimiter=',', skiprows=1, usecols=[5]
)
# return_counts=True yields a pair: (unique values, count of each value).
x = np.unique(iris_data, return_counts=True)
print(x)
# Expected: three species with 50 rows each.
(array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object), array([50, 50, 50], dtype=int64))

13. 将 iris_data 的花瓣长度(第3列)以形成分类变量的形式显示。定义:Less than 3 --> ‘small’;3-5 --> ‘medium’;>=5 --> ‘large’。

【知识点:统计相关】

  • 如何将数字转换为分类(文本)数组?
import numpy as np
outfile = r'.\iris.data'
iris_data = np.loadtxt(outfile, dtype=float, delimiter=',', skiprows=1, usecols=[1, 2, 3, 4])
# Petal length is column 2 of the loaded numeric columns (column 3 is petal
# width). With edges [0, 3, 5, 10], np.digitize returns 1 for [0, 3),
# 2 for [3, 5), 3 for [5, 10) and 4 for values >= 10.
petal_length_bin = np.digitize(iris_data[:, 2], [0, 3, 5, 10])
# Bin 4 (>= 10) has no category, so it maps to NaN.
label_map = {1: 'small', 2: 'medium', 3: 'large', 4: np.nan}
petal_length_cat = [label_map[x] for x in petal_length_bin]
print(petal_length_cat[0:10])
# The first ten rows are all 'small'.
['small', 'small', 'small', 'small', 'small', 'small', 'small', 'small', 'small', 'small']

14. 在 iris_data 中创建一个新列,其中 volume 是 (pi x petallength x sepallength ^ 2)/ 3 。

【知识点:数组操作】

  • 如何从numpy数组的现有列创建新列?
import numpy as np
outfile = r'.\iris.data'
# Load every column as strings and drop the leading index column stored in
# the CSV, so the result holds the five data columns plus the new one.
iris_data = np.loadtxt(outfile, dtype=object, delimiter=',', skiprows=1)[:, 1:]
# After dropping the index, column 0 is sepal length and column 2 is petal
# length; convert from text to float before doing arithmetic.
sepalLength = iris_data[:, 0].astype(float)
petalLength = iris_data[:, 2].astype(float)
volume = (np.pi * petalLength * sepalLength ** 2) / 3
print(volume)
# np.concatenate along axis=1 needs 2-D inputs, so turn the 1-D volume
# vector into an (n, 1) column first.
volume = volume[:, np.newaxis]
iris_data = np.concatenate([iris_data, volume], axis=1)
print(iris_data[0:10])
# First rows of the result:
# [['5.1' '3.5' '1.4' '0.2' 'Iris-setosa' 38.13265162927291]
#  ['4.9' '3.0' '1.4' '0.2' 'Iris-setosa' 35.200498485922445]
#  ['4.7' '3.2' '1.3' '0.2' 'Iris-setosa' 30.0723720777127]
#  ['4.6' '3.1' '1.5' '0.2' 'Iris-setosa' 33.238050274980004]
#  ['5.0' '3.6' '1.4' '0.2' 'Iris-setosa' 36.65191429188092]
[ 38.13265163  35.20049849  30.07237208  33.23805027  36.65191429
  51.91167701  31.02218026  39.26990817  28.38324243  37.71481981
  45.80442089  38.60389053  33.77840421  21.29895099  42.27327075
  51.03517266  39.69716477  38.13265163  57.83986235  40.85641246
  51.91167701  40.85641246  22.15870018  46.30393412  45.84212
  41.88790205  41.88790205  42.47433268  39.6427105   37.01215025
  38.60389053  45.80442089  42.47433268  44.34881629  37.71481981
  31.41592654  41.1810437   35.20049849  26.35586797  40.85641246
  34.03392041  27.56747554  26.35586797  41.88790205  51.75145578
  33.77840421  43.58017329  31.02218026  44.12366882  36.65191429
 241.16959604 193.01945264 244.29966952 126.71090369 203.52284408
 153.10551797 195.34737279  82.97260357 209.83325652 110.43326496
  91.62978573 153.10237638 150.79644737 183.14123814 118.22441474
 206.83827152 147.78051842 144.43367505 181.14423241 128.0764493
 174.97414443 155.86488352 203.66002695 183.14123814 184.4408103
 200.71007145 232.42759088 235.04349037 169.64600329 119.08206953
 120.37535851 117.20758592 137.38812993 192.2654704  137.41326267
 169.64600329 220.94088094 182.87839155 134.64447234 126.71090369
 139.38199406 179.24461605 140.91090249  86.39379797 137.92848386
 142.89848344 142.89848344 173.09337763  81.71282492 139.4961386
 249.37962484 179.66140067 311.45644848 232.75431652 256.61575992
 399.20846168 113.14445942 351.57249227 272.65044882 331.14899843
 225.64489234 227.33402199 266.32328122 170.11724219 179.66140067
 227.33402199 243.34253096 415.99189683 428.40956539 188.49555922
 284.18532985 160.91656451 415.99189683 203.66002695 267.94957902
 325.72032632 193.22051457 190.93448231 240.2019855  314.86298211
 349.80168121 418.27583469 240.2019855  211.97268112 218.21083693
 378.73889114 232.75431652 235.91266433 180.95573685 269.22820723
 263.24870921 254.2710846  179.66140067 285.69224713 267.94957902
 244.44522998 207.81635403 230.069302   217.37307889 185.91002846]
[['0' '5.1' '3.5' '1.4' '0.2' 'Iris-setosa' 38.13265162927291]
 ['1' '4.9' '3.0' '1.4' '0.2' 'Iris-setosa' 35.200498485922445]
 ['2' '4.7' '3.2' '1.3' '0.2' 'Iris-setosa' 30.0723720777127]
 ['3' '4.6' '3.1' '1.5' '0.2' 'Iris-setosa' 33.238050274980004]
 ['4' '5.0' '3.6' '1.4' '0.2' 'Iris-setosa' 36.65191429188092]
 ['5' '5.4' '3.9' '1.7' '0.4' 'Iris-setosa' 51.911677007917746]
 ['6' '4.6' '3.4' '1.4' '0.3' 'Iris-setosa' 31.022180256648003]
 ['7' '5.0' '3.4' '1.5' '0.2' 'Iris-setosa' 39.269908169872416]
 ['8' '4.4' '2.9' '1.4' '0.2' 'Iris-setosa' 28.38324242763259]
 ['9' '4.9' '3.1' '1.5' '0.1' 'Iris-setosa' 37.714819806345474]]

15. 随机抽鸢尾属植物的种类,使得Iris-setosa的数量是Iris-versicolor和Iris-virginica数量的两倍.

【知识点:随机抽样】

  • 如何在numpy中进行概率抽样?
import numpy as np
# Species labels written with ASCII hyphens so they compare equal to the
# labels used in the dataset ('Iris-setosa', ...).
species = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
# p gives Iris-setosa twice the probability of each of the other two species.
species_out = np.random.choice(species, 10000, p=[0.5, 0.25, 0.25])
# Unseeded, so the exact counts vary from run to run (roughly 5000/2500/2500).
print(np.unique(species_out, return_counts=True))
(array(['Iris‐setosa', 'Iris‐versicolor', 'Iris‐virginica'], dtype='<U15'), array([5057, 2445, 2498], dtype=int64))

16. 根据 sepallength 列对数据集进行排序。

【知识点:排序】

  • 如何按列对2D数组进行排序?
iris_data = np.loadtxt(outfile, dtype=object, delimiter=',', skiprows=1)
# The columns are loaded as strings. Convert sepal length (file column 1) to
# float so the sort is numeric rather than lexicographic; as text, '10.0'
# would sort before '9.0'.
sepalLength = iris_data[:, 1].astype(float)
# argsort returns the row order that sorts sepalLength ascending; rows with
# equal sepal length may come out in any relative order.
index = np.argsort(sepalLength)
print(iris_data[index][0:10])
[['13' '4.3' '3.0' '1.1' '0.1' 'Iris-setosa']
 ['42' '4.4' '3.2' '1.3' '0.2' 'Iris-setosa']
 ['38' '4.4' '3.0' '1.3' '0.2' 'Iris-setosa']
 ['8' '4.4' '2.9' '1.4' '0.2' 'Iris-setosa']
 ['41' '4.5' '2.3' '1.3' '0.3' 'Iris-setosa']
 ['22' '4.6' '3.6' '1.0' '0.2' 'Iris-setosa']
 ['3' '4.6' '3.1' '1.5' '0.2' 'Iris-setosa']
 ['6' '4.6' '3.4' '1.4' '0.3' 'Iris-setosa']
 ['47' '4.6' '3.2' '1.4' '0.2' 'Iris-setosa']
 ['2' '4.7' '3.2' '1.3' '0.2' 'Iris-setosa']]

17. 在鸢尾属植物数据集中找到最常见的花瓣长度值(第3列)。

【知识点:数组操作】

  • 如何在numpy数组中找出出现次数最多的值?
iris_data = np.loadtxt(outfile, dtype=object, delimiter=',', skiprows=1)
# File column 3 is petal length (column 0 is the index column).
petalLength = iris_data[:, 3]
vals, counts = np.unique(petalLength, return_counts=True)
# np.argmax returns the position of the first maximum, and np.unique returns
# its values sorted, so a tie goes to the first value in sorted order.
top = np.argmax(counts)
print(vals[top])    # most frequent petal length: 1.4
print(counts[top])  # number of times it occurs: 13
1.4
13

18. 在鸢尾花数据集的 petalwidth(第4列)中查找第一次出现的值大于1.0的位置。

【知识点:搜索】

  • 如何找到第一次出现大于给定值的位置?
iris_data = np.loadtxt(
    outfile, dtype=float, delimiter=',', skiprows=1, usecols=[1, 2, 3, 4]
)
petalWidth = iris_data[:, 3]
# np.where on a 1-D condition returns a 1-tuple holding the matching indices
# in ascending order, so its first element is the first occurrence.
index = np.where(petalWidth > 1.0)
print(index)
first_match = index[0][0]
print(first_match)  # 50
(array([ 50,  51,  52,  53,  54,  55,  56,  58,  59,  61,  63,  64,  65,
        66,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  80,
        82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  94,  95,
        96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
       109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
       122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
       135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
       148, 149], dtype=int64),)
50

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值