#使用pandas读取数据转化为numpy数组
from sklearn.datasets import load_iris
import numpy as np
import pandas as pd
# Read the comma-separated text file into a DataFrame; the bare `data`
# expression echoes it when this is run as a notebook cell.
csv_path = r'./irsi.txt'
data = pd.read_csv(csv_path, sep=',')
data
![16bee779a9fc0d686659784e66fd4f32.png](https://i-blog.csdnimg.cn/blog_migrate/aee0bca6283a67c638dbedec3409a0b2.png)
# Copy the DataFrame's values into a standalone NumPy array, then echo it.
X = np.array(data.values)
X
![b4ff67bb91f29f088668954cdddb1b48.png](https://i-blog.csdnimg.cn/blog_migrate/dae7196e79e0e9d2e224f88d7b391d49.jpeg)
# Summary statistics of the first column: mean, median (taken as the
# 50th percentile) and standard deviation.
first_col = X[:, 0]
print("平均值:", np.mean(first_col))
print("中位数:", np.percentile(first_col, 50))
print("标准差:", np.std(first_col))
![bb4a11a9bccf6d4a0d115b85e09d3d91.png](https://i-blog.csdnimg.cn/blog_migrate/3360a7b190d11ead3494f172d2a42b3b.png)
归一化(最小-最大缩放)- 第一列
# Min-max scale the first column in place: (x - min) / (max - min).
col_min = np.amin(X[:, 0])
col_span = np.amax(X[:, 0]) - col_min
X[:, 0] = (X[:, 0] - col_min) / col_span
X[:, 0]
![a0aabd54aa58d9d6605611980dc82e86.png](https://i-blog.csdnimg.cn/blog_migrate/2cefc04f445e2b40b085da168d24e5e2.jpeg)
随机抽取20个位置,将其变为nan
# Fix the seed so the sampled positions are reproducible.
np.random.seed(20201201)
# Row (X) coordinates: 20 distinct integers drawn from range(149),
# i.e. sampling without replacement.
index_x = np.random.choice(149, size=20, replace=False)
index_x
![8731821824d2bac11aaf6afa1e078036.png](https://i-blog.csdnimg.cn/blog_migrate/3fc31ead217bf11d633d167e713405b9.png)
# Column (Y) coordinates: 20 integers drawn from [0, 5); repeats are
# allowed (sampling with replacement).
index_y = np.random.randint(low=0, high=5, size=20)
index_y
![b0fc6c10c7f794dfaf52ddac58fb5b14.png](https://i-blog.csdnimg.cn/blog_migrate/09e3434cdf49caa2dac11242f7cd3c0d.png)
# Overwrite the 20 sampled (row, column) positions with NaN and show the result.
# The original `for` loop body was not indented, which is an IndentationError.
# A single integer-array assignment pairs index_x[i] with index_y[i]
# element by element, setting the same positions the loop was meant to.
X[index_x, index_y] = np.nan
print(X)
![967f27ae1f13667e6ef191f28b6238b6.png](https://i-blog.csdnimg.cn/blog_migrate/46ceb799d3d31f6e04a361ad16bc30f3.jpeg)
筛选第一列大于0.5并且第三列小于3的数据
# Keep the rows where column 0 is > 0.5 AND column 3 is < 3.
# The conditions are element-wise boolean masks combined with `&`.
# `np.where(a) and np.where(b)` evaluates to just the second index tuple
# (a non-empty tuple is always truthy), so the first condition was ignored.
# The masks are built from `X`; `E` is not defined in the visible script.
# NOTE(review): the heading says "third column", which would be index 2,
# while the code indexes column 3 -- confirm which one is intended.
# Comparisons against NaN are False, so rows holding NaN in either tested
# column are excluded from the result.
filte = X[(X[:, 0] > 0.5) & (X[:, 3] < 3)]
filte
![ae8f5676933742eedd862989e3d1c0af.png](https://i-blog.csdnimg.cn/blog_migrate/5c44704ad4adbbefe739fab4e7826594.jpeg)