# NumPy dataset exercise: exploring the iris dataset
# Requires the scipy, numpy and scikit-learn (sklearn) packages to be installed.
from sklearn.datasets import load_iris
import numpy as np
import pandas as pd

# Load the iris dataset that ships with scikit-learn.
data = load_iris()

# Inspect the type of the loaded object and the keys it exposes.
print(type(data))
print(data.keys())

# Pull out the feature matrix and report its type, contents and shape.
iris_feature = data['data']
print(type(iris_feature))
print(iris_feature)
n_sample, n_features = data.data.shape
print(n_sample, n_features)

# Show the first sample.
print(data.data[0])

# Class labels: shape, values and the names they map to.
print(data.target.shape)
l = data['target']
print(l)
print(data.target_names)

# Column (feature) names.
print("feature_names:", data.feature_names)

# Sepal length (cm) of every flower: column 0 of the feature matrix.
D = data.data
X = [x[0] for x in D]
print(X)

# Petal length (cm) and petal width (cm) of every flower: columns 2 and 3.
Y = [x[2] for x in D]
print(Y)
Z = [x[3] for x in D]
print(Z)
# NOTE(review): `Y + Z` concatenates the two lists, so `z` is a single row
# holding all petal lengths followed by all petal widths. If one
# (length, width) pair per flower was intended, `data.data[:, 2:4]` would
# give that instead -- confirm before changing, as it alters the output.
z = np.array([Y + Z])
print(z)

# The four features and the class label of one particular flower.
print(iris_feature[0], l[0])

# Split every flower (features + class name) into one list per class.
issetosa = []
isversicolor = []
isvirginica = []
# Iterate samples and labels together rather than over a hard-coded count,
# and use a dedicated `row` name so the loaded dataset in `data` is not
# overwritten by the per-flower list.
for features, label in zip(iris_feature, l):
    row = features.tolist()
    if label == 0:
        row.append('setosa')
        issetosa.append(row)
    elif label == 1:
        row.append('versicolor')
        isversicolor.append(row)
    else:
        row.append('virginica')
        isvirginica.append(row)

# Build one array from the three groups; each row holds the four features
# followed by the class name (mixed float/str rows, so NumPy picks a common
# dtype for all elements).
iris_re = np.array([issetosa, isversicolor, isvirginica])
print(iris_re)