1、导入包
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mp
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
#在图中显示中文
mp.rcParams['font.sans-serif'] = [u'SimHei']
mp.rcParams['axes.unicode_minus'] = False
iris = load_iris()
##取150个样本,取前两列特征,花萼长度和宽度
x=iris.data[0:150,1:3] #能看特征数据的具体信息
y=iris.target[0:150] #能看每行数据的标签的值
##分别取前两类样本,0和1
samples_0 = x[y==0, :]#把y=0,即Iris-setosa的样本取出来
samples_1 = x[y==1, :]#把y=1,即Iris-versicolo的样本取出来
samples_2 = x[y==2, :]#把y=2,即Iris-virginica的样本取出来
#散点图可视化
plt.scatter(samples_0[:,0],samples_0[:,1],marker='o',color='r')
plt.scatter(samples_1[:,0],samples_1[:,1],marker='x',color='b')
plt.scatter(samples_2[:,0],samples_2[:,1],marker='*',color='y')
plt.xlabel('花萼宽度', fontsize=14)
plt.ylabel('花瓣长度',fontsize=14)
iris=load_iris()
#print(iris)
X_train,X_test,y_train,y_test = train_test_split(iris['data'],iris['target'],random_state=0)
print(y_train)
#分割训练集和测试集
#X_train,X_test, y_train, y_test =cross_validation.train_test_split(train_data,train_target,test_size=0.4, random_state=0
iris_dataframe = pd.DataFrame(X_train,columns=iris.feature_names)
print(iris_dataframe)
#iris.feature_names 列名字 ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
grr = pd.plotting.scatter_matrix(iris_dataframe,c=y_train,figsize=(20,20),marker='o',hist_kwds={'bins':30},s=60,alpha=1)
plt.show()
# 把数据转换成dataframe的格式
iris_d = pd.DataFrame(iris['data'], columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'])
iris_d['Species'] = iris.target
def plot_iris(iris, col1, col2):
sns.lmplot(x = col1, y = col2, data = iris, hue = "Species", fit_reg = False)
plt.xlabel(col1)
plt.ylabel(col2)
plt.title('鸢尾花种类分布图')
plt.show()
plot_iris(iris_d, 'Petal_Width', 'Sepal_Length')
iris=load_iris()#载入鸢尾花数据集
iris
datas = iris.data# (150,4):150 行 5 列
# print(datas)
# print(iris.target)
datas
x1 = [x[0] for x in datas]# 第1列:花萼长度 sepal_length
x2 = [x[1] for x in datas]# 第2列:花萼宽度 sepal_width
x3 = [x[2] for x in datas]# 第3列:花瓣长度 petal_length
x4 = [x[3] for x in datas]# 第4列:花瓣宽度 petal_width
plt.scatter(x1[:50], x2[:50], color='red', marker='o', label='setosa') # 前50个样本
plt.scatter(x1[50:100],x2[50:100],color='blue',marker='x',label='vericolor')# 中间50个样本
plt.scatter(x1[100:150],x2[100:150],color='green',marker='+',label='Virginica')# 后50个样本
plt.legend(loc=1)# loc=1,2,3,4分别表示label在右上角,左上角,左下角,右下角
plt.show()
# 单个散点图(其他的同理)
plt.scatter(x1,x2,color='blue',marker='o',label='sepal_length & sepal_width')
plt.scatter(x1,x3,color='red',marker='+',label='sepal_length & petal_length')
plt.scatter(x1,x4,color='green',marker='x',label='sepal_length & petal_width')
plt.legend(loc=2)
plt.show()