利用随机森林、高斯朴素贝叶斯、支持向量机对鸢尾花数据集进行分类和可视化

1. 导入所需要的库文件(准备阶段)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Base matplotlib style for the figures.
plt.style.use('ggplot')
# Configure seaborn exactly once. Every sns.set() call re-applies the whole
# theme with defaults for the arguments that are omitted, so setting the font
# and the font scale in separate calls makes the later call discard the font
# chosen by the earlier one.
sns.set(font='SimHei', font_scale=2)
# These rcParams are assigned after sns.set() so the theme does not override them.
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
# Set default font size
plt.rcParams['font.size'] = 16
# Display up to 60 columns of a dataframe
pd.set_option('display.max_columns', 60)
import warnings
from sklearn.datasets import load_iris

2. 加载数据集

# Load the iris dataset bundled with scikit-learn.
data = load_iris()
species = data.target  # integer class label (0, 1 or 2) for each sample
# Feature-only table, one named column per measurement.
iris_features = pd.DataFrame(data=data.data, columns=data.feature_names)
# The rest of the script works on `iris_data`: the features plus a trailing
# 'species' column. It is built as a copy so `iris_features` stays label-free.
iris_data = iris_features.copy()
iris_data['species'] = species
iris_data

在这里插入图片描述
3. 可视化部分
该部分主要是对鸢尾花四个特征的数值分布情况进行可视化统计。主要利用核密度图与散点图搭配、小提琴图与散点图搭配和3D可视化。

# Pairwise view of every column of iris_data, coloured by species.
sns.set(font_scale=1)
g = sns.PairGrid(iris_data, hue="species")
# The map_* methods draw onto the grid in place, so `g` keeps referring to
# the same PairGrid throughout.
g.map_upper(sns.scatterplot)         # scatter plots above the diagonal
g.map_lower(sns.kdeplot, color='r')  # bivariate density plots below the diagonal
g.map_diag(sns.kdeplot)              # univariate density plots on the diagonal

在这里插入图片描述

# Distribution of the first feature per species: a violin outline with the
# individual samples overlaid as a swarm.
# `whis` is a box-plot whisker option and has no meaning for a violin plot, so
# it is not passed here (recent seaborn forwards unknown keywords to
# matplotlib, which rejects it).
ax = sns.violinplot(x="species", y=iris_data.columns[0], data=iris_data, inner=None)
ax = sns.swarmplot(x="species", y=iris_data.columns[0], data=iris_data, color="c")

在这里插入图片描述

# Distribution of the second feature per species: a violin outline with the
# individual samples overlaid as a swarm.
# `whis` is a box-plot whisker option and has no meaning for a violin plot, so
# it is not passed here (recent seaborn forwards unknown keywords to
# matplotlib, which rejects it).
ax = sns.violinplot(x="species", y=iris_data.columns[1], data=iris_data, inner=None)
ax = sns.swarmplot(x="species", y=iris_data.columns[1], data=iris_data, color="c")

在这里插入图片描述

# Split the samples by class with a boolean mask. This selects rows by value
# and therefore does not depend on iris_data having the default 0..n-1 index.
p0 = iris_data[iris_data['species'] == 0]
p1 = iris_data[iris_data['species'] == 1]
p2 = iris_data[iris_data['species'] == 2]

# The first three columns supply the x / y / z coordinates of each point.
p0_x, p0_y, p0_z = p0.iloc[:, 0], p0.iloc[:, 1], p0.iloc[:, 2]
p1_x, p1_y, p1_z = p1.iloc[:, 0], p1.iloc[:, 1], p1.iloc[:, 2]
p2_x, p2_y, p2_z = p2.iloc[:, 0], p2.iloc[:, 1], p2.iloc[:, 2]

sns.set(font='SimHei')  # keep Chinese text renderable in seaborn-styled figures
fig = plt.figure(figsize=(16,10))
ax = plt.axes(projection = '3d')
plt.style.use('ggplot')
plt.rcParams['font.size'] = 8

# One scatter series per class, each with its own marker.
ax.scatter3D(p0_x,p0_y,p0_z,alpha=0.7,label = '0',marker='*',c='y')
ax.scatter3D(p1_x,p1_y,p1_z,alpha=0.7,label = '1',marker='p')
ax.scatter3D(p2_x,p2_y,p2_z,alpha=0.7,label = '2',marker='^')

# Axis labels must follow the plotted columns: x is column 0 (sepal length),
# y is column 1 (sepal width) and z is column 2 (petal length).
ax.set_xlabel('sepal length (cm)', fontdict={'size': 15})
ax.set_ylabel('sepal width (cm)', fontdict={'size': 15})
ax.set_zlabel('petal length (cm)', fontdict={'size': 15})

plt.legend()


在这里插入图片描述

4. 分类

  • 导包
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import LeaveOneOut

# from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,accuracy_score
  • 数据集X,y提取
# The last column of iris_data is the class label; everything before it is a feature.
Y = iris_data.iloc[:, -1]
# Standardisation is left disabled: X = StandardScaler().fit_transform(X)
X = pd.DataFrame(iris_data.iloc[:, :-1])
  • 留一法进行训练
def leave_one_out(algr, X, y):
    """Evaluate a classifier with leave-one-out cross-validation.

    For every sample, ``algr`` is re-fitted on all the other samples and asked
    to predict the held-out one. The fraction of correctly predicted samples
    is printed.

    Args:
        algr: Estimator whose ``fit(X, y)`` returns the fitted model and which
            provides ``predict(X)``.
        X: Feature table with one row per sample (DataFrame or 2-D array-like).
        y: Class labels aligned row by row with ``X`` (Series or 1-D array-like).

    Returns:
        A tuple ``(algr, predictions)``. ``algr`` is the estimator as left by
        the last fold's fit; ``predictions`` is a list holding, for each sample
        in order, the array returned by ``predict`` for that sample.
    """
    # The splitter yields row positions, so both inputs are indexed
    # positionally. Label-based ``y[train]`` on a Series is only correct when
    # the Series happens to carry the default 0..n-1 index.
    features = np.asarray(X)
    labels = np.asarray(y)

    correct = 0
    predictions = []
    for train, test in LeaveOneOut().split(features):
        model = algr.fit(features[train], labels[train])
        predicted_y = model.predict(features[test])
        predictions.append(predicted_y)
        # Exactly one sample is held out per fold, so compare that single label.
        if labels[test][0] == predicted_y[0]:
            correct += 1
    print('Using the leave one test and The assessment results are: {}'.format(correct/features.shape[0]))
    return algr, predictions
  • 算法调用
# Run leave-one-out evaluation for each classifier. leave_one_out returns the
# estimator it was given together with the per-sample predictions, which are
# kept for the confusion matrices.
print("GaussianNB:")
clf_gs, l_gs = leave_one_out(GaussianNB(), X, Y)

print("SVM:")
clf_svm, l_svm = leave_one_out(SVC(kernel='linear', C=1E10), X, Y)

print("RandomForestClassifier:")
clf_RF, l_RF = leave_one_out(RandomForestClassifier(), X, Y)

GaussianNB:
Using the leave one test and The assessment results are: 0.9533333333333334
SVM:
Using the leave one test and The assessment results are: 0.96
RandomForestClassifier:
Using the leave one test and The assessment results are: 0.9533333333333334

  • 混淆矩阵
# Tick labels for the confusion-matrix axes, one entry per class.
classes = ['0','1','2']
def plot_confusion_matrix(cm,title='Confusion Matrix'):
    """Show ``cm`` as a greyscale image with the cell values written in red.

    Args:
        cm: Matrix indexed as ``cm[row][col]``. Rows are labelled as actual
            classes and columns as predicted classes.
        title: Text displayed above the plot.

    The module-level ``classes`` list supplies the tick labels and the grid
    size. The figure is displayed with ``plt.show()``; nothing is returned.
    """
    n_classes = len(classes)

    plt.figure(figsize=(12, 8), dpi=100)
    np.set_printoptions(precision=2)

    # Write the value into every cell that is not (practically) zero.
    # Cells are visited row by row; text x is the column, text y is the row.
    for row, col in np.ndindex(n_classes, n_classes):
        cell = cm[row][col]
        if cell > 0.001:
            plt.text(col, row, "%0.2f" % (cell,), color='red', fontsize=15, va='center', ha='center')

    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.binary)
    plt.title(title)
    plt.colorbar()

    # Major ticks sit on the cell centres and carry the class names.
    centres = np.arange(n_classes)
    plt.xticks(centres, classes, rotation=90)
    plt.yticks(centres, classes)
    plt.ylabel('Actual label')
    plt.xlabel('Predict label')

    # Minor ticks sit on the cell borders; the grid is drawn only on them so
    # the lines separate the cells instead of crossing their centres.
    borders = centres + 0.5
    axes = plt.gca()
    axes.set_xticks(borders, minor=True)
    axes.set_yticks(borders, minor=True)
    axes.xaxis.set_ticks_position('none')
    axes.yaxis.set_ticks_position('none')
    plt.grid(True, which='minor', linestyle='-')
    plt.gcf().subplots_adjust(bottom=0.15)

    # show confusion matrix
    plt.show()
# One confusion matrix per classifier, built from the leave-one-out
# predictions and plotted in the same order the models were evaluated.
for predicted, plot_title in (
    (l_gs, 'confusion matrix (GaussianNB)'),
    (l_svm, 'confusion matrix (SVM)'),
    (l_RF, 'confusion matrix (RandomForestClassifier)'),
):
    cm = confusion_matrix(Y, predicted)
    plot_confusion_matrix(cm, title=plot_title)

在这里插入图片描述

以下是使用朴素贝叶斯、支持向量机、决策树对鸢尾花数据集分类的Python代码:

1. 朴素贝叶斯分类器

```python
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 加载数据集
iris = load_iris()
X = iris.data
y = iris.target

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 创建朴素贝叶斯分类器
gnb = GaussianNB()

# 训练模型
gnb.fit(X_train, y_train)

# 预测测试集
y_pred = gnb.predict(X_test)

# 计算准确率
accuracy = accuracy_score(y_test, y_pred)
print("朴素贝叶斯分类器的准确率为:", accuracy)
```

2. 支持向量机分类器

```python
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 加载数据集
iris = load_iris()
X = iris.data
y = iris.target

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 创建支持向量机分类器
svc = SVC(kernel='linear')

# 训练模型
svc.fit(X_train, y_train)

# 预测测试集
y_pred = svc.predict(X_test)

# 计算准确率
accuracy = accuracy_score(y_test, y_pred)
print("支持向量机分类器的准确率为:", accuracy)
```

3. 决策树分类器

```python
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 加载数据集
iris = load_iris()
X = iris.data
y = iris.target

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 创建决策树分类器
dtc = DecisionTreeClassifier()

# 训练模型
dtc.fit(X_train, y_train)

# 预测测试集
y_pred = dtc.predict(X_test)

# 计算准确率
accuracy = accuracy_score(y_test, y_pred)
print("决策树分类器的准确率为:", accuracy)
```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

ACxz

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值