演示数据是花粉数据:每一行是一个花粉样本,各列是花粉特征,其中 species 列是花粉种类(分类标签)
导入必要的库
import pandas as pd
import numpy as np
from sklearn. ensemble import RandomForestClassifier
from sklearn. model_selection import train_test_split
from sklearn. metrics import accuracy_score
from sklearn. metrics import confusion_matrix
import matplotlib. pyplot as plt
1、模型准确率
# Section 1: train a random forest on an 80/20 split and report the accuracy
# measured on the held-out 20%.
df = pd.read_csv('C:\\Users\\31425\\Desktop\\新建文件夹\\序列\\蒿属1.csv')

# The last column is used as the target; the feature matrix is every column
# between the first and the last.
# NOTE(review): the first column is skipped here — presumably an ID/index
# column; confirm against the CSV layout.
y = df.iloc[:, -1]
X = df.iloc[:, 1:-1]

# Fixed random_state keeps the split and the forest reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the predictions for the held-out rows against their true labels.
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("模型准确率:", accuracy)
1.1、对应输出
2、随机森林混淆矩阵结果
# Section 2: train a random forest on an 80/20 split and print the confusion
# matrix for the held-out rows.
data = pd.read_csv('C:\\Users\\31425\\Desktop\\新建文件夹\\序列\\蒿属1.csv')

# Separate the 'species' target from the remaining columns, which serve as
# the features.
labels = data['species']
data = data.drop(columns='species')

# Fixed random_state keeps the split and the forest reproducible across runs.
train_data, test_data, train_labels, test_labels = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

rfc = RandomForestClassifier(n_estimators=100, random_state=42)
rfc.fit(train_data, train_labels)

# confusion_matrix takes the true labels first, then the predicted labels.
predictions = rfc.predict(test_data)
cm = confusion_matrix(test_labels, predictions)
print(cm)
2.1、矩阵结果
3、随机森林对影响蒺藜科花粉判别的自变量重要性基尼系数排序图
# Section 3: fit a random forest on the whole data set and plot its feature
# importances as a bar chart, sorted from most to least important, with each
# bar annotated with its value.

# Select the SimHei font up front so every text element created below (tick
# labels, annotations, the Chinese title) is created under the same settings.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['SimHei']

data = pd.read_csv('C:\\Users\\31425\\Desktop\\新建文件夹\\序列\\蒿属1.csv')
X = data.drop('species', axis=1)
y = data['species']

# No train/test split here: all rows are used to fit the model whose
# importances are plotted.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

feature_importances = model.feature_importances_
feature_names = X.columns.values
# argsort is ascending; reversing it orders the features by descending
# importance.
indices = np.argsort(feature_importances)[::-1]

# Draw the bars exactly once and keep the returned container, so the loop
# below annotates the rectangles that are actually on the axes instead of
# stacking extra copies of the same bars on top of them.
positions = range(X.shape[1])
container = plt.bar(positions, feature_importances[indices])

# Vertical gap, in data units, between the top of a bar and its label.
padding = 0.01
for rect in container:
    height = rect.get_height()
    plt.text(
        rect.get_x() + rect.get_width() / 2,
        height + padding,
        f"{height:.3f}",
        ha="center",
        va="bottom",
    )

# Label each bar with its feature name, rotated so long names do not overlap.
plt.xticks(positions, feature_names[indices], rotation=90)
plt.title("随机森林对影响蒺藜科花粉判别的自变量重要性基尼系数排序图")
plt.show()
自变量重要性基尼系数排序图