from sklearn.naive_bayes import MultinomialNB # 多项式分布下的朴素贝叶斯
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import ShuffleSplit
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from ground import n_s
from sklearn import preprocessing
def km(filename):
    """Load the first sheet of an Excel workbook and replace missing values with 0.

    The values are returned unscaled. An earlier revision also computed a
    column-wise min-max normalization here, but its result was never used or
    returned; that dead computation has been removed because it only cost
    time and could raise on non-numeric columns.

    Args:
        filename: Path (or any object accepted by ``pandas.read_excel``) of
            the workbook to load.

    Returns:
        pandas.DataFrame: the first sheet, indexed by its first column, with
        every NaN replaced by 0.
    """
    df_raw = pd.read_excel(filename, sheet_name=0, index_col=0)
    # Return a filled copy rather than mutating in place; the caller sees the
    # same values either way because df_raw is local to this function.
    return df_raw.fillna(0)
def min_max_normalization(np_array):
    """Rescale the input with a freshly fitted ``MinMaxScaler``.

    A new scaler is fitted on ``np_array`` itself on every call, so the
    minimum and maximum used for scaling always come from the data passed in.

    Args:
        np_array: Array-like accepted by ``MinMaxScaler.fit``.

    Returns:
        The transformed array produced by the scaler.
    """
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(np_array)
    return scaler.transform(np_array)
# ---- Display configuration --------------------------------------------------
plt.rcParams['font.sans-serif'] = 'SimHei'  # font with CJK glyphs so the Chinese labels render
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign displayable with that font
pd.set_option('expand_frame_repr', False)  # do not wrap wide DataFrames when printing
pd.set_option('display.max_rows', 10)  # limit printed DataFrames to 10 rows

# Per-split metrics; one value is appended to each list per ShuffleSplit iteration.
score_train = []
score = []
r2_train = []
r2 = []

# ---- Data loading -----------------------------------------------------------
filename = '带标签的数据_速度划分.xlsx'
data = pd.read_excel(filename).values

# The last column is the class label; columns 2 .. -2 are the features (the
# first two columns are skipped). `.values` may be an object-dtype array when
# the sheet mixes column types, so cast once here: the metrics below then
# receive plain integer labels instead of the raw array, and the estimator
# receives floats.
X = data[:, 2:-1].astype(float)
Y = data[:, -1].astype(int)

# ---- Model and resampling scheme --------------------------------------------
# Multinomial naive Bayes classifier, refitted from scratch on every split.
model = MultinomialNB()
# n_s random 70/30 train/test splits (n_s is imported from `ground`).
ss = ShuffleSplit(n_splits=n_s, train_size=0.7, test_size=0.3, random_state=3)
X, Y = shuffle(X, Y, random_state=1337)

for train_index, test_index in ss.split(X, Y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    # Fit the scaler on the training split only and reuse it for the test
    # split. Scaling the test split with its own min/max would put the two
    # splits on different scales and leak test statistics into evaluation.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    # Test values outside the training range would fall outside [0, 1];
    # clip them so the features stay non-negative for MultinomialNB.
    X_test = np.clip(scaler.transform(X_test), 0.0, 1.0)

    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred = model.predict(X_test)

    # Compute each metric once and reuse it for printing and bookkeeping.
    train_accuracy = accuracy_score(y_train, y_pred_train)
    test_accuracy = accuracy_score(y_test, y_pred)

    print("Confusion Matrix:", confusion_matrix(y_test, y_pred))  # rows: true class, columns: predicted class
    print("Classification Report:", classification_report(y_test, y_pred))
    print("train_Accuracy:", train_accuracy)
    print("test_Accuracy:", test_accuracy)
    score.append(test_accuracy)
    score_train.append(train_accuracy)

    # NOTE(review): for a classifier, `score` is the mean accuracy, not a
    # coefficient of determination; the "R^2" wording in the printed labels
    # and in the second plot is kept only so the output stays unchanged.
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    print('训练集R^2:', train_score)
    print('测试集R^2:', test_score)
    r2_train.append(train_score)
    r2.append(test_score)

# ---- Plots ------------------------------------------------------------------
# NOTE(review): the y-axis label says percent but the values are fractions in [0, 1].
plt.plot(range(len(score)), score, c="green")
plt.xlabel('测试次数')
plt.ylabel('准确度(%)')
plt.legend(['测试集准确度'])
plt.show()

# Only the test-set curve is drawn, so the legend carries exactly one label;
# with two labels the test curve was captioned as the training curve.
plt.plot(range(len(r2)), r2, c="green")
plt.xlabel('测试次数')
plt.ylabel('r2决定系数')
plt.legend(['测试集决定系数'])
plt.show()

# ---- Persist the per-split test accuracy ------------------------------------
# NOTE(review): this writes CSV text to a path ending in ".xlsx" (the input
# file name with "bys_score.xlsx" appended). The path and format are left
# unchanged because downstream consumers may rely on them.
df = pd.DataFrame(score)
df.to_csv(str(filename)+"bys_score.xlsx")
# bys: naive Bayes classification algorithm, Python implementation
# First published 2024-07-15 15:39:46