Python代码如下 (参考:https://blog.csdn.net/weixin_43439673/article/details/84142628)
# -*- coding: UTF-8 -*-
"""Compare LogisticRegression and MultinomialNB on a tiny 2-feature binary
dataset: fit both models, print predicted probabilities and training accuracy."""
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

# Sample dataset: column 0 = x1, column 1 = x2, column 2 = class label (0 or 1).
data = [
[-0.017612,14.053064,0],
[-1.395634,4.662541,1],
[-0.752157,6.538620,0],
[-1.322371,7.152853,0],
[0.423363,11.054677,0],
[0.406704,7.067335,1],
[0.667394,12.741452,0],
[-2.460150,6.866805,1],
[0.569411,9.548755,0],
[-0.026632,10.427743,0],
[0.850433,6.920334,1],
[1.347183,13.175500,0],
[1.176813,3.167020,1],
[-1.781871,9.097953,0],
[-0.566606,5.749003,1],
[0.931635,1.589505,1],
[-0.024205,6.151823,1],
[-0.036453,2.690988,1],
[-0.196949,0.444165,1],
[1.014459,5.754399,1]
]

# Build the feature matrix X and a 1-D label vector y.
# NOTE: np.mat / np.matrix is deprecated; scikit-learn expects plain ndarrays,
# and a 1-D y avoids sklearn's DataConversionWarning (the original passed a
# 2-D matrix column as y).
dataArr = np.asarray(data)
X = dataArr[:, 0:2]     # feature columns x1, x2
y = dataArr[:, 2]       # 1-D class labels
# LogisticRegression fits its own intercept (fit_intercept=True by default),
# so the original's manual all-ones bias column would double-count the
# intercept term and is omitted here.

# Scale features into [0, 1]: MultinomialNB requires non-negative input.
scaler = MinMaxScaler()
X1 = scaler.fit_transform(X)

# ======== logistic regression and multinomial naive Bayes ========
mnb = MultinomialNB()
mnb.fit(X1, y)
model = LogisticRegression()
model.fit(X, y)
print('逻辑回归模型:\n',model)
print('多项式贝叶斯分类模型:\n',mnb)

# Predict class labels with each fitted model.
predictedLR = model.predict(X)
predictedMNB = mnb.predict(X1)
# Predicted per-class probabilities (one row per sample, one column per class).
proba = model.predict_proba(X)
proba1 = mnb.predict_proba(X1)
print(proba)
print(proba1)
# Accuracy measured on the training set itself.
print('准确率1:', accuracy_score(y, predictedLR))
print('准确率2:', accuracy_score(y, predictedMNB))
其中:逻辑回归中拟合X,y的数据格式是 <class 'numpy.matrix'> ,但多项式贝叶斯(MultinomialNB)拟合的数据格式为'numpy.ndarray',所以用X.A,y.A将X,y转为ndarray。
输出结果:
逻辑回归模型:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
verbose=0, warm_start=False)
多项式贝叶斯分类模型:
MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
[[0.92772859 0.07227141]
[0.33464154 0.66535846]
[0.47802598 0.52197402]
[0.55227502 0.44772498]
[0.80270556 0.19729444]
[0.48844173 0.51155827]
[0.87928275 0.12071725]
[0.56310224 0.43689776]
[0.6976256 0.3023744 ]
[0.77444597 0.22555403]
[0.46070734 0.53929266]
[0.88647539 0.11352461]
[0.17272211 0.82727789]
[0.72667536 0.27332464]
[0.40141409 0.59858591]
[0.10823546 0.89176454]
[0.419825 0.580175 ]
[0.17052533 0.82947467]
[0.0847848 0.9152152 ]
[0.35358284 0.64641716]]
[[0.50060924 0.49939076]
[0.45897962 0.54102038]
[0.45930678 0.54069322]
[0.47632299 0.52367701]
[0.46817322 0.53182678]
[0.43836779 0.56163221]
[0.47571961 0.52428039]
[0.49886317 0.50113683]
[0.4535919 0.5464081 ]
[0.4731553 0.5268447 ]
[0.42778984 0.57221016]
[0.46429256 0.53570744]
[0.39320124 0.60679876]
[0.50114636 0.49885364]
[0.44932823 0.55067177]
[0.38681481 0.61318519]
[0.44071424 0.55928576]
[0.4151102 0.5848898 ]
[0.40190624 0.59809376]
[0.41562854 0.58437146]]
准确率1: 0.9
准确率2: 0.65
也可将MultinomialNB换成GaussianNB、BernoulliNB.
在此数据下GaussianNB结果准确率为85%,BernoulliNB为65%。
笔记: