LR（逻辑回归）分类器实现：多分类 softmax 模型
#coding=utf-8
from scipy import sparse,io
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
from numpy import *
import warnings
warnings.filterwarnings("ignore")
# Sigmoid (logistic) function
def sigmoid(wx):
    """Return the logistic function 1 / (1 + exp(-wx)).

    ``wx`` may be a scalar or anything ``exp`` accepts element-wise
    (``exp`` comes from the module-level ``from numpy import *``).
    """
    return 1.0 / (1 + exp(-wx))
# Per-class output probabilities for one sample: prob[class_index]
def getPi(X, W, c):
    """Return the probabilities of the first ``c`` classes for one sample.

    Parameters:
        X: feature column vector (n x 1); callers in this file pass the
           transpose of one matrix row.
        W: weight matrix with one row per class and n columns.
        c: number of class probabilities to return.

    Returns:
        A list of ``c`` Python floats where
        ``prob[i] = exp(W[i].X) / (1 + sum_j exp(W[j].X))``.

    NOTE: the ``1 +`` in the denominator is kept from the original
    formulation, so the returned values sum to less than 1 (it acts like
    an extra implicit class whose score is fixed at 0).
    """
    # One matrix product gives every class score; flattening makes matrix
    # and plain-ndarray inputs behave the same.
    scores = asarray(dot(W, X), dtype=float).ravel()

    # Shift by the largest score (never below 0, the implicit class score)
    # so exp() cannot overflow; the ratio below is mathematically unchanged.
    shift = scores.max() if scores.size else 0.0
    if not shift > 0.0:
        shift = 0.0
    numer = exp(scores - shift)
    denom = exp(-shift) + numer.sum()

    # Indexing (rather than slicing) still raises IndexError if W has
    # fewer than c rows.
    return [float(numer[i] / denom) for i in range(c)]
# Stochastic gradient training of the multi-class weights
def stocGradAscent(maxI, dataMatrix, classLabels, c, alpha=0.5):
    """Train a (c x n) weight matrix with per-sample gradient updates.

    Parameters:
        maxI: number of passes (epochs) over the data.
        dataMatrix: (m x n) sample matrix, one sample per row.
        classLabels: sequence of m integer labels; labels are 1-based
            (label ``k`` is mapped to weight row ``k - 1``).
        c: number of classes.
        alpha: learning rate / step size (defaults to the original 0.5).

    Returns:
        The trained weights as a ``numpy.matrix`` of shape (c, n).

    Side effects:
        Prints ``id= <epoch> cost= <average cost>`` once per epoch. The cost
        adds ``-log(p)`` for the true class and ``-log(1 - p)`` for every
        other class, averaged over the m samples.
    """
    # asmatrix does not copy matrix input; it also lets plain 2-D arrays
    # work, because the row/transposition logic below needs matrix semantics.
    dataMatrix = asmatrix(dataMatrix)
    m, n = shape(dataMatrix)
    weights = asmatrix(ones((c, n)))

    for k in range(maxI):
        dCost = 0.0
        for i in range(m):
            sample = dataMatrix[i]
            # Probabilities are computed once per sample, before any class
            # row is updated for that sample.
            prob = getPi(sample.transpose(), weights, c)
            target = classLabels[i] - 1
            for cl in range(c):
                if cl == target:
                    weights[cl] = weights[cl] - alpha * sample * (prob[cl] - 1)
                    dCost -= log(prob[cl])
                else:
                    weights[cl] = weights[cl] - alpha * sample * prob[cl]
                    dCost -= log(1.0 - prob[cl])
        # Single-argument print form: same output on Python 2 and Python 3.
        print('id= %s cost= %s' % (k, dCost / m))
    return weights
def get_weights(dataMatrix, classLabels, maxl, matfilename, c=10):
    """Train the classifier weights and save them to a .mat file.

    Parameters:
        dataMatrix: (m x n) feature array, one sample per row.
        classLabels: the m training labels passed on to stocGradAscent.
        maxl: number of training passes passed on to stocGradAscent.
        matfilename: path written with ``io.savemat``; the weights are
            stored under the key ``'weights'``.
        c: number of classes (defaults to the original hard-coded 10).

    Returns:
        The weight matrix returned by stocGradAscent.
    """
    m, vectornum = shape(dataMatrix)
    print('%s %s' % (m, vectornum))

    # Prepend a column of ones as the first feature (x0, the bias term).
    vectormat = asmatrix(c_[ones(m), dataMatrix])

    # Train on the labels that were passed in, not on a module-level global.
    weights = stocGradAscent(maxl, vectormat, classLabels, c)

    io.savemat(matfilename, {'weights': weights})
    return weights
def calculate_result(actual, pred, average='weighted'):
    """Print accuracy, precision, recall and F1 for the predictions.

    Parameters:
        actual: true labels.
        pred: predicted labels, same length as ``actual``.
        average: averaging mode passed to precision/recall/F1. It is given
            explicitly because the labels scored in this file are
            multi-class; pass ``'binary'`` to score a two-class problem on
            its positive label instead.
    """
    m_precision = metrics.precision_score(actual, pred, average=average)
    m_recall = metrics.recall_score(actual, pred, average=average)
    m_f1 = metrics.f1_score(actual, pred, average=average)
    m_acc = metrics.accuracy_score(actual, pred)

    # Single-argument print form: same output on Python 2 and Python 3.
    print('predict info:')
    print('accuracy:{0:.3f}'.format(m_acc))
    print('precision:{0:.3f}'.format(m_precision))
    print('recall:{0:0.3f}'.format(m_recall))
    print('f1-score:{0:.3f}'.format(m_f1))
if __name__ == '__main__':
    # Number of classes; must match the class count used by get_weights (10).
    NUM_CLASSES = 10

    # Load the preprocessed train/test data.
    data = io.loadmat('SetMat1.mat')
    vectormat = data['trainSet']
    labeled_names = data['train_labeled'][0]

    # Shuffle samples and labels with one shared permutation so each row
    # keeps its label (no reliance on re-seeding two separate shuffles).
    order = random.permutation(len(labeled_names))
    vectormat = vectormat[order]
    labeled_names = labeled_names[order]

    labeled_names1 = data['test_labeled'][0]
    vectormat1 = data['testSet']
    print('load finished')

    # Train the weights, then read them back from the saved file.
    weights = get_weights(vectormat, labeled_names, 30, 'weight.mat')
    data1 = io.loadmat('weight.mat')
    weights = data1['weights']

    # Prepend the column of ones (x0) to the test samples as well.
    m = shape(vectormat1)[0]
    vectormat1 = asmatrix(c_[ones(m), vectormat1])

    # Predict every test sample: most probable class, as a 1-based label.
    proba = []
    for i in range(m):
        probi = getPi(vectormat1[i].transpose(), weights, NUM_CLASSES)
        proba.append(int(probi.index(max(probi)) + 1))

    calculate_result(labeled_names1, proba)

    # Confusion matrix: rows are actual labels, columns are predictions.
    c = zeros((NUM_CLASSES, NUM_CLASSES), dtype=int)
    for i in range(len(proba)):
        c[labeled_names1[i] - 1][proba[i] - 1] += 1
    print(c)
运行结果：
predict info:
accuracy:0.881
precision:0.877
recall:0.881
f1-score:0.877
[[ 694 0 5 17 0 0 1 1 1 0]
[ 0 31 1 0 19 0 0 1 2 2]
[ 6 0 172 1 0 0 0 9 1 0]
[ 11 0 1 1075 0 0 0 0 0 0]
[ 0 32 2 0 57 1 1 11 13 32]
[ 0 0 0 0 0 102 26 0 3 0]
[ 0 0 1 1 0 30 139 0 8 0]
[ 3 0 37 1 0 0 0 45 3 0]
[ 2 0 1 0 1 1 5 0 107 0]
[ 0 10 1 0 24 0 0 2 2 32]]