文本分类学习笔记(4)- LR模型

LR分类器实现,多分类的softmax模型:

#coding=utf-8
from scipy import sparse,io
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
from numpy import *
import warnings
warnings.filterwarnings("ignore")

#Logistic (sigmoid) function
def sigmoid(wx):
    """Return 1 / (1 + e^(-wx)).

    wx may be a scalar or a numpy array; numpy's exp is applied
    element-wise, so the result has the same shape as the input.
    """
    decay = exp(-wx)
    return 1.0 / (1 + decay)

#Per-class output probabilities: prob[class index]
def getPi(X,W,c):
    """Return the model's probability for each of the first c classes.

    X -- sample as a column vector (the callers pass a numpy matrix row,
         transposed).
    W -- weight matrix with one row per class; W*X must yield one logit
         per row.
    c -- number of classes to report.

    Returns a list of c Python floats where
        prob[i] = exp(W[i]*X) / (1 + sum_j exp(W[j]*X)).
    Because of the "1 +" term in the denominator the returned values sum
    to less than 1; that term is kept so results match the original model.

    The logits are computed once and shifted by their maximum (counting
    the implicit 0 logit behind the "1 +" term) before exponentiating.
    This leaves the value mathematically unchanged but avoids the
    inf/inf = nan that the naive form produces for large logits.
    """
    logits = asarray(W*X, dtype=float).ravel()
    top = float(logits.max()) if logits.size else 0.0
    #never shift below 0 so that exp(-shift) itself cannot overflow
    shift = top if top > 0.0 else 0.0
    scaled = exp(logits - shift)
    denom = exp(-shift) + scaled.sum()
    return [float(scaled[i] / denom) for i in range(c)]

#Stochastic gradient descent
def stocGradAscent(maxI,dataMatrix,classLabels,c,alpha=0.5):
    """Train the per-class weight rows one sample at a time.

    maxI        -- number of passes over the training samples.
    dataMatrix  -- training samples, one per row; each row is transposed
                   and handed to getPi, so a numpy matrix is expected
                   (get_weights builds one).
    classLabels -- 1-based class label for every row of dataMatrix.
    c           -- number of classes.
    alpha       -- learning rate / step size (default 0.5, the value that
                   used to be hard-coded).

    Returns a (c, n) numpy matrix of weights, n being the number of
    columns of dataMatrix. After each pass it prints the pass index and
    the accumulated cost divided by the number of samples.
    """
    m,n = shape(dataMatrix)
    #asmatrix is the same function as the old "mat" alias
    weights = asmatrix(ones((c,n)))
    for k in range(maxI):
        dCost = 0.0
        #one update per sample
        for i in range(m):
            #probabilities of sample i for every class, taken before any
            #weight row is modified for this sample
            prob = getPi(dataMatrix[i].transpose(),weights,c)
            #labels are 1-based, weight rows are 0-based
            cc = classLabels[i]-1
            for cl in range(c):
                if cc == cl:
                    #true class: the gradient factor is (p - 1)
                    weights[cl] = weights[cl] - alpha*dataMatrix[i]*(prob[cl]-1)
                    dCost -= log(prob[cl])
                else:
                    #any other class: the gradient factor is p
                    weights[cl] = weights[cl] - alpha*dataMatrix[i]*prob[cl]
                    dCost -= log(1.0 - prob[cl])
        #single-argument print: same output under Python 2 and Python 3
        print('id= %s cost= %s' % (k, dCost/m))
    return weights

def get_weights(dataMatrix,classLabels,maxl,matfilename,classNum=10):
    """Train the weights on dataMatrix and save them to a .mat file.

    dataMatrix  -- training samples, one per row.
    classLabels -- 1-based class label for every row.
    maxl        -- number of training passes handed to stocGradAscent.
    matfilename -- file the weights are written to with scipy.io.savemat,
                   under the key 'weights'.
    classNum    -- number of classes (default 10, the value that used to
                   be hard-coded).

    Returns the trained weight matrix. Prints the sample count and the
    feature count before training.
    """
    m,vectornum = shape(dataMatrix)
    #single-argument print: same output under Python 2 and Python 3
    print('%s %s' % (m, vectornum))
    #prepend a constant column x0 = 1 so the first weight acts as the bias
    vectormat = asmatrix(c_[ones(m),dataMatrix])
    #train on the labels that were passed in, not on a module-level global
    weights = stocGradAscent(maxl, vectormat, classLabels, classNum)
    io.savemat(matfilename, {'weights': weights})
    return weights

def calculate_result(actual,pred):
    """Print accuracy, precision, recall and F1 of pred against actual.

    actual -- true class labels.
    pred   -- predicted class labels, same length as actual.

    The labels are multiclass, so precision, recall and F1 are requested
    with average='weighted' explicitly instead of relying on the library
    default for the averaging mode.
    """
    m_precision = metrics.precision_score(actual,pred,average='weighted')
    m_recall = metrics.recall_score(actual,pred,average='weighted')
    m_f1 = metrics.f1_score(actual,pred,average='weighted')
    m_acc = metrics.accuracy_score(actual,pred)
    #single-argument prints: same output under Python 2 and Python 3
    print('predict info:')
    print('accuracy:{0:.3f}'.format(m_acc))
    print('precision:{0:.3f}'.format(m_precision))
    print('recall:{0:0.3f}'.format(m_recall))
    print('f1-score:{0:.3f}'.format(m_f1))
if __name__ == '__main__':
    #number of classes; labels in the data files are 1..CLASS_NUM
    CLASS_NUM = 10

    #load the intermediate data
    data = io.loadmat('SetMat1.mat')
    vectormat = data['trainSet']
    labeled_names = data['train_labeled'][0]
    #shuffle: apply one shared random permutation to the samples and their
    #labels so that the two stay aligned
    order = random.permutation(shape(vectormat)[0])
    vectormat = vectormat[order]
    labeled_names = labeled_names[order]
    labeled_names1 = data['test_labeled'][0]
    vectormat1 = data['testSet']
    print('load finished')

    #train the weights, then read them back from the file just written
    weights = get_weights(vectormat,labeled_names,30,'weight.mat')
    data1 = io.loadmat('weight.mat')
    weights = data1['weights']

    #prepend the constant column x0 = 1, as is done for the training set
    m = shape(vectormat1)[0]
    vectormat1 = asmatrix(c_[ones(m),vectormat1])
    #predict every test sample: most probable class, as a 1-based label
    proba = []
    for i in range(m):
        probi = getPi(vectormat1[i].transpose(), weights, CLASS_NUM)
        proba.append(int(probi.index(max(probi))+1))
    calculate_result(labeled_names1,proba)

    #confusion matrix: row = actual class, column = predicted class
    confusion = zeros((CLASS_NUM,CLASS_NUM), dtype=int)
    for i in range(len(proba)):
        confusion[labeled_names1[i]-1, proba[i]-1] += 1
    print(confusion)

运行结果(测试集上的预测指标,以及 10×10 的混淆矩阵:行为真实类别,列为预测类别):

predict info:
accuracy:0.881
precision:0.877
recall:0.881
f1-score:0.877
[[ 694    0    5   17    0    0    1    1    1    0]
 [   0   31    1    0   19    0    0    1    2    2]
 [   6    0  172    1    0    0    0    9    1    0]
 [  11    0    1 1075    0    0    0    0    0    0]
 [   0   32    2    0   57    1    1   11   13   32]
 [   0    0    0    0    0  102   26    0    3    0]
 [   0    0    1    1    0   30  139    0    8    0]
 [   3    0   37    1    0    0    0   45    3    0]
 [   2    0    1    0    1    1    5    0  107    0]
 [   0   10    1    0   24    0    0    2    2   32]]
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值