以logistic Regression为例实现多类别分类及Python实现

      关于logistic回归的内容,请参照前面的文章;这里主要介绍多分类的方法及其Python实现。

   1.第一种简单的方法是一对所有(One-Versus-All,OVA):给定m个类,训练m个二元分类器(每次选取其中一类作为正类,再将其余所有类看成一类,构建一个两类分类器)。分类器j以类j的元组为正类、其余元组为负类进行训练。为了对未知元组X进行分类,这些分类器作为一个组合分类器进行投票。例如,如果分类器j预测X为正类,则类j得到一票;如果分类器j预测X为负类,则除j以外的每一个类都得到一票(相当于类j的票数减一)。得票最多的类被指派给X。
    这种方法简单有效。而且对于logistic回归这类输出概率值、概率大小可以相互比较的分类器,类边界其实是一个有范围的值,可以利用概率值来提高正确率。另外,当K(类别数量)很大时,通过投票的方式也解决了一部分不平衡性问题。
# -*- coding: utf-8 -*-

from logisticRegression import *
from numpy import *
import operator

#知道了Iris共有三种类别Iris-setosa,Iris-versicolor和Iris-virginica
def loadDataSet(filename):
	"""Load a comma-separated data file into feature rows and labels.

	Every non-blank line holds the feature values followed by the class
	label in the last field. The number of features is taken from the
	first non-blank line.

	Args:
		filename: path of the text file to read.

	Returns:
		A tuple (dataMat, labelMat). dataMat is a list of rows, each being
		[1, f1, f2, ...] with a leading constant 1 so that x0 == 1 (the bias
		term). labelMat is the list of label strings, one per row.

	Raises:
		ValueError: if a feature field cannot be converted to float.
		IndexError: if a row has fewer fields than the first row.
	"""
	dataMat = []; labelMat = []
	numFeat = None
	# A single `with` block replaces the two unclosed open() calls.
	with open(filename) as fr:
		for line in fr:
			stripped = line.strip()
			if not stripped:
				# Blank lines (typically a trailing newline) carry no sample.
				continue
			curLine = stripped.split(',')
			if numFeat is None:
				numFeat = len(curLine) - 1
			lineArr = [float(curLine[i]) for i in range(numFeat)]
			dataMat.append([1] + lineArr)  # leading 1 makes x0 equal to 1
			labelMat.append(curLine[-1])
	return dataMat, labelMat

# One-versus-all (OVA) multi-class classification of the Iris data set.
# The position of a name in categorylabels is its numeric class id.
categorylabels = ['Iris-setosa','Iris-versicolor','Iris-virginica']
opts = {'alpha': 0.01, 'maxIter': 100, 'optimizeType': 'smoothStocGradDescent'}

# ---- Training: one binary classifier per class ----
dataMat,labelMat = loadDataSet('train.txt')
dataMat = mat(dataMat)  # converted once instead of on every pass of the loop

weight1 = []
for category in categorylabels:
	# Binary targets as a column matrix: 1 for the current class, 0 otherwise.
	labelMat1 = mat([1 if label == category else 0 for label in labelMat]).T
	weight1.append(logisticRegression(dataMat,labelMat1,opts))

# ---- Testing ----
dataMat,labelMat = loadDataSet('test.txt')
dataMat = mat(dataMat)

h = []  # predicted class id for every test sample
for j in range(len(labelMat)):
	voteResult = [0]*len(categorylabels)
	for i in range(len(weight1)):
		prob = float(sigmoid(dataMat[j]*weight1[i]))  # classifier i's output for sample j
		if prob > 0.5 and prob <= 1:
			# Positive vote; the probability is added so that equal vote
			# counts are separated by classifier confidence.
			voteResult[i] = voteResult[i]+1+prob
		elif prob >= 0 and prob <= 0.5:
			voteResult[i] = voteResult[i]-1+prob
		else:
			# Only reachable when prob is outside [0, 1] (e.g. NaN).
			print('Properbility wrong!')
	h.append(voteResult.index(max(voteResult)))
print(h)

# Map the true label names to class ids. Unknown names become -1 so that
# labelMat2 stays aligned with h and such samples count as errors.
labelIndex = dict((name,i) for i,name in enumerate(categorylabels))
labelMat2 = [labelIndex.get(label,-1) for label in labelMat]

# ---- Accuracy ----
# len([...]) is used instead of sum(...) because `from numpy import *`
# at the top of the file replaces the builtin sum.
error = float(len([1 for predicted,actual in zip(h,labelMat2) if predicted != actual]))

pro = 1 - error / len(labelMat)  # accuracy
print(pro)
</pre><pre class="python" name="code">    
<span style="white-space:pre">	</span><span style="font-family:SimSun;font-size:24px;">没有优化的情况下的准确率:<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAnQAAACNCAYAAAA+a+c3AAAe8ElEQVR4Ae3dTWgjaX7H8V/NyylLsifJSE0gs9tZSEurjSPDviSE0G5iLRbZzTS5uBMH7MNm9iD1wTO0ySGwwQPjgO3LnuyEBhvysoeADD2DuwnJLCFEGk800qW3oQcCVreU6UtmBjYnhadKZZflemRJlt0q+9vQSHpUep7/83meUv1V9chyfvf3/6At/iGAAAIIIIAAAghEVuC1/2k1Ixs8gSOAAAIIIIAAAghIr732ioMDAggggAACCCCAQIQFXolw7ISOAAIIIIAAAgggIImEjmmAAAIIIIAAAghEXICELuIDSPgIIIAAAggggAAJHXMAAQQQQAABBBCIuAAJXcQH8GWH324nNHvvnmYT/i1/BedljwntI4AAAldR4Kofj0aS0LUTs7p3b1aJ9ugP5u12Vgtra1pbW1D2HOrvNekva7969Xng55KTSrf2VJJ3u9s4+7em29kFrY1oPl3E/EnM3tP6+vpLmaPB8RrWbZj4Pdez7ZPDtBvsr39/VPX49YXd9uqv+z6xtubOgYWs9x7Ya/uw+gctO4/6r+r8GdTetj3z0CZzgeXncDy6wOjP3NRrYTWYBCrteAfmdm1bd7cqYZsdlR3sq6Z5TSZLajSOirMLa7qTPjrA17YL2qocPT7a0n7PcSraLEqL6xn7Rn0+Y954l5duKt7pmx+Pyerzy0uajnf63Hyo1ZWSDiz96rO5npuNsl89GzrnJ6cmpfubZU3l8+6tHEfd497XHOrE6Y7FrZTqe5tqdMbpLF24COfG7rsqlLIjmaPD9vUsbsPF31CzPqHA7j5w6MO1e7KZUdVzsuZgSXh/Xff5m2rtFLVSntLi+qKy5U2VFb59sMaz3R9t/f78+e//S+ntjWk1H76nd3cbh/tyr314OP+zxz9cu2dTN692rYLHi84x8mLiCXe7LPPwrKMTdjw6a51Ren3oGbrN4o5q7Zq2CwUVqxndm0307JPjNLRfk9KTyWPblTeP6im891CxucW+zrIlZmf72u5YY6c8cCd8TrpfLKpYLKqwXVPqMJ6EVFv1ygvb2tkrucmErV+nNGV9+jz6ZW3sgp6o7O66Vv6taTY47sZ6pznR/9nbqZxu6pEelIfvwGV0PlVjBG62NsI8zb6xu+WNve11l6nc3t+E4rG6qmXJfHjYuruliuPIvv3gKhfi35k/f/8326q124qlJ5VoJzQRMwlMTTubw++QFxL/4KxDvyKZn1c6cLyoxwZ4fxu6Ve+F9nl1SebhGX3845B/e8bqIvfy0ITuWC/KVbXcnbv3pYQDL6Pr/8B9rJGjB+ZSQu5m/KhgZPcSiqfiOkxNy1XVO3WbN+LdXe9cQzI/IQXeu8a/XyMDOoeKprSwtiCVvATZNNDrUpF/lqDeSaiHCej85s8w0VzMa0bhZov0KnraLF5G+UX4h86fmHmvNEnC2Xp9EfGfLcLBXm2sJtNSbf/AfaE5djyo6ei4Mlh1kdn6so1jZOAHDDT0kuuJOtydW+7lFe9TaMV8HD22mdPY1V5rTbmpko5foU3pzsaG7kgylzjLmtLi2px7Sdc83tSiNu6k3U+B29vSn/5p2q03vbGhOfPJsGguX3j/JvLLWp+OH14OMKX+ZdSYWnq0uqLSgbnsMaeU6nr0KKbpw+3Np+ejuJP5W4o9uu9+mu5U7yYbk9rXbqBvYf0ylxTnUnXV6yml044bz0op4bZrLlVfbL96X/Sy+xivPcWXPCvjbM4sdG8//Jo4M+7euFZ9YPlnMY7GIfCU1DlLsGoG3JF7uScqzsf6EXhg8zTrbd6e9j64dM+XndWmbi3dVEx1bRf+S9/a
uBMynwPjHnBL5O9p6abUasXc19eVUqy2enj5zHiasdbiujuH/XEPhHx416ypMvum+Xdsf5w62me7X+/31yxrCF6m6+7voEsv/KBs9fvPd9/a2g0uC2h3lliYS/xh5QeW/gbrPg+fs/if/n4YPn88P++9LZNvKlavq57ySv33vfOYPyZhMO/b/lIffxzds4PmGJDMHy6XCc4rf7vu2+DYmP3Ln29h42sb9/AlHybJbalq8rnOIbDRuUrRHUPw8SjiufTzMAimwd//u9+Luqq7Eg/7S+j6pChX65rLTEmV4Jo7c1DykgWzB5iE0F0TtzzhXlZznC1tT8yq0TmLU6l4a5GqnQTDTRzdk4MppbWqQsEkTjllSybRSyo/n1ZttaiS8lqez2t/pdRZczd3YnuTsJh/Zuea1333IHesa1MT0n4nmwg80d0vc0kxsz4nVYsqPDDtTipZ6rT7EvsVCNm9637y9n1MorucV9L1aSi/nNPzg4ZUf6QHm94ZtLDtEytHZ9e66+/92Ix7VZk+1z4enSU4WjsXFWebg83zQFPK3Wxpu7DifcBZX9RUcVMFdw3WnObmm1otFr01hK9IlaLjHey65r+bgJv1n4E1h+3Snuo3b6l5f0fxpVtqru5IS539xZ233lpUz7b3ulSnsqVC+eT+GCwP9t3rb3A9WUbZdtntY3d/zTozf38M1tHrvq1+Wz3eWYXjzt76tillUsH3JfeThvuBLqzc1t/gmqng+5Vte1v85kNuqM9Q/oO9H7oxBeaPOt9razZbSqfNqaialPI+eATnTPC+bcyCDqf5HJ4oCKms7b7Pn5xX5z3uIaEMVcQ87O+43I07zPt/dx1X7fHpl1yTE4q1mv0tfjaXMVPmTbzzrmDR9E5TpzWfT8p8Cs087ydpqGuv5J3mPqrW+7T03BQfPFercybRez5sey/rD03mJE1l4nLrOmrAuxfaL2/dTLDdl9mv7pC9x8Ynpum3N7RhzvJ0fMw6jNKKSbbmpQdB+/Dtw+vup7TsrikqT02dOif8s3Mn185FwdlmEe7pfqjZkXvm2h2XYy+vayc0iQ6fz+Furc489m+PNXCOD0x/w9aTVbRp7e8g4YTXb6vB5uyWF838N99MNn9yx3u/spXb6h+8PDx+W5zD1d8Z8z7fD8Pnj/TcLKGJmUuLzwcPY+SvCHezNWPztI2vrTy8/oaarZgmAsvF24lEz6VG5xtPeJS9S8M9bXH2rivsWVP/gPMwrBq3bFTv/9YGLtUTpyZ0SW/BQF/fODQTolpPyZykO+3fQem+aukluSe6/GuqXS9qZ7OnJAKBnauPxNOcmctUi4dn5oKLdc2nqEzn0kJXGO5ZxZP96uzUXe2+jH51x3v02Pi09PC9gvuFj+LqAzcxdz+VL2dULd6X5oNfVAnf/qi+we+5bWV6fxvy6CxBMLn024qCsx9r9224p3t5aU7ul47Ml4Va3S/r83Fvt1MqmcrIMt2tL+xvfzza//39a1T9NYs+mq2T9dsCtrVryvP5hrbu3lWxaJYdePuArdxW/+Dl4fHb4uyuvz//8P2luy7zuOf8MQmhenwgOJf5ExalKQt3s21t87SNr608rH7zYTj4BUC3rVyi5/HxPOMJi/H0snBPW5zd9Y16HnbXf/xx+Hzu5zh7vJ6r8ejVWCz2V91dXVxfUtqJ65u5nL775T/rL//h8eEm7qCv5/Tq+/uhk/jg1Wv6s8yren9fyi//SN/5SlzpyddV+/CxPg+uTXM+1+NfXtPk/zzQPz754rB+cyc5+UP94HZOM84n+tv9ZGdtRVzpay/0/qsZzWfSSs9c04v3/1X/8vSrur30lt783lf0Hz/dUuVzbw2diT+4feoPf0u//kZCv5nJaWZmxv3/3V95qg8+/EUnroQmZybUPKVfH3xs1p6Ybb+mN66/qdt/+DV9+s8b+rdn3uVc54L7ZRw+e/CKvr/+/RNj4sbS8bmdy2nmVz/VP7meP9K3v6zpgw8dfevN
aU27lvt65nyhx13b/+zjZ8fGpp8HU4tLuhX35k8u9z39Ruvf5dcTOn+m/kRvfa2m+//gj4Xfyng5v7DMDfNB4a9//F3FO/vMb7/2iX7+i88V5m8c3IPCi0m98868Zr7+pT79MqWb05P6/g9P1uF5mbVFx+ezO+6fXdcPutzMN/D++GtvyLnmKB1PH97GX6/p508eq/n6jP7ix7c147RUj6U0fe2Fasn50Pj9UQjuj3/38TP3rPrGO7cO+5v87IE+bpjlFN4+PX9n3t2/vv70Z+6+Hdbf6d97Xc9+7U0th7j57Xbf2uq3+ze03+Vs2v3k3x4rcfttvXU7p1zum4rV92T6Zf594/bSiXJ3LVtIf931fD/5I73RNe627W3x23zc90w5x94PT/Xv8/3wtPkTf/0TPf20qV9845Y7n8x76QcfV851/nSPt//Y5nbe4+633337v4+f6qudeTIz42hvxfu293nHY5tXkZ+HluPXsMfZ7vG6So+dGzdu9L4+OqCGdwAya3dWdNqC+uzCgrQ5+FqaAUMayebBfnlfvDBnuMJjj1K/BsW5fv26njx5MujLrNu7ZwmWlxTfKx4uXPY39syvprNvYLvt5WZ7TRTLRz3fomhwHjFflfkzKjvm4agk+6+H9//+rfwtR57Q+RXbbr1BOvqWq//tI9v241juf1Mq+O24y9CvcbMeJ+fg+Aad/G/h2RZoB7flPgIIIIBAfwLj9P7fX8Qvf6sLT+hefpeJAAEEEEAAAQQQuFwCp34p4nJ1l94ggAACCCCAAAKXT4CE7vKNKT1CAAEEEEAAgSsmQEJ3xQac7iKAAAIIIIDA5RMgobt8Y0qPEEAAAQQQQOCKCZDQXbEBp7sIIIAAAgggcPkESOgu35jSIwQQQAABBBC4YgKv9erv4d+BaTf1qI8/FNyrLp5DAAEEEEAAgasp4P6ixdJNxR1H7dq27m5VribEOfa659+hy87OqlEK+33Nc4yIqhFAAAEEEEDgUgq4v1KST2h3l4Ru1APc8wxdd2P+z8VMxx0FfyWhe7vTHlNPbyF8LsbHb2VUP9VGPb5o+C0+4S5+6bj4BM+kmNhq24UTP8vnx9zP7Vn7Nap4xq0e3+6sPuNWz6ic/X5x27/AQAmd+eFv85ubxYrj/kD3cj6pd3fNj9UP9o96envhczE+7o9d30nL/fmu3k32fJZ6evK47xUbOFuRxmn+uB8mc9L9YlENc2ksu6D1uUVly+G/W23tlDSScR9VPONWj3Ebp3EfVTyjcu41r3jOLtD3lyLMQE2mpaafv5WraqUnlWi37bWHPEM9ISiBInwCGCF3R+VjqnYqWyoUtlUPaWeQIurprYVPlHwSiqfiSvghl6tD7x+jGfdRxTNu9VzW959ROfsTkNtBBPpO6KSE4rGWnh8Eqo8FdvxAce+71INPR+Clzp/eo8CzCFxFAcepaOvuliqO43Y/mb+l2KMHh48v2mRU8YxbPRfteFHtjcr5ouK9bO0MkNBdtq7THwQQQAABm0Bi9p7mdX+oZTW2Os9SPqp4xq2es5iM82tH5TzOfRy32AZI6BpqtmKaSAa60GrKvwIbKD3lLvX0BsLnYnx6t8KzCFxlAfMnq8YpmRtVPONWz2WdY6Nyvqw+59WvvhM6x2lovybF/cUVUxnFavvewtl2VgtrC8r2sZ6OenoPJT4X49OrlfYA85l6egn0fg7n8fQxZ1Yy1eLhmbnE7Kz73v6yxmtU8YxbPbbRf1nOo4rH5myrn/LRCQz0LdeD0n01l9e1fsf/syUHUmethQY4W0c9vQcQn4vx8b9lZlpLb2woE/zzDAPMZ+rpPV74RMfHJBO5mzGlnQ2t3/Hidv9EVanThwveL0YVz7jVYzQv435xqnPvXYFnzyjQ+w8LL6zpTtpRu8cvRZhvHeaX56X7K9pteAtpw2K6fv26njx5EvaUW0Y9+DB/Tu4e7BfsF+wX7BfdAlE7npr4g3+fjl+K6B7R0TzumdCNpglq
QQABBBBAAAEEEDhPgb7X0J1nENSNAAIIIIAAAgggMLwACd3wdrwSAQQQQAABBBAYCwESurEYBoJAAAEEEEAAAQSGFyChG96OVyKAAAIIIIAAAmMhQEI3FsNAEAgggAACCCCAwPACJHTD2/FKBBBAAAEEEEBgLARI6MZiGAgCAQQQQAABBBAYXoCEbng7XokAAggggAACCIyFAAndWAwDQSCAAAIIIIAAAsMLhP6Wq/dzQ0uajvu/2VpSw//N1pC2gr9JZ56udX4Tk/KCtir2n0MLoaQIAQQQQAABBBAYWCA0oUvm5xXfK6pYcdwfEF7OJ/XubiO0cvNjvItzMT18r+D+lqtJ4tbnFvU7//lf+tatpt4rbLrJoPc7blevPFveVKVHMhyKSiECCCCAAAIIIDCAQGhCN5mWmvudWspVtZYnlSgdWM7SNdRsBVosV1W/NaFnTkUfvetIfjKTiCtWr+qjVz7SR+/qypSTzAXmBncRQAABBBBA4FwEQhO6eKyl6oFJujptxuJKSAo7R+c4DZVW9rS4vq51x1G7XdNO0TsrZ17tnsFbn1OqvqO7W5XDTly18sOOcwcBBBBAAAEEEBixgHPjxo12d50LawuqFr1LhV7ilTl83L2teWwusy5q010vlpi9p3ndP3GJ1r0Um6keS+r8165fofIwP8oQQAABBBBAAIGzCIR+y7XZimkiGai21Qw9O2e28NbQSdWyt/1BaU+tmzll2115orkUm8pQHmDlLgIIIIAAAgggMAqB0IRuvybFzTVW828qo1ht37J+rrONUspMde4nJxTrJHoL92aV8BO7qYxSraYO2lldpfKwy9S+GrcIIIAAAggggMAoBEIvudr+bInt8qv3Ddabirtr6Jp6tLpy+I3XjTtpN05/bZ35kkDwz5lchfJRDBR1IIAAAggggAACNoHQhM62MeUIIIAAAggggAAC4ycQesl1/MIkIgQQQAABBBBAAAGbAAmdTYZyBBBAAAEEEEAgIgIkdBEZKMJEAAEEEEAAAQRsAiR0NhnKEUAAAQQQQACBiAiQ0EVkoAgTAQQQQAABBBCwCZDQ2WQoRwABBBBAAAEEIiJAQheRgSJMBBBAAAEEEEDAJkBCZ5OhHAEEEEAAAQQQiIgACV1EBoowEUAAAQQQQAABmwAJnU2GcgQQQAABBBBAICICJHQRGSjCRAABBBBAAAEEbAIkdDYZyhFAAAEEEEAAgYgIkNBFZKAIEwEEEEAAAQQQsAmQ0NlkKEcAAQQQQAABBCIiQEIXkYEiTAQQQAABBBBAwCZAQmeToRwBBBBAAAEEEIiIAAldRAaKMBFAAAEEEEAAAZsACZ1NhnIEEEAAAQQQQCAiAiR0ERkowkQAAQQQQAABBGwCJHQ2GcoRQAABBBBAAIGICJDQRWSgCBMBBBBAAAEEELAJkNDZZChHAAEEEEAAAQQiIkBCF5GBIkwEEEAAAQQQQMAmQEJnk6EcAQQQQAABBBCIiAAJXUQGijARQAABBBBAAAGbAAmdTYZyBBBAAAEEEEAgIgIkdBEZKMJEAAEEEEAAAQRsAiR0NhnKEUAAAQQQQACBiAhc6oSunZjVvbU1ra+vayHbdoek3c5qYW1B2bb3+LRx6nd7b7s1rQ1Qt992u53Q7L3hXntUR3/9Okucflv93Pbr1k9dg24zCs9B22R7BBBAAAEEXqZAaEKXXfCSIJMIBZOhlxnooG2bg3p+/qZaO0UVCtvS3GIniWuoWW+q0XeF/W3vOBVtFndU77veow0dp6HSynCvParl/OM8aqufe/3F009Ng25zVk8/ITRzf+3erBJ9Jv+Dxsn2CCCAAAIIjEogNKErb+6o1q5pu1BQ4b2Hih0mQ6Nq9iLqSSgeq6talkyytXV3SxXHkTnY727tquE4fQUx6PZ9VXoOG/WKMzE72/cZyVGFNm7xDNKvZH5e8b2iisWiintxzeeTg7ycbRFAAAEEELhwgdCE7sKjoMFzEzCXPnM34+dW
/6AVj1s83fGbs3OTaanpn8ItV9VKT3KWrhuKxwgggAACYyXwmj2alO5sbOiOpNp2QeWpRW3cSavdrmlntalbSzcVU107xU33zJdZr7bslrX0aHVFuw3vDJhfHncctWvburtVcZv0y2M6vr253Hsn3Xlt86FWV0ru2TRbeVj8idl7envaS2LSGxuaMzEXN4/3wY+7ndXi+pxSquvRo5imp+NqPnxP7+421M4uHPW5s73fXlg8B50nJ/LLWg/UY4pt/T1so9lUSy2/+hO3JhEycapeVzqdVq1WUyola7/cNjvxm/vHHDq1h8V5ouFOQXf8pYOpjpsZvz3FlzxDm7Opxu+ruR+Mx5w5tf3z2+13/vhttLs8/Xq659vJds2Z3ZaqZjD9sGJxJaQBLtOfrJUSBBBAAAEEzlOgR0JX13bBS9bMkc3RlgplL6mYm29qtVj0LluaRM1dr5ZWbbWokvJans9rf6WkAyUP17GtlE0CkFG2XVbZLe9sbxKD5bwS7vZTyqSC7ZqDqqk/G1pug2nsvqtCycSaUdVPxMzl1orfh8zhS721b3KTk7RWVSgktLieU7a0qUrI9uaF1njc71mk1F2Pvb9J5W/F9PC9gue2ZD+TZuKs1m9p4sGmqlqUHlRVfVD2LiNb4gz2N+ggS5y2xOrY+HbGK7lS0maxofxyTs8PGlL9kR5sesl3sN1DaDOUgTiPxRPcKHDfa9dbB2mdP93zLcQzLH4z3/q97B4IibsIIIAAAgiMpUCPhM4Wb107Jw6GgbMaeq5WLOOe0ThQ9zq2ipugqW3KY0q/vaFpN0GqqWrOgLhfLDDJ1brmAmfujpKu4+W2CIcrr2uvZE7LmHMxvf/1jiesnvD+ej6ds0HGTfaEzkRUrra0PJlXPJXqnJ0zCwR7x2p/NixO29bh8TfcL3NUtbg+r+bqyjkkSL3mT8Dt2HwLlB96WuIP7W5DzdYtTZhlc/5l19YgX6IJrZRCBBBAAAEEzlVgiIQuLJ7AQVATih0eAE35nDJTUqUimcX5iVJJZZnylpr3vUuz7URC5vhpznzl8w1t3b3r3l9cX1TWXCrVVGi57YxSWISjLLPHaWslvL8mYzhMHhIZpSQ3sbXVIrOey1x2ffRIrfRRvmHdvuuJdjarqbI5Qzrov/D43TNfy+Ys6H1NLC8qu+Kf0e2vfj8e+ziadm3zp5N0nZhvnfJjnuHxh0VpvsyxX5Ny/jXWqYxitQfnkKyGtU4ZAggggAACwwm8GovF/qr7pfnlJX3nK3GlJ19X7cPH+txc9jTrsd65pbgT1zdzOf32a5/o57/43H2p43yux0+/qttLb+nN731F//HTLX30hflG6ed6/Mtrmr8zr5mZGX396c/0j0++8Mo729/O5TTzq5/qZx8/c+v6xu0lvXU7p1zum4rV9/R3p5R3x24eu+ulfvJHeqMr1u4+JD97oP3OWrC0E1f62gu9/2pG85m00jPX9Nlnk3on0Gez/cedtYHdcf7tftK7bBtSz4v3/1X/EtJfz+cb+vFfzGvGaakeS2n62gt98LF/auh470yy8eq1a2r+/Yd6+stnevzsC3eDsH75cZoNkpM/1A9u5zTjfKLece6HJi7B8fXH65/c/v5I3/6ypg8+dPStN6c1PXNNL97f14FZb2lx647HH9/jPfUe9TN/Ts63MM/Hh/PTj9+fb2Ht/u/jp0r++U/ceTgTr+mnWxV3HwjbljIEEEAAAQTGQcC5ceNGf39hdxyi7RHD9evX9eTJkx5bRPOpl9Uv2o3mfCFqBBBAAIGrKXBpErqrOXz0GgEEEEAAAQQQkPg7dMwCBBBAAAEEEEAg4gIkdBEfQMJHAAEEEEAAAQRI6JgDCCCAAAIIIIBAxAVI6CI+gISPAAIIIIAAAgiQ0DEHEEAAAQQQQACBiAuQ0EV8AAkfAQQQQAABBBAgoWMOIIAAAggggAACERcgoYv4ABI+AggggAACCCBAQsccQAABBBBAAAEEIi5AQhfx
ASR8BBBAAAEEEECAhI45gAACCCCAAAIIRFyAhC7iA0j4CCCAAAIIIIAACR1zAAEEEEAAAQQQiLgACV3EB5DwEUAAAQQQQAABEjrmAAIIIIAAAgggEHEBErqIDyDhI4AAAggggAACJHTMAQQQQAABBBBAIOICJHQRH0DCRwABBBBAAAEESOiYAwgggAACCCCAQMQFSOgiPoCEjwACCCCAAAIIkNAxBxBAAAEEEEAAgYgLkNBFfAAJHwEEEEAAAQQQIKFjDiCAAAIIIIAAAhEXIKGL+AASPgIIIIAAAgggQELHHEAAAQQQQAABBCIuQEIX8QEkfAQQQAABBBBAgISOOYAAAggggAACCERcgIQu4gNI+AgggAACCCCAAAkdcwABBBBAAAEEEIi4AAldxAeQ8BFAAAEEEEAAARI65gACCCCAAAIIIBBxARK6iA8g4SOAAAIIIIAAAiR0zAEEEEAAAQQQQCDiAiR0ER9AwkcAAQQQQAABBEjomAMIIIAAAggggEDEBUjoIj6AhI8AAggggAACCJDQMQcQQAABBBBAAIGIC5DQRXwACR8BBBBAAAEEECChYw4ggAACCCCAAAIRFyChi/gAEj4CCCCAAAIIIEBCxxxAAAEEEEAAAQQiLkBCF/EBJHwEEEAAAQQQQICEjjmAAAIIIIAAAghEXICELuIDSPgIIIAAAggggAAJHXMAAQQQQAABBBCIuAAJXcQHkPARQAABBBBAAIH/B6G5yRNZU8E/AAAAAElFTkSuQmCC" alt="" /></span>
2.第二种多分类方法为所有对所有(All-Versus-All,AVA),也就是每次针对一对类别学习一个分类器(one vs one at a time)。假定有m类,那么要构建m(m-1)/2个二元分类器。每一个分类器都只使用它应该区分的两个类的元组来训练。为了对未知元组分类,所有的分类器投票表决,该元组被指派到得票数最多的类。一般来说,"所有对所有"优于"一对所有":它解决了不平衡性问题,但是会占用更大的空间。下面的程序主要修改了训练过程:
# -*- coding: utf-8 -*-

from logisticRegression import *
from numpy import *
import operator

#知道了Iris共有三种类别Iris-setosa,Iris-versicolor和Iris-virginica
def loadDataSet(filename):
	"""Load a comma-separated data file into feature rows and labels.

	Every non-blank line holds the feature values followed by the class
	label in the last field. The number of features is taken from the
	first non-blank line.

	Args:
		filename: path of the text file to read.

	Returns:
		A tuple (dataMat, labelMat). dataMat is a list of rows, each being
		[1, f1, f2, ...] with a leading constant 1 so that x0 == 1 (the bias
		term). labelMat is the list of label strings, one per row.

	Raises:
		ValueError: if a feature field cannot be converted to float.
		IndexError: if a row has fewer fields than the first row.
	"""
	dataMat = []; labelMat = []
	numFeat = None
	# A single `with` block replaces the two unclosed open() calls.
	with open(filename) as fr:
		for line in fr:
			stripped = line.strip()
			if not stripped:
				# Blank lines (typically a trailing newline) carry no sample.
				continue
			curLine = stripped.split(',')
			if numFeat is None:
				numFeat = len(curLine) - 1
			lineArr = [float(curLine[i]) for i in range(numFeat)]
			dataMat.append([1] + lineArr)  # leading 1 makes x0 equal to 1
			labelMat.append(curLine[-1])
	return dataMat, labelMat

# All-versus-all (AVA) multi-class classification of the Iris data set.
# The position of a name in categorylabels is its numeric class id.
categorylabels = ['Iris-setosa','Iris-versicolor','Iris-virginica']
opts = {'alpha': 0.01, 'maxIter': 50, 'optimizeType': 'smoothStocGradDescent'}

# One pairwise classifier per (positive, negative) class-id pair, in the
# same order as before: 0 vs 1, 1 vs 2, 2 vs 0.
classPairs = [(0,1),(1,2),(2,0)]

# ---- Training ----
dataMat,labelMat = loadDataSet('train.txt')

weight1 = []
for posId,negId in classPairs:
	posName = categorylabels[posId]
	negName = categorylabels[negId]
	# Pick the rows of the two classes by label rather than by the fixed
	# slices 0:40 / 40:80 / 80:120. For a file sorted by class with 40 rows
	# each this selects exactly the same rows in the same order.
	rows = [k for k,label in enumerate(labelMat) if label == posName or label == negName]
	pairData = mat([dataMat[k] for k in rows])
	# Column matrix of 0/1 targets, the same shape the one-versus-all script
	# passes. NOTE(review): assumes logisticRegression accepts it here too.
	pairLabels = mat([1 if labelMat[k] == posName else 0 for k in rows]).T
	weight1.append(logisticRegression(pairData,pairLabels,opts))

# ---- Testing ----
dataMat,labelMat = loadDataSet('test.txt')
dataMat = mat(dataMat)

h = []  # predicted class id for every test sample
for j in range(len(labelMat)):
	voteResult = [0]*len(categorylabels)
	for (posId,negId),weights in zip(classPairs,weight1):
		prob = float(sigmoid(dataMat[j]*weights))  # this pair's output for sample j
		if prob > 0.5 and prob <= 1:
			voteResult[posId] = voteResult[posId]+1
		elif prob >= 0 and prob <= 0.5:
			voteResult[negId] = voteResult[negId]+1
		else:
			# Only reachable when prob is outside [0, 1] (e.g. NaN).
			print('Properbility wrong!')
	# NOTE: votes are plain counts, so a three-way tie (one vote each)
	# resolves to class 0 because index() returns the first maximum.
	h.append(voteResult.index(max(voteResult)))

print(h)

# Map the true label names to class ids. Unknown names become -1 so that
# labelMat2 stays aligned with h and such samples count as errors.
labelIndex = dict((name,i) for i,name in enumerate(categorylabels))
labelMat2 = [labelIndex.get(label,-1) for label in labelMat]

# ---- Accuracy ----
# len([...]) is used instead of sum(...) because `from numpy import *`
# at the top of the file replaces the builtin sum.
error = float(len([1 for predicted,actual in zip(h,labelMat2) if predicted != actual]))

pro = 1 - error / len(labelMat)  # accuracy
print(pro)

可知,最佳情况下准确率有所提高:


参考:http://blog.sina.com.cn/s/blog_5eef0840010147pa.html


  • 4
    点赞
  • 34
    收藏
    觉得还不错? 一键收藏
  • 9
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 9
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值