之前我们遇到的逻辑回归是用来处理一个二分类问题,例如垃圾邮件的分类判别,疾病恶性与否的判别等等;逻辑回归是非常强大的一个机器学习算法,它可以推广到一个多分类的问题,而且我们在平时遇到的问题也不是简单的二分类问题,往往是一个多分类问题;这一节我们将介绍逻辑回归的推广:Softmax Regression
多分类情况下,标签就不止两个取值了,而是 $y \in \{1, 2, \dots, k\}$;我们假定属于第 $k$ 类的概率为 $\varphi_k$,并且由于各类概率之和为 1,为了避免参数冗余,只需保留其中 $k-1$ 个独立参数:
同时我们作如下的设定:k-1行 k列的向量
(T(y))i 表示T(y)向量的第i个元素,同时我们引入一个非常有趣的计量式子:
$(T(y))_i = 1\{y = i\}$,且 $E[(T(y))_i] = P(y = i) = \varphi_i$,即样本属于第 $i$ 类的概率;
首先给出联合分布律:
其中对应到指数簇函数集:
进而由此推导出
又因为 $\eta_i = \theta_i^T x$,同时我们定义 $\theta_k = 0$,所以 $\eta_k = \theta_k^T x = 0$,
可以看出输出的是属于每一个类的概率大小,yk的概率可以通过1−Pk的形式求解
接下来就是要最大化似然函数:
l表示类别,比如样本属于第l类
下面我们通过一个示例来实现一下Softmax Regression,分类类别为4个,所用数据如下:
-0.017612 14.053064 2
-1.395634 4.662541 3
-0.752157 6.53862 3
-1.322371 7.152853 3
0.423363 11.054677 2
0.406704 7.067335 3
0.667394 12.741452 2
-2.46015 6.866805 3
0.569411 9.548755 0
-0.026632 10.427743 2
0.850433 6.920334 3
1.347183 13.1755 2
1.176813 3.16702 3
-1.781871 9.097953 2
-0.566606 5.749003 3
0.931635 1.589505 1
-0.024205 6.151823 3
-0.036453 2.690988 1
-0.196949 0.444165 1
1.014459 5.754399 3
1.985298 3.230619 3
-1.693453 -0.55754 1
-0.576525 11.778922 2
-0.346811 -1.67873 1
-2.124484 2.672471 1
1.217916 9.597015 0
-0.733928 9.098687 0
-3.642001 -1.618087 1
0.315985 3.523953 3
1.416614 9.619232 0
-0.386323 3.989286 3
0.556921 8.294984 0
1.224863 11.58736 2
-1.347803 -2.406051 1
1.196604 4.951851 3
0.275221 9.543647 0
0.470575 9.332488 0
-1.889567 9.542662 2
-1.527893 12.150579 2
-1.185247 11.309318 2
-0.445678 3.297303 3
1.042222 6.105155 3
-0.618787 10.320986 2
1.152083 0.548467 1
0.828534 2.676045 3
-1.237728 10.549033 2
-0.683565 -2.166125 1
0.229456 5.921938 3
-0.959885 11.555336 2
0.492911 10.993324 2
0.184992 8.721488 0
-0.355715 10.325976 2
-0.397822 8.058397 0
0.824839 13.730343 2
1.507278 5.027866 3
0.099671 6.835839 3
-0.344008 10.717485 2
1.785928 7.718645 0
-0.918801 11.560217 2
-0.364009 4.7473 3
-0.841722 4.119083 3
0.490426 1.960539 1
-0.007194 9.075792 0
0.356107 12.447863 2
0.342578 12.281162 2
-0.810823 -1.466018 1
2.530777 6.476801 3
1.296683 11.607559 2
0.475487 12.040035 2
-0.783277 11.009725 2
0.074798 11.02365 2
-1.337472 0.468339 1
-0.102781 13.763651 2
-0.147324 2.874846 3
0.518389 9.887035 0
1.015399 7.571882 0
-1.658086 -0.027255 1
1.319944 2.171228 1
2.056216 5.019981 3
-0.851633 4.375691 3
-1.510047 6.061992 3
-1.076637 -3.181888 1
1.821096 10.28399 0
3.01015 8.401766 0
-1.099458 1.688274 1
-0.834872 -1.733869 1
-0.846637 3.849075 3
1.400102 12.628781 2
1.752842 5.468166 3
0.078557 0.059736 1
0.089392 -0.7153 1
1.825662 12.693808 2
0.197445 9.744638 0
0.126117 0.922311 1
-0.679797 1.22053 1
0.677983 2.556666 1
0.761349 10.693862 0
-2.168791 0.143632 1
1.38861 9.341997 0
0.317029 14.739025 2
-2.65887965178 0.658328066452 1
-2.30615885683 11.5036718065 2
-2.83005963556 7.30810428189 3
-2.30319006285 3.18958964564 1
-2.31349250532 4.41749905123 3
-2.71157223048 0.21599278192 1
-2.99935111344 14.5766538514 2
-2.50329272687 12.7274016382 2
-2.14191210185 9.75999136268 2
-2.21409612618 9.25234159289 2
-2.0503599261 1.87312594247 1
-2.99747377006 2.82404034943 1
-2.39019233623 1.88778487771 1
-2.00981101171 13.0015287952 2
-2.06105014551 7.26924117028 3
-2.94028883652 10.8418044558 2
-2.56811396636 1.31240093493 1
-2.89942462914 7.47932555859 3
-2.83349151782 0.292728283929 1
-2.16467022383 4.62184237142 3
2.02604290795 6.68200376515 3
2.3755881562 9.3838379637 0
2.48299208843 9.75753701005 0
2.65108044441 9.39059526201 0
2.49422603944 11.856131521 0
2.47215954581 4.83431641068 3
2.26731525725 5.64891602081 3
2.33628075296 10.4603294628 0
2.4548064459 9.90879879651 0
2.13147505967 8.99561368732 0
2.86925733903 4.26531919929 3
2.05715970133 4.97240425903 3
2.14839753847 8.91032469409 0
2.17630437606 5.76122354509 3
2.86205491781 11.630342945 0
所用代码依然采用“拍神”python神器,每一大步关键操作我都会给出一些讲解,如有问题欢迎交流!
from numpy import *
import matplotlib.pyplot as plt
class SoftmaxRegression:
    """Multinomial (softmax) logistic regression trained by gradient ascent.

    The model keeps an N x K weight matrix (one column per class); a sample
    row x (with a leading 1.0 bias term) is scored as x * weights and the
    predicted class is the argmax over the K columns.
    """

    def __init__(self):
        self.dataMat = []    # M x N design matrix; each row is [1.0, x1, x2]
        self.labelMat = []   # M x 1 column matrix of integer class labels
        self.weights = []    # N x K weight matrix, one column per class
        self.M = 0           # number of training samples
        self.N = 0           # number of features (including the bias term)
        self.K = 0           # number of classes
        self.alpha = 0.001   # fixed step size for batch gradient ascent

    def loadDataSet(self, inputfile):
        """Read whitespace-separated lines of the form 'x1 x2 label' and
        build the design matrix, label column and initial (all-ones) weights."""
        for line in open(inputfile, 'r'):
            items = line.strip().split()
            if not items:
                continue  # tolerate blank lines in the data file
            self.dataMat.append([1.0, float(items[0]), float(items[1])])
            self.labelMat.append(int(items[2]))
        self.K = len(set(self.labelMat))            # distinct labels -> class count
        self.dataMat = mat(self.dataMat)
        self.labelMat = mat(self.labelMat).transpose()
        self.M, self.N = shape(self.dataMat)
        self.weights = mat(ones((self.N, self.K)))  # e.g. N=3 features, K=4 classes

    def likelihoodfunc(self):
        """Print and return the log-likelihood of the data under the
        current weights (should increase as training progresses)."""
        likelihood = 0.0
        for i in range(self.M):
            t = exp(self.dataMat[i] * self.weights)  # 1 x K unnormalized scores
            # log of the softmax probability assigned to sample i's true class
            likelihood += log(t[0, self.labelMat[i, 0]] / sum(t))
        print(likelihood)
        return likelihood

    def gradientAscent(self):
        """Batch gradient ascent: 10 full passes over the whole data set."""
        for _ in range(10):
            error = exp(self.dataMat * self.weights)  # M x K scores
            rowsum = -error.sum(axis=1)               # negated per-row normalizers
            rowsum = rowsum.repeat(self.K, axis=1)    # broadcast to M x K
            error = error / rowsum                    # now -P(y=k | x) per entry
            for m in range(self.M):
                # indicator(true class) - probability: gradient of log-likelihood
                error[m, self.labelMat[m, 0]] += 1
            # (N x M) * (M x K): accumulates the gradient for all classes at once
            self.weights = self.weights + self.alpha * self.dataMat.transpose() * error
            self.likelihoodfunc()
        print(self.weights)

    def stochasticGradientAscent_V0(self):
        """Stochastic gradient ascent: update on one sample at a time,
        500 epochs in sample order, fixed step size."""
        for _ in range(500):
            for i in range(self.M):
                error = exp(self.dataMat[i] * self.weights)  # 1 x K scores
                rowsum = -error.sum(axis=1)
                rowsum = rowsum.repeat(self.K, axis=1)
                error = error / rowsum                       # -P(y=k | x_i)
                error[0, self.labelMat[i, 0]] += 1           # indicator - probability
                self.weights = self.weights + self.alpha * self.dataMat[i].transpose() * error
            self.likelihoodfunc()
        print(self.weights)

    def stochasticGradientAscent_V1(self):
        """Improved SGD: decaying step size, and each epoch visits every
        sample exactly once in random order (sampling without replacement).

        Bug fix: the original called numpy delete(idxs, ...) and discarded
        the result (delete is not in-place), so indices were never removed
        and samples could be picked repeatedly within an epoch.
        """
        for l in range(500):
            idxs = list(range(self.M))  # sample indices not yet used this epoch
            for i in range(self.M):
                alpha = 4.0 / (1.0 + l + i) + 0.01   # decaying learning rate
                pick = int(random.uniform(0, len(idxs)))
                rdmidx = idxs.pop(pick)              # remove so it can't repeat
                error = exp(self.dataMat[rdmidx] * self.weights)
                rowsum = -error.sum(axis=1)
                rowsum = rowsum.repeat(self.K, axis=1)
                error = error / rowsum               # -P(y=k | x), single row
                error[0, self.labelMat[rdmidx, 0]] += 1
                self.weights = self.weights + alpha * self.dataMat[rdmidx].transpose() * error
            self.likelihoodfunc()
        print(self.weights)

    def classify(self, X):
        """Return the predicted class index (0..K-1) for a 1 x N row matrix X."""
        p = X * self.weights
        return p.argmax(1)[0, 0]

    def test(self):
        """Classify 4000 uniformly random points and scatter-plot them by
        predicted class, visualizing the learned decision regions."""
        xcord0 = []; ycord0 = []
        xcord1 = []; ycord1 = []
        xcord2 = []; ycord2 = []
        xcord3 = []; ycord3 = []
        # originally a nested 50 x 80 loop whose inner index shadowed the outer
        for _ in range(4000):
            x = random.uniform(-3.0, 3.0)
            y = random.uniform(0.0, 15.0)
            c = self.classify(mat([[1.0, x, y]]))
            if c == 0:
                xcord0.append(x); ycord0.append(y)
            elif c == 1:
                xcord1.append(x); ycord1.append(y)
            elif c == 2:
                xcord2.append(x); ycord2.append(y)
            elif c == 3:
                xcord3.append(x); ycord3.append(y)
        fig1 = plt.figure('fig1')
        ax = fig1.add_subplot(111)
        ax.scatter(xcord0, ycord0, s=20, c='yellow', marker='s')
        ax.scatter(xcord1, ycord1, s=20, c='blue')
        ax.scatter(xcord2, ycord2, s=20, c='red')
        ax.scatter(xcord3, ycord3, s=20, c='black')
        plt.title('inference')
        plt.xlabel('X1')
        plt.ylabel('X2')
        plt.show()  # plt.show('fig1') is not a valid pyplot.show signature

    def test0(self):
        """Scatter-plot the raw training data colored by its true label
        (companion to test(), which plots the model's predictions)."""
        xcord0 = []; ycord0 = []
        xcord1 = []; ycord1 = []
        xcord2 = []; ycord2 = []
        xcord3 = []; ycord3 = []
        for i in range(self.M):
            if self.labelMat[i, 0] == 0:
                xcord0.append(self.dataMat[i, 1]); ycord0.append(self.dataMat[i, 2])
            elif self.labelMat[i, 0] == 1:
                xcord1.append(self.dataMat[i, 1]); ycord1.append(self.dataMat[i, 2])
            elif self.labelMat[i, 0] == 2:
                xcord2.append(self.dataMat[i, 1]); ycord2.append(self.dataMat[i, 2])
            else:
                xcord3.append(self.dataMat[i, 1]); ycord3.append(self.dataMat[i, 2])
        fig2 = plt.figure('fig2')
        ax = fig2.add_subplot(111)
        ax.scatter(xcord0, ycord0, s=20, c='yellow', marker='s')
        ax.scatter(xcord1, ycord1, s=20, c='blue')
        ax.scatter(xcord2, ycord2, s=20, c='red')
        ax.scatter(xcord3, ycord3, s=20, c='black')
        plt.title('train data')
        plt.xlabel('X1')
        plt.ylabel('X2')
        plt.show()
if __name__ == '__main__':
    # Entry point: load the training data, fit with the improved stochastic
    # gradient ascent, then plot the learned decision regions (test) and the
    # raw training data (test0). A stray </span> HTML tag from the blog
    # extraction was removed here — it made the script a syntax error.
    inputfile = 'C:\\Python34\\SoftInput.txt'
    myclassification = SoftmaxRegression()
    myclassification.loadDataSet(inputfile)
    # myclassification.gradientAscent()
    myclassification.stochasticGradientAscent_V1()
    # myclassification.stochasticGradientAscent_V0()
    myclassification.test()
    myclassification.test0()
运行结果:
上图为V0的运行结果
上图为V1的运行结果
对比V0和V1两者的差别还是比较大的,V1更为精准一点
最后V1得到的似然函数和权值矩阵