回归问题通过线性回归已经得到了粗略的认识。接下来在线性回归的输出后加一个 sigmoid 函数,将输出限制到 (0,1),这样就可以通过判断输出值来进行分类:认为 output > 0.5 和 output < 0.5 分属两个不同的类别。可以看出,逻辑回归只能解决二分类问题。
下面从统计概率的角度去学习逻辑回归问题:
上面通过最大化似然函数得到了参数 theta。
下面看一下程序:(python)
import numpy as np
import matplotlib.pyplot as plt
class LogisticRegression(object):
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    data : ndarray, shape (n_features, n_samples)
        Training inputs, one sample per column.
    label : ndarray, shape (n_samples,)
        Binary targets (0 or 1).
    alpha : float
        Gradient-descent learning rate.
    """

    def __init__(self, data, label, alpha):
        n_features, n_samples = data.shape
        # Random initial weights in [0, 10); one extra weight for the bias.
        # (Generalized from the original hard-coded shape (1, 3), which only
        # worked for exactly two features.)
        self.theta = 10 * np.random.random((1, n_features + 1))
        self.label = label
        # Prepend a row of ones so theta[0, 0] acts as the bias term.
        self.x = np.vstack((np.ones((1, n_samples)), data))
        self.alpha = alpha

    def hypothesis(self):
        """Return sigmoid(theta . x): estimated P(label == 1) per sample."""
        return 1 / (1 + np.exp(-self.theta.dot(self.x)))

    def learn(self, iterations=1000):
        """Fit theta with `iterations` steps of batch gradient descent.

        The default of 1000 iterations matches the original behavior.
        """
        for _ in range(iterations):
            h = self.hypothesis()
            # Gradient of the negative log-likelihood: sum((h - y) * x).
            gradient = np.sum((h - self.label) * self.x, axis=1)
            self.theta = self.theta - self.alpha * gradient
        print(self.hypothesis())
if __name__ == "__main__":
    # Training set: one sample per column; the first three columns are
    # class 1, the last three are class 0.
    trainData = np.array([[1, 1, 2, 4, 5, 5], [1, 2, 1, 5, 5, 4]])
    trainLabel = np.array([1, 1, 1, 0, 0, 0])

    logisticRegression = LogisticRegression(trainData, trainLabel, 0.01)
    logisticRegression.learn()  # fit theta by gradient descent
    theta = logisticRegression.theta
    print(theta)  # learned parameters (py3 print; the original py2 form breaks on python3)

    # Decision boundary: theta0 + theta1*x1 + theta2*x2 = 0
    #   => x2 = -(theta0 + theta1*x1) / theta2
    testData = np.arange(0, 9, 0.1)
    testData_c = np.vstack((np.ones((1, testData.size)), testData))
    testResult = theta[0, 0:2].dot(testData_c) / theta[0, 2] * (-1)

    # Plot the separating line and both classes.
    plt.plot(testData, testResult)
    plt.plot(trainData[0, 0:3], trainData[1, 0:3], 'r^')  # class 1 points
    plt.plot(trainData[0, 3:], trainData[1, 3:], 'o')     # class 0 points
    plt.axis([0, 10, 0, 10])
    plt.show()
以上是程序部分,结果图如下: