Principles
The principle of logistic regression
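Binary logistic regression models the probability that a sample belongs to the positive class as a sigmoid of a linear score. As a reference for the code below (with the bias folded into $\beta$ as its last component), the model and its cross-entropy cost are:

$$P(y=1 \mid x; \beta) = \sigma(\beta^\top x) = \frac{1}{1 + e^{-\beta^\top x}}$$

$$J(\beta) = -\frac{1}{n} \sum_{i=1}^{n} \left[ y_i \log \hat{p}_i + (1 - y_i) \log(1 - \hat{p}_i) \right], \qquad \hat{p}_i = \sigma(\beta^\top x_i)$$

These are exactly what the sigmoid and cost_function helpers below compute.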
Gradient descent
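Batch gradient descent minimizes $J(\beta)$ by moving every coordinate against the average gradient over the full training set. For the cross-entropy cost above, the gradient has a closed form, which gives the update rule implemented in LogisticRegByBGD:

$$\frac{\partial J}{\partial \beta_j} = \frac{1}{n} \sum_{i=1}^{n} \left( \hat{p}_i - y_i \right) x_{ij}, \qquad \beta_j \leftarrow \beta_j - \alpha \, \frac{\partial J}{\partial \beta_j}$$

where $\alpha$ is the learning rate. The loop below stops when the cost change falls below eps1, the total parameter change falls below eps2, or max_iter iterations are reached.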
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
def cost_function(data, labels, positive_probability):
    """Average cross-entropy cost over the n training samples."""
    n, p = data.shape
    # clip to keep log() finite when probabilities saturate at 0 or 1
    positive_probability = np.clip(positive_probability, 1e-15, 1 - 1e-15)
    a = labels * np.log(positive_probability)
    b = (1 - labels) * np.log(1 - positive_probability)
    c = a + b
    return -1 / n * np.sum(c, axis=0)[0]
def sigmoid(data, beta):
    """Element-wise logistic function of the linear scores data @ beta."""
    inner_product = np.dot(data, beta)
    probability = 1 / (1 + np.exp(-inner_product))
    return probability
def LogisticRegByBGD(data, labels, eps1, eps2, max_iter, alpha):
    """Fit logistic regression by batch gradient descent.

    Stops after max_iter iterations, or when the change in cost drops
    below eps1, or when the total change in beta drops below eps2.
    """
    n, p = data.shape
    labels = labels.reshape((n, 1))
    beta = np.random.rand(p + 1, 1)  # random init; last component is the bias
    data = np.column_stack((data, np.ones(n)))  # constant column for the bias
    positive_probability = sigmoid(data, beta)
    iter_count = 0
    cost = np.inf  # cost from the previous iteration; inf so the first check never triggers
    while True:
        new_beta = beta.copy()
        for i in range(p + 1):
            # batch gradient of the cross-entropy cost w.r.t. beta_i
            y1 = data[:, i].reshape((n, 1)) * (positive_probability - labels)
            b = alpha * 1 / n * np.sum(y1, axis=0)[0]
            new_beta[i, 0] = new_beta[i, 0] - b
        new_cost = cost_function(data, labels, positive_probability)
        beta_change = np.sum(np.abs(new_beta - beta), axis=0)[0]
        iter_count += 1
        if iter_count > max_iter or abs(new_cost - cost) < eps1 or beta_change < eps2:
            break
        beta = new_beta
        cost = new_cost  # remember the cost for the next convergence check
        positive_probability = sigmoid(data, beta)
    return beta
def classification(positive_probability):
    """Predict 1 when P(y=1) >= 0.5, else 0."""
    negative_probability = 1 - positive_probability
    probability = np.column_stack((negative_probability, positive_probability))
    return np.argmax(probability, axis=1)
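As a smoke test, the implementation is run on sklearn's digits dataset, with the ten digit classes binarized into "digit > 4" versus "digit <= 4":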
digits = datasets.load_digits()
X, y = digits.data, digits.target
X = StandardScaler().fit_transform(X)
y = (y > 4).astype(int)  # binarize: digits 5-9 vs 0-4
max_iter = 20000
eps1 = 1e-6
eps2 = 1e-6
alpha = 1e-2
beta = LogisticRegByBGD(X, y, eps1, eps2, max_iter, alpha)
X_aug = np.column_stack((X, np.ones(X.shape[0])))  # same bias column appended during training
predict = classification(sigmoid(X_aug, beta))
print("Training set classification accuracy: " + str((predict == y).astype(int).mean()))
Logistic regression with sklearn
l2_lr = LogisticRegression(C=0.1, penalty='l2', tol=0.01)  # C is the inverse of the regularization strength
l2_lr.fit(X, y)
l2_lr_predict = l2_lr.predict(X)
print("sklearn training set accuracy: " + str((l2_lr_predict == y).astype(int).mean()))
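For a closer comparison with the hand-rolled sigmoid output, the fitted sklearn model also exposes per-class probabilities. A minimal usage sketch (l2_lr is the model fitted above; predict_proba returns one column per class, positive class last):

probabilities = l2_lr.predict_proba(X)  # shape (n, 2): columns are P(y=0), P(y=1)
print(probabilities[:5, 1])             # P(y=1) for the first five samples

Taking the argmax across the two columns reproduces predict(), mirroring the classification helper above.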