Quick review
- What is logistic regression?
- What is logistic regression used for?
- How do we solve for the parameters of logistic regression?
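For reference, here is a sketch of the standard formulas that the code below implements (the notation is mine, not from the original code), written in LaTeX:

% Hypothesis: probability that y = 1, with sigma the sigmoid function
h_\theta(x) = \sigma(\theta_1 x_1 + \theta_2 x_2 + \theta_3), \qquad \sigma(z) = \frac{1}{1 + e^{-z}}

% Log-likelihood over m samples (Method 2 ascends its gradient; Cost below is -\ell/m)
\ell(\theta) = \sum_{i=1}^{m} \left[ y^{(i)} \log h_\theta(x^{(i)}) + (1 - y^{(i)}) \log\left(1 - h_\theta(x^{(i)})\right) \right]

% Partial derivatives used by both gradient functions below
\frac{\partial \ell}{\partial \theta_j} = \sum_{i=1}^{m} \left( y^{(i)} - h_\theta(x^{(i)}) \right) x_j^{(i)}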
Data preparation
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
np.random.seed(12)
num_observations = 5000
# Generate data from the given means and covariance matrices
# np.random.multivariate_normal draws samples from a multivariate normal (Gaussian) distribution
x1 = np.random.multivariate_normal([0, 0], [[1, .75],[.75, 1]], num_observations)
x2 = np.random.multivariate_normal([1, 4], [[1, .75],[.75, 1]], num_observations)
simulated_separableish_features = np.vstack((x1, x2)).astype(np.float32)
simulated_labels = np.hstack((np.zeros(num_observations),np.ones(num_observations)))
plt.figure(figsize=(12,8))
plt.scatter(simulated_separableish_features[:, 0], simulated_separableish_features[:, 1],
            c=simulated_labels, alpha=.4)
Method 1: Solve the logistic regression parameters with sklearn's built-in solver
# 1. Import
from sklearn.linear_model import LogisticRegression
# 2. Set up the model (a very large C effectively turns off regularization)
clf = LogisticRegression(fit_intercept=True, C=1e15)
# 3. Fit (train)
clf.fit(simulated_separableish_features, simulated_labels)
# 4. Predict
## No prediction is needed here
print(clf.coef_, clf.intercept_)
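Although prediction is skipped here, a quick sanity check is straightforward to add. A minimal sketch, assuming the fitted clf above (accuracy_score comes from sklearn.metrics and is not used in the original):

from sklearn.metrics import accuracy_score

# Predict on the training data and report accuracy
preds = clf.predict(simulated_separableish_features)
print('training accuracy:', accuracy_score(simulated_labels, preds))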
Method 2: Solve the logistic regression parameters with gradient descent
# Sigmoid hypothesis for logistic regression
def sigmoid_eg(x1, x2, theta_1, theta_2, theta_3):
    z = (theta_1*x1 + theta_2*x2 + theta_3).astype("float64")
    return 1.0 / (1.0 + np.exp(-z))

# Mean gradient of the log-likelihood with respect to each parameter
def gradient_eg(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return (1/len(y)*np.sum((y - sigmoid_probs)*x1),
            1/len(y)*np.sum((y - sigmoid_probs)*x2),
            1/len(y)*np.sum((y - sigmoid_probs)))
def GradDe_eg(x1, x2, y, Max_Loop=20, alpha=0.1):
    # Initial parameter values
    theta_1 = 0.1
    theta_2 = -0.4
    theta_3 = 0.56
    for l in range(Max_Loop):
        delta1, delta2, delta3 = gradient_eg(x1, x2, y, theta_1, theta_2, theta_3)
        # Gradient ascent on the log-likelihood: step along the gradient
        theta_1 = theta_1 + alpha*delta1
        theta_2 = theta_2 + alpha*delta2
        theta_3 = theta_3 + alpha*delta3
        # Log progress every 1000 iterations
        if l % 1000 == 0:
            print('delta%d =' % l, [delta1, delta2, delta3])
            print('theta%d =' % l, [theta_1, theta_2, theta_3], '\n')
    return [theta_1, theta_2, theta_3]
weights = GradDe_eg(simulated_separableish_features[:,0],simulated_separableish_features[:,1],simulated_labels,200000,0.9)
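As a sanity check, the gradient-descent weights can be placed next to sklearn's from Method 1 (a small sketch, assuming clf from Method 1 is still in scope; clf.coef_ holds the slopes and clf.intercept_ the bias):

# Compare against the sklearn solution: [theta_1, theta_2, theta_3]
print('gradient descent:', weights)
print('sklearn         :', list(clf.coef_[0]) + list(clf.intercept_))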
Method 3: Newton's method
def sigmoid_eg(x1, x2, theta_1, theta_2, theta_3):
    z = (theta_1*x1 + theta_2*x2 + theta_3).astype("float64")
    return 1.0 / (1.0 + np.exp(-z))

# Mean cross-entropy cost (the negative mean log-likelihood)
def Cost(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return -np.mean(y * np.log(sigmoid_probs) + (1 - y) * np.log(1 - sigmoid_probs))
# Gradient of the log-likelihood (unscaled, unlike gradient_eg above)
def gradient_nt(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return np.array([np.sum((y - sigmoid_probs) * x1),
                     np.sum((y - sigmoid_probs) * x2),
                     np.sum(y - sigmoid_probs)])
# 3x3 Hessian (negated Hessian of the log-likelihood); the matrix is
# symmetric, so d4 = d2, d7 = d3, and d8 = d6
def hessian(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    d1 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x1 * x1)
    d2 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x1 * x2)
    d3 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x1)
    d4 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x2 * x1)
    d5 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x2 * x2)
    d6 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x2)
    d7 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x1)
    d8 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x2)
    d9 = np.sum((sigmoid_probs * (1 - sigmoid_probs)))
    H = np.array([[d1, d2, d3], [d4, d5, d6], [d7, d8, d9]])
    return H
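The hessian function above and the update in newtons_method below follow the standard Newton step for the log-likelihood, sketched here in LaTeX (notation mine):

% Negated Hessian of the log-likelihood, with x = (x_1, x_2, 1)^T per sample
H = \sum_{i=1}^{m} \sigma(z^{(i)}) \left(1 - \sigma(z^{(i)})\right) x^{(i)} x^{(i)\top}

% Newton update: ascend the log-likelihood (H is already negated, hence the plus sign)
\theta \leftarrow \theta + H^{-1} \nabla \ell(\theta)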
# Sanity check: evaluate the sigmoid at the initial parameter values
theta_1 = 0.001
theta_2 = -0.4
theta_3 = 0.6
sigmoid_probs = sigmoid_eg(simulated_separableish_features[:,0], simulated_separableish_features[:,1], theta_1, theta_2, theta_3)
sigmoid_probs
def newtons_method(x1, x2, y):
    # Initialize cost and parameters
    theta_1 = 0.001
    theta_2 = -0.4
    theta_3 = 0.6
    delta_l = np.inf
    l = Cost(x1, x2, y, theta_1, theta_2, theta_3)
    # Convergence conditions
    tol = 1e-10
    max_iterations = 15
    i = 0
    while abs(delta_l) > tol and i < max_iterations:
        i += 1
        g = gradient_nt(x1, x2, y, theta_1, theta_2, theta_3)
        hess = hessian(x1, x2, y, theta_1, theta_2, theta_3)
        H_inv = np.linalg.inv(hess)
        # @ is matrix multiplication: equivalent to np.dot(H_inv, g)
        delta = H_inv @ g
        delta_theta_1 = delta[0]
        delta_theta_2 = delta[1]
        delta_theta_3 = delta[2]
        print(theta_1, theta_2, theta_3, l, g)
        # Perform the update step
        theta_1 += delta_theta_1
        theta_2 += delta_theta_2
        theta_3 += delta_theta_3
        # Recompute the cost and track its change for the convergence test
        l_new = Cost(x1, x2, y, theta_1, theta_2, theta_3)
        delta_l = l - l_new
        l = l_new
    return np.array([theta_1, theta_2, theta_3])
weights_nt = newtons_method(simulated_separableish_features[:,0],simulated_separableish_features[:,1],simulated_labels)
Comparison of the parameters found by the three methods
As the results above show, all three methods arrive at nearly the same parameters. To see which solver is fastest, the code can be adjusted to time each one.
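A minimal timing sketch along those lines (time.perf_counter and the variable names f1, f2 are mine, not from the original; note that GradDe_eg will still print its progress while being timed):

import time

f1 = simulated_separableish_features[:, 0]
f2 = simulated_separableish_features[:, 1]

# Time each solver once; for stable numbers, average over several runs
for name, fit in [
        ('sklearn', lambda: LogisticRegression(fit_intercept=True, C=1e15).fit(simulated_separableish_features, simulated_labels)),
        ('gradient descent', lambda: GradDe_eg(f1, f2, simulated_labels, 200000, 0.9)),
        ("newton's method", lambda: newtons_method(f1, f2, simulated_labels))]:
    start = time.perf_counter()
    fit()
    print('%s: %.3f s' % (name, time.perf_counter() - start))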
Note:
The function definitions above are not fully commented, but they all follow directly from the formulas; you can check them against the formulas given earlier.
Mistakes are inevitable in this post; your understanding is appreciated, as the author is still learning, too.