Statistical Learning Methods (统计学习方法): Logistic Regression

This post works through the optimization methods commonly used to fit a logistic regression model: gradient descent, Newton's method, and its quasi-Newton refinements, the BFGS and DFP algorithms, each implemented from scratch in NumPy.

Gradient Descent
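
The objective the code below maximizes is the log-likelihood of the logistic model (ascending the log-likelihood is the same as descending the negative log-likelihood, hence the section title). For reference against the functions L_W, update_w, and update_b, in LaTeX:

L(w, b) = \sum_{i=1}^{N} \Big[ y_i (w \cdot x_i + b) - \log\big(1 + e^{w \cdot x_i + b}\big) \Big]

\frac{\partial L}{\partial w} = \sum_{i=1}^{N} \Big( y_i - \frac{e^{w \cdot x_i + b}}{1 + e^{w \cdot x_i + b}} \Big) x_i, \qquad \frac{\partial L}{\partial b} = \sum_{i=1}^{N} \Big( y_i - \frac{e^{w \cdot x_i + b}}{1 + e^{w \cdot x_i + b}} \Big)

Gradient ascent repeatedly steps along this gradient with a fixed step size delta.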

import numpy as np
import math
"""逻辑斯蒂回归算法 梯度下降方法"""
def L_W(X, y, w, b):
	wx = np.dot(w, X.T) + b
	return np.dot(y, wx) - np.sum( np.log(1+np.exp(wx) ), axis=0)

def update_w(X, y, w, b, delta=1):
	# Gradient ascent step for w: dL/dw = sum_i (y_i - sigma(w.x_i + b)) x_i
	wx = np.exp(np.dot(w, X.T) + b)
	w = w + delta * np.sum(((y - wx/(wx + 1)) * X.T).T, axis=0)
	return w

def update_b(X, y, w, b, delta=1):
	# Gradient ascent step for b: dL/db = sum_i (y_i - sigma(w.x_i + b))
	wx = np.exp(np.dot(w, X.T) + b)
	b = b + delta * np.sum(y - wx/(1 + wx), axis=0)
	return b

def Loggistics(X, y, delta=0.1245, maxIter=500000):
	# The original hardcoded the step size 0.1245 inside the loop; here it is
	# the default of delta and threaded through, which preserves the behavior.
	import copy
	w = np.zeros(X.shape[1])
	b = 0
	iter = 0
	old_value = -math.inf
	old_w = copy.deepcopy(w)
	old_b = b
	# Ascend the log-likelihood until it stops improving or maxIter is hit
	while (iter < maxIter) and old_value < L_W(X, y, w, b):
		old_value = L_W(X, y, w, b)
		old_w = copy.deepcopy(w)
		old_b = b
		w = update_w(X, y, old_w, old_b, delta=delta)
		b = update_b(X, y, old_w, old_b, delta=delta)
		if iter % 10000 == 0:
			print(L_W(X, y, w, b))
			print("iteration number %d" % iter)
		iter += 1
	return old_w, old_b

def predict(x, w, b):
	# Predicted probability P(y=1 | x) = sigma(w.x + b)
	wx = np.dot(w, x) + b
	return math.exp(wx) / (1 + math.exp(wx))

w, b = Loggistics(np.array([[3,3], [4,3], [1,1], [3,4]]), np.array([1, 1, 0, 0]))

print(predict(np.array([3,3]), w, b))
print(predict(np.array([4,3]), w, b))
print(predict(np.array([1,1]), w, b))
print(predict(np.array([3,4]), w, b))
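
As a quick sanity check (not in the original post), one can compare against scikit-learn, assuming it is installed. Note that this toy dataset is linearly separable, so the unregularized likelihood has no finite maximizer and the weights grow with every iteration; with a large C (weak regularization) the predicted probabilities should nonetheless be close to the ones printed above.

# Hypothetical cross-check with scikit-learn (assumes scikit-learn is available)
from sklearn.linear_model import LogisticRegression

X = np.array([[3, 3], [4, 3], [1, 1], [3, 4]])
y = np.array([1, 1, 0, 0])
clf = LogisticRegression(C=1e6)  # large C: regularization is almost disabled
clf.fit(X, y)
print(clf.predict_proba(X)[:, 1])  # P(y=1 | x) for each training point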


Newton's Method
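
Newton's method replaces the fixed step size with a curvature-corrected step. Writing g(w) for the gradient of the log-likelihood and H(w) for its Hessian (the bias b is absorbed into w by appending a constant 1 feature to every x_i), the update implemented below is, in LaTeX:

w \leftarrow w - H(w)^{-1} g(w), \qquad H(w) = -\sum_{i=1}^{N} \frac{e^{w \cdot x_i}}{(1 + e^{w \cdot x_i})^{2}} \, x_i x_i^{\top}

Since the log-likelihood is concave, H is negative semidefinite and this step ascends the objective. The commented-out warm-up below first applies the idea to a one-variable function.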

'''
# Warm-up (left disabled in the original): Newton iteration on f(x) = 2x^2 - 10x + 1.
# As written it applies x <- x - f(x)/f'(x), i.e. it finds a root of f;
# the commented alternative x <- x - f'(x)/f''(x) would find the minimizer instead.
def NewtonMethod(maxIter=100):
	iter = 0
	x1 = 0
	while iter < maxIter:
		x2 = x1 - (2*x1*x1 - 10*x1 + 1) / (4*x1 - 10)
		#x2 = x1 - (4*x1 - 10) / 4
		x1 = x2
		iter += 1
		print(2*x1*x1 - 10*x1 + 1)
	return x1

x = NewtonMethod()
print(2*x*x - 10*x + 1)
'''
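
For the record, Newton root-finding on f(x) = 2x^2 - 10x + 1 converges to one of the roots x = (5 \pm \sqrt{23})/2, approximately 4.898 or 0.102; starting from x1 = 0 it reaches the smaller one, which is why the printed values of f(x1) approach zero.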


import numpy as np
import math
"""逻辑斯蒂回归 牛顿法"""

#对数似然函数为目标函数
def L_W(X, y, w):
	wx = np.dot(w, X.T)
	return np.dot(y, wx) - np.sum(np.log(1+np.exp(wx)), axis=0)

# Hessian of the log-likelihood: H = -X^T diag(p_i (1 - p_i)) X
def update_H(X, y, w):
	wx = np.exp(np.dot(w, X.T))
	c = wx/((1 + wx)*(1 + wx))  # c_i = p_i (1 - p_i)
	H = -np.dot(X.T, (X.T*c).T)
	return H

def update_w(X, y, w, H):
	wx = np.exp(np.dot(w, X.T))
	# gx: gradient of the objective
	gx = np.sum(((y - wx/(wx + 1)) * X.T).T, axis=0)
	# Newton step: w <- w - H^{-1} g (H is negative definite, so this ascends L)
	H_ = np.linalg.inv(H)
	w = w - np.dot(H_, gx)
	return w


def Loggistics(X, y, maxIter=70):
	import copy
	w = np.zeros(X.shape[1])
	iter = 0
	old_value = -math.inf
	old_w = copy.deepcopy(w)
	# Take Newton steps while the log-likelihood keeps improving
	while (iter < maxIter) and old_value <= L_W(X, y, w):
		old_value = L_W(X, y, w)
		old_w = copy.deepcopy(w)
		H = update_H(X, y, w)
		w = update_w(X, y, old_w, H)
		if iter % 10 == 0:
			print("iteration number %d" % iter)
			print("old_value", old_value)
		iter += 1
	return old_w

def predict(x, w):
	# P(y=1 | x), with the constant-1 feature included in x
	wx = math.exp(np.dot(w, x))
	return wx / (1 + wx)

w = Loggistics(np.array([[3,3,1], [4,3,1], [1,1,1], [3,4,1]]), np.array([1, 1, 0, 0]))

print(predict(np.array([3,3, 1]), w))
print(predict(np.array([4,3, 1]), w))
print(predict(np.array([1,1, 1]), w))
print(predict(np.array([3,4, 1]), w))



Quasi-Newton Methods: The BFGS Algorithm
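
Computing and inverting the exact Hessian at every step is the expensive part of Newton's method. BFGS instead maintains an approximation B_k to the Hessian, updated from the most recent step \delta_k = w_{k+1} - w_k and gradient change y_k = g_{k+1} - g_k. The update implemented by update_B below is, in LaTeX:

B_{k+1} = B_k + \frac{y_k y_k^{\top}}{y_k^{\top} \delta_k} - \frac{B_k \delta_k \delta_k^{\top} B_k}{\delta_k^{\top} B_k \delta_k}

The step length along the direction -B_k^{-1} g_k is then chosen by a golden-section (0.618) one-dimensional search rather than a fixed delta.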

import numpy as np
import math
"""逻辑斯蒂回归 拟牛顿法, BFGS算法"""

#对数似然函数为目标函数
def L_W(X, y, w):
	wx = np.dot(w, X.T)
	return np.dot(y, wx) - np.sum(np.log(1+np.exp(wx)), axis=0)

# BFGS update of the Hessian approximation B (np.mat is deprecated, so this
# uses plain arrays; for symmetric B it computes the same matrices)
def update_B(X, y, B, gx, old_gx, w, old_w):
	delta_k = w - old_w   # step delta_k
	y_k = gx - old_gx     # gradient change y_k
	Bd = np.dot(B, delta_k)
	B = B + np.outer(y_k, y_k)/np.dot(y_k, delta_k) - np.outer(Bd, Bd)/np.dot(delta_k, Bd)
	return B

def update_w(X, y, w, B):
	wx = np.exp(np.dot(w, X.T))
	# gx: gradient of the objective
	gx = np.sum(((y - wx/(wx + 1)) * X.T).T, axis=0)
	B_ = np.linalg.inv(B)
	d = np.dot(B_, gx)  # the step taken below is w - lam*d
	# Bracket the step length: keep doubling lam2 while a larger step improves L,
	# so afterwards the maximum along the ray lies in [0, 2*lam2]
	lam1 = 0
	lam2 = 1
	while L_W(X, y, w - lam2*d) < L_W(X, y, w - 2*lam2*d):
		lam2 = 2*lam2
	lam2 = 2*lam2

	x1 = (lam2 - lam1)*0.382 + lam1
	x2 = (lam2 - lam1)*0.618 + lam1
	# One-dimensional search for lam by the golden-section (0.618) method
	while x2 - x1 > 1e-9:
		if L_W(X, y, w - x1*d) < L_W(X, y, w - x2*d):
			lam1 = x1  # the maximum lies to the right of x1
		else:
			lam2 = x2  # the maximum lies to the left of x2
		x1 = (lam2 - lam1)*0.382 + lam1
		x2 = (lam2 - lam1)*0.618 + lam1
	w = w - x2*d
	return w


def Loggistics(X, y, maxIter=7000):
	import copy
	w = np.zeros(X.shape[1])
	# Initialize B with the exact Hessian at w = 0
	wx = np.exp(np.dot(w, X.T))
	c = wx/((1 + wx)*(1 + wx))
	B = -np.dot(X.T, (X.T*c).T)
	old_gx = np.sum(((y - wx/(wx + 1)) * X.T).T, axis=0)
	old_value = -math.inf
	iter = 0
	# Stop when L stops improving or the gradient is (numerically) zero
	while (iter < maxIter) and old_value < L_W(X, y, w) and np.dot(old_gx, old_gx) >= 1e-300:
		old_value = L_W(X, y, w)
		old_w = copy.deepcopy(w)
		w = update_w(X, y, w, B)

		wx = np.exp(np.dot(w, X.T))
		gx = np.sum(((y - wx/(wx + 1)) * X.T).T, axis=0)

		B = update_B(X, y, B, gx, old_gx, w, old_w)
		old_gx = gx

		print("iteration number %d" % iter)
		print("old_value", old_value)
		iter += 1
	return old_w

def predict(x, w):
	# P(y=1 | x), with the constant-1 feature included in x
	wx = math.exp(np.dot(w, x))
	return wx / (1 + wx)

w = Loggistics(np.array([[3,3,1], [4,3,1], [1,1,1], [3,4,1], [2,2,1], [3,2,1]]), np.array([1, 1, 0, 0, 0, 1]))

print(predict(np.array([3,3, 1]), w))
print(predict(np.array([4,3, 1]), w))
print(predict(np.array([1,1, 1]), w))
print(predict(np.array([3,4, 1]), w))
print(predict(np.array([2,2, 1]), w))
print(predict(np.array([3,2, 1]), w))
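
The 0.618 line search buried inside update_w is worth isolating. Below is a minimal sketch of golden-section search for the maximum of a unimodal function; the name golden_section_max and the example function are illustrative, not from the original post:

# Golden-section (0.618) search for the maximum of a unimodal f on [a, b];
# golden_section_max and the example f are illustrative names
def golden_section_max(f, a, b, tol=1e-9):
	x1 = a + 0.382 * (b - a)
	x2 = a + 0.618 * (b - a)
	while x2 - x1 > tol:
		if f(x1) < f(x2):
			a = x1   # the maximum lies to the right of x1
		else:
			b = x2   # the maximum lies to the left of x2
		x1 = a + 0.382 * (b - a)
		x2 = a + 0.618 * (b - a)
	return x2

# Example: maximize -(lam - 1.5)^2 on [0, 4]; prints a value close to 1.5
print(golden_section_max(lambda lam: -(lam - 1.5)**2, 0, 4))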

Quasi-Newton Methods: The DFP Algorithm
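
DFP is the dual of BFGS: instead of approximating the Hessian B_k (which must be inverted to get a search direction), it approximates the inverse Hessian G_k directly, so each step needs only a matrix-vector product. The update implemented by update_G below is, in LaTeX:

G_{k+1} = G_k + \frac{\delta_k \delta_k^{\top}}{\delta_k^{\top} y_k} - \frac{G_k y_k y_k^{\top} G_k}{y_k^{\top} G_k y_k}

Swapping \delta_k \leftrightarrow y_k (and B \leftrightarrow G) turns this into the BFGS formula above.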

import numpy as np
import math
"""逻辑斯蒂回归 拟牛顿法, DFP算法"""

#对数似然函数为目标函数
def L_W(X, y, w):
	wx = np.dot(w, X.T)
	return np.dot(y, wx) - np.sum(np.log(1+np.exp(wx)), axis=0)

# DFP update of the inverse-Hessian approximation G (plain arrays instead of
# the deprecated np.mat; for symmetric G it computes the same matrices)
def update_G(X, y, G, gx, old_gx, w, old_w):
	delta_k = w - old_w   # step delta_k
	y_k = gx - old_gx     # gradient change y_k
	Gy = np.dot(G, y_k)
	G = G + np.outer(delta_k, delta_k)/np.dot(delta_k, y_k) - np.outer(Gy, Gy)/np.dot(y_k, Gy)
	return G

def update_w(X, y, w, G):
	wx = np.exp(np.dot(w, X.T))
	# gx: gradient of the objective
	gx = np.sum(((y - wx/(wx + 1)) * X.T).T, axis=0)
	d = np.dot(G, gx)  # G already approximates the inverse Hessian: no inversion needed
	# Bracket the step length: keep doubling lam2 while a larger step improves L,
	# so afterwards the maximum along the ray lies in [0, 2*lam2]
	lam1 = 0
	lam2 = 1
	while L_W(X, y, w - lam2*d) < L_W(X, y, w - 2*lam2*d):
		lam2 = 2*lam2
	lam2 = 2*lam2

	x1 = (lam2 - lam1)*0.382 + lam1
	x2 = (lam2 - lam1)*0.618 + lam1
	# One-dimensional search for lam by the golden-section (0.618) method
	while x2 - x1 > 1e-9:
		if L_W(X, y, w - x1*d) < L_W(X, y, w - x2*d):
			lam1 = x1  # the maximum lies to the right of x1
		else:
			lam2 = x2  # the maximum lies to the left of x2
		x1 = (lam2 - lam1)*0.382 + lam1
		x2 = (lam2 - lam1)*0.618 + lam1
	w = w - x2*d
	return w


def Loggistics(X, y, maxIter=7000):
	import copy
	w = np.zeros(X.shape[1])
	# Initialize G with the exact inverse Hessian at w = 0
	wx = np.exp(np.dot(w, X.T))
	c = wx/((1 + wx)*(1 + wx))
	G = np.linalg.inv(-np.dot(X.T, (X.T*c).T))
	old_gx = np.sum(((y - wx/(wx + 1)) * X.T).T, axis=0)
	old_value = -math.inf
	iter = 0
	# Stop when L stops improving or the gradient is (numerically) zero
	while (iter < maxIter) and old_value < L_W(X, y, w) and np.dot(old_gx, old_gx) >= 1e-300:
		old_value = L_W(X, y, w)
		old_w = copy.deepcopy(w)
		w = update_w(X, y, w, G)

		wx = np.exp(np.dot(w, X.T))
		gx = np.sum(((y - wx/(wx + 1)) * X.T).T, axis=0)

		G = update_G(X, y, G, gx, old_gx, w, old_w)
		old_gx = gx

		print("iteration number %d" % iter)
		print("old_value", old_value)
		iter += 1
	return old_w

def predict(x, w):
	# P(y=1 | x), with the constant-1 feature included in x
	wx = math.exp(np.dot(w, x))
	return wx / (1 + wx)

w = Loggistics(np.array([[3,3,1], [4,3,1], [1,1,1], [3,4,1], [2,2,1], [3,2,1], [2,3,1]]), np.array([1, 1, 0, 0, 0, 1, 0]))

print(predict(np.array([3,3, 1]), w))
print(predict(np.array([4,3, 1]), w))
print(predict(np.array([1,1, 1]), w))
print(predict(np.array([3,4, 1]), w))
print(predict(np.array([2,2, 1]), w))
print(predict(np.array([3,2, 1]), w))
print(predict(np.array([2,3, 1]), w))
