class Logistic():
    """Logistic regression trained by gradient-descent variants.

    Fit with one of the ``fit_*`` methods, passing a raw feature matrix
    ``X`` of shape (n_samples, n_features) and binary labels ``y`` in
    {0, 1}.  A bias column of ones is inserted internally, so the learned
    weight vector ``self.w`` has ``n_features + 1`` entries (bias first).
    """

    def __init__(self):
        # Weights are allocated lazily inside the fit_* methods once the
        # (bias-augmented) feature count is known.  The original code read
        # a module-level global ``X`` here, which only worked by accident
        # when a global of that name happened to exist.
        self.w = None

    def sigmoid(self, inX):
        """Element-wise logistic function 1 / (1 + exp(-inX))."""
        return 1.0 / (1.0 + np.exp(-inX))

    def cost(self, X, y):
        """Mean negative log-likelihood (cross-entropy) of the data.

        ``X`` must already contain the bias column.  Predictions are
        clipped away from exactly 0/1 so the logarithms stay finite.
        (The original returned the un-negated, unnormalized sum; the
        commented-out loop showed the intended ``-J/m``.)
        """
        m = X.shape[0]
        hx = np.clip(self.sigmoid(X.dot(self.w)), 1e-12, 1.0 - 1e-12)
        return -(y.dot(np.log(hx)) + (1 - y).dot(np.log(1 - hx))) / m

    def _prepare(self, X):
        """Prepend the bias column and (re)initialize the weights."""
        X = np.insert(X, 0, values=np.ones(X.shape[0]), axis=1)
        self.w = np.zeros(X.shape[1])
        return X

    ## Batch gradient descent: every update scans the whole data set.
    def fit_graAscent(self, X, y):
        X = self._prepare(X)
        convergence = 0.0001
        maxCount = 500
        alpha = 0.1
        c = self.cost(X, y)
        clim = c + 10
        count = 0
        # Bug fixed: the original never recomputed ``c``, so
        # |clim - c| was 0 after one pass and training stopped
        # immediately.  Refresh the cost every iteration.
        while np.abs(clim - c) > convergence and count < maxCount:
            error = self.sigmoid(X.dot(self.w)) - y
            self.w -= alpha / X.shape[0] * X.T.dot(error)
            clim = c
            c = self.cost(X, y)
            count += 1

    ## Stochastic gradient descent: each update uses a single sample,
    ## which is cheap per step but converges more noisily.
    def fit_stocGraAscent(self, X, y):
        X = self._prepare(X)
        convergence = 0.0001
        maxCount = 500
        alpha = 0.1
        c = self.cost(X, y)
        clim = c + 10
        count = 0
        m = X.shape[0]
        # Bugs fixed: the original divided the whole weight vector by
        # the sample count after every update (destroying the weights)
        # and never refreshed the cost used in the stopping test.
        while np.abs(clim - c) > convergence and count < maxCount:
            i = count % m  # cycle through the samples one at a time
            error = self.sigmoid(X[i].dot(self.w)) - y[i]
            self.w -= alpha * X[i] * error
            clim = c
            c = self.cost(X, y)
            count += 1

    ## Improved SGD: the learning rate decays non-linearly with the
    ## iteration count, and updates draw from a random half-size
    ## subsample chosen once up front.
    def fit_imstocGraAscent(self, X, y):
        X = self._prepare(X)
        convergence = 0.0001
        maxCount = 500
        c = self.cost(X, y)
        clim = c + 10
        count = 0
        # Random subsample (with replacement) of half the data.
        num = int(0.5 * X.shape[0])
        data_index = np.random.randint(0, X.shape[0], num)
        sub_data = X[data_index, :]
        sub_y = y[data_index]
        while np.abs(clim - c) > convergence and count < maxCount:
            i = count % num
            count += 1
            # Decaying step size: large early updates, small late ones,
            # which damps oscillation near the optimum.
            alpha = 4 / (1 + i + count) + 0.01
            error = self.sigmoid(sub_data[i, :].dot(self.w)) - sub_y[i]
            self.w -= alpha * sub_data[i, :] * error
            clim = c
            c = self.cost(sub_data, sub_y)

    def predict(self, x_test):
        """Return hard 0/1 labels for a raw feature matrix ``x_test``.

        A probability of exactly 0.5 maps to 0, matching the original
        thresholding expression ``1*(p > 0.5) + (p < 0.5)*0``.
        """
        x_test = np.insert(x_test, 0, values=np.ones(x_test.shape[0]), axis=1)
        prob = self.sigmoid(x_test.dot(self.w))
        return (prob > 0.5).astype(int)
if __name__ == "__main__":
    import numpy as np
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # NOTE(review): the names ``np``, ``X`` and ``y`` must stay at module
    # level — the class body above reads them as globals.
    dataset = datasets.load_breast_cancer()
    X = StandardScaler().fit_transform(dataset.data)
    y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Train the same instance with each optimizer and report test accuracy.
    lg_self = Logistic()

    lg_self.fit_graAscent(X_train, y_train)
    acc_batch = (lg_self.predict(X_test) == y_test).mean()
    print('梯度上升算法的预测精度', acc_batch)

    lg_self.fit_stocGraAscent(X_train, y_train)
    acc_stoc = (lg_self.predict(X_test) == y_test).mean()
    print('随机梯度上升算法的预测精度', acc_stoc)

    lg_self.fit_imstocGraAscent(X_train, y_train)
    acc_imstoc = (lg_self.predict(X_test) == y_test).mean()
    print('改进随机梯度上升算法的预测精度', acc_imstoc)