代码实现
局部加权回归的代码实现:
# Linear hypothesis: predicts y for input x from the global parameter
# vector `theta` (theta[0] is the intercept, theta[1] the slope).
def h(x):
    prediction = theta[0]
    prediction += theta[1] * x
    return prediction
# Gaussian kernel weight between training point xi and query point x_pre,
# with bandwidth sigma: exp(-(xi - x_pre)^2 / (2 * sigma^2)).
def w(xi, x_pre, sigma):
    exponent = -((xi - x_pre) ** 2) / (2 * (sigma ** 2))
    return math.exp(exponent)
# Locally weighted linear regression fit for a single query point `x_pre`.
# Runs batch gradient descent on the kernel-weighted squared-error cost
# until the change in cost drops below the tolerance `epsilon`.
# NOTE(review): relies on module-level globals defined elsewhere in the
# file — theta (parameter list), m (sample count), learning_rate, epsilon,
# error0 (previous cost), cnt (iteration counter) — confirm they are
# initialized before calling.
def non_parameter(x, y, x_pre, sigma):
    global wi
    global cnt
    global error0
    while True:
        cnt += 1
        diff = [0]*2
        wi = 0
        for i in range(m):
            # Gaussian kernel weight of sample i w.r.t. the query point
            wi = w(x[i], x_pre, sigma)
            # Weighted gradient contributions for intercept and slope
            diff[0] += (h(x[i])-y[i])*wi
            diff[1] += ((h(x[i])-y[i])*x[i])*wi
        # Gradient-descent step on both parameters
        theta[0] -= learning_rate*diff[0]
        theta[1] -= learning_rate*diff[1]
        error1 = 0
        wi = 0
        for i in range(m):
            wi = w(x[i], x_pre, sigma)
            # Weighted half-squared-error of sample i under updated theta
            error1 += ((h(x[i]) - y[i]) ** 2 / 2)*wi
        # Converged when the cost change is below epsilon
        if (abs(error1 - error0) < epsilon):
            return (theta,cnt)
        else:
            error0 = error1
logistic回归的代码实现:
import matplotlib.pyplot as plt
import numpy as np
import math
# Load the training data from a whitespace-separated text file.
def load_dataset(filename):
    """Load a dataset where each line holds two feature values and an
    integer class label, separated by whitespace.

    A constant 1.0 is prepended to each feature row so the first weight
    acts as the bias term.

    Args:
        filename: path to the data file.

    Returns:
        (dataset, labelset): list of [1.0, x1, x2] float rows and the
        matching list of int labels.
    """
    dataset = []
    labelset = []
    # `with` guarantees the handle is closed (the original leaked it)
    with open(filename) as fr:
        for line in fr:
            row = line.strip().split()
            dataset.append([1.0, float(row[0]), float(row[1])])
            labelset.append(int(row[2]))
    return dataset, labelset
# Sigmoid applied element-wise to a column matrix of raw scores.
def sigmoid(x, m):
    """Element-wise logistic function for an (m, 1) score matrix.

    Args:
        x: (m, 1) np.matrix of raw scores.
        m: row count; kept for interface compatibility (the shape is
           taken from x itself).

    Returns:
        (m, 1) np.matrix with 1 / (1 + exp(-x)) applied element-wise.
    """
    # Vectorized numpy expression replaces the original per-row Python
    # loop and avoids math.exp's OverflowError on large-magnitude inputs
    # (np.exp saturates to inf, giving a clean 0.0 instead of raising).
    return np.mat(1.0 / (1.0 + np.exp(-np.asarray(x, dtype=float))))
# Sigmoid for a single scalar score.
def sigmoid1(x):
    """Numerically stable logistic function for a scalar.

    The original computed 1 / (1 + exp(-x)) directly, which raises
    OverflowError for strongly negative x; branching on the sign means
    exp is only ever taken of a non-positive argument.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # exp(x) <= 1 here, so no overflow is possible
    z = math.exp(x)
    return z / (1 + z)
# Batch gradient-ascent training for logistic regression.
def batchgradient_up(dataset,labelset):
    """Fit logistic-regression weights by full-batch gradient ascent.

    Args:
        dataset: list of feature rows (first element is the 1.0 bias term).
        labelset: list of 0/1 class labels.

    Returns:
        (n, 1) np.matrix of learned weights.
    """
    features = np.mat(dataset)           # m x n design matrix
    targets = np.mat(labelset).T         # m x 1 column of labels
    m, n = np.shape(features)
    learning_rate = 0.01                 # fixed step size
    maxcycles = 5000                     # fixed number of ascent steps
    weights = np.mat(np.ones((n, 1)))    # start from all-ones weights
    for _ in range(maxcycles):
        # Predicted probabilities for every sample at once
        predictions = sigmoid(features * weights, m)
        # Full-batch ascent step on the log-likelihood gradient
        weights += learning_rate * features.T * (targets - predictions)
    return weights
# Stochastic gradient-ascent training for logistic regression.
def randgradient_up(dataset,labelset):
    """Fit logistic-regression weights by stochastic gradient ascent.

    Makes `maxcycles` sequential passes over the data, updating the
    weights after every individual sample.

    Args:
        dataset: list of feature rows (first element is the 1.0 bias term).
        labelset: list of 0/1 class labels.

    Returns:
        (n, 1) np.matrix of learned weights.
    """
    features = np.mat(dataset)           # m x n design matrix
    targets = np.mat(labelset).T         # m x 1 column of labels
    m, n = np.shape(features)
    learning_rate = 0.01
    maxcycles = 500                      # number of full passes
    weights = np.mat(np.ones((n, 1)))
    for _ in range(maxcycles):
        for i in range(m):
            # Per-sample prediction and residual
            prediction = sigmoid1(features[i] * weights)
            error = targets[i][0] - prediction
            # Single-sample ascent step
            weights += learning_rate * error[0, 0] * features[i].T
    return weights
# Plot the two classes and the fitted decision boundary.
def plotbestfit(dataset,labelset,weights):
    """Scatter class-0 points in green, class-1 points as red squares,
    and draw the line where the logistic model's score is zero.

    Args:
        dataset: list of [1.0, x1, x2] feature rows.
        labelset: list of 0/1 class labels.
        weights: (3, 1) np.matrix of fitted weights.
    """
    # Split the two feature coordinates by class label
    x1_pos, x2_pos = [], []
    x1_neg, x2_neg = [], []
    for row, label in zip(dataset, labelset):
        if label == 1:
            x1_pos.append(row[1])
            x2_pos.append(row[2])
        else:
            x1_neg.append(row[1])
            x2_neg.append(row[2])
    plt.scatter(x1_neg, x2_neg, c='green')
    plt.scatter(x1_pos, x2_pos, c='red', marker='s')
    x = [-4, -2, 0, 2, 4]
    print(weights)
    # Boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = -(w0 + w1*x1) / w2
    y = [((-weights[0, 0] - weights[1, 0] * i) / weights[2, 0]) for i in x]
    plt.plot(x, y)
    plt.show()
if __name__ == '__main__':
    # Load the sample data, fit weights with stochastic gradient ascent,
    # and plot the resulting decision boundary.
    dataset,labelset = load_dataset("logistic_regression_data/testSet.txt")
    weights = randgradient_up(dataset, labelset)
    plotbestfit(dataset,labelset,weights)