【Python代码】 局部线性回归(随机梯度下降方法)

原文在这里

'''
 这部分代码是lwls.py文件,基于随机梯度下降方法,
 这与CSDN大部分相关文章求解正规方程(中间需要求逆)的方法不同,
 这里不需要求矩阵的逆。
 但是,它带来的另外一个问题是如何调参,如何找到最好的参数…………
 我暂时还没找到好方法……
 欢迎留言评论
 '''
# Locally weighted linear regression is computationally expensive because the parameters are re-estimated for every test point.
# It should nevertheless give good results once the hyper-parameter tau has been tuned.

import csv
import math
import numpy

def converge(t):
	"""Tell whether every component of an update step is within tolerance.

	Args:
		t: Iterable of numeric step components.

	Returns:
		False as soon as one component has an absolute value greater than the
		module-level ``epsilon``; True otherwise (including for an empty ``t``).
	"""
	# Phrased as "no component exceeds epsilon" so that the comparison used
	# on each element is exactly `abs(value) > epsilon`.
	return not any(abs(step) > epsilon for step in t)

def stochastic_gradient_descent(w,theta):
	"""Fit weighted linear-regression parameters by stochastic gradient descent.

	Runs ``max_n`` full passes over the training set. For each training sample
	``i`` the parameters are moved by
	``alpha * w[i] * (Y_s[i] - theta . X_s[i]) * X_s[i]``.
	There is no early stopping: all ``max_n`` passes are always performed.

	Reads the module-level globals ``X_s`` (training features), ``Y_s``
	(training labels), ``alpha`` (learning rate) and ``max_n`` (pass count).

	Args:
		w: Sequence with one weight per training sample (indexed like ``X_s``).
		theta: List of initial parameter values; it is updated in place.

	Returns:
		The same ``theta`` list object, holding the final parameters.
	"""
	# Convert every training row to an array once instead of once per pass.
	samples = [numpy.array(row) for row in X_s]
	n_params = len(theta)
	for _ in range(max_n):
		for i, x in enumerate(samples):
			# theta does not change while the step for this sample is being
			# computed, so the residual is evaluated once per sample rather
			# than once per coordinate.
			residual = Y_s[i] - numpy.dot(numpy.array(theta), x)
			scale = alpha * w[i] * residual
			for j in range(n_params):
				theta[j] = theta[j] + scale * x[j]
	return theta

def get_data(name):
	"""Read a numeric CSV file into a list of rows.

	Args:
		name: Path of the CSV file to read.

	Returns:
		A list with one list of floats per non-blank CSV line, in file order.
		The rest of this file treats the last value of each row as the label
		and the preceding values as features (without the constant term).

	Raises:
		ValueError: If a field cannot be converted to ``float``.
	"""
	data = []
	# newline='' is the mode the csv module documents for files handed to
	# csv.reader, so that line endings are interpreted by the reader itself.
	with open(name, 'r', newline='') as csv_file:
		for row in csv.reader(csv_file):
			# Blank lines are yielded as empty lists; skip them so that
			# callers indexing row[-1] do not fail on them.
			if not row:
				continue
			data.append([float(value) for value in row])
	return data

def arrange_data(data):
	"""Split samples into feature rows and labels.

	Args:
		data: List of samples, each a list whose last element is the label.

	Returns:
		A tuple ``(Xs, Ys)``: ``Xs[i]`` is sample ``i`` without its label and
		with the constant term ``1`` appended; ``Ys[i]`` is the label of
		sample ``i``.
	"""
	# Drop the label and append the constant (bias) term to each sample.
	features = [sample[:-1] + [1] for sample in data]
	labels = [sample[-1] for sample in data]
	return features, labels

def weight(x_i,x):
	"""Compute the weight of one training sample relative to a query point.

	The weight is ``exp(-d / (2 * tau**2))`` where ``d`` is the squared
	Euclidean distance between the two feature vectors and ``tau`` is the
	module-level bandwidth.

	Args:
		x_i: Feature vector of the training sample.
		x: Feature vector of the query (test) point.

	Returns:
		The weight as a float.
	"""
	delta = numpy.array(x_i) - numpy.array(x)
	squared_distance = numpy.dot(delta, delta)
	return math.exp(-1.0*squared_distance/(2*tau*tau))

def get_weights(Xs,x):
	"""Compute the weight of every training sample for one query point.

	Args:
		Xs: List of training feature vectors.
		x: Feature vector of the query (test) point.

	Returns:
		A list with ``weight(Xs[i], x)`` at position ``i``.
	"""
	# Returning [1]*len(Xs) here instead would give standard (unweighted)
	# linear regression.
	return [weight(sample, x) for sample in Xs]

def get_parameters(w,n):
	"""Estimate the parameter vector for one query point.

	Args:
		w: Per-training-sample weights for the query point.
		n: Number of parameters.

	Returns:
		The parameters produced by ``stochastic_gradient_descent`` when
		started from ``n`` zeros.
	"""
	# The parameters are obtained by stochastic gradient descent, started
	# from all zeros, and are recomputed for every query point.
	return stochastic_gradient_descent(w, [0]*n)

def get_prediction(w,x):
	"""Predict the label of one query point.

	Args:
		w: Per-training-sample weights for the query point.
		x: Feature vector of the query point.

	Returns:
		The dot product of the fitted parameters (from ``get_parameters``)
		and ``x``.
	"""
	fitted = numpy.array(get_parameters(w, len(x)))
	features = numpy.array(x)
	return numpy.dot(fitted, features)

# Load the training and test samples; the relative paths are resolved against
# the current working directory.
# NOTE(review): the generator code at the end of this file writes
# "data_train.csv" / "data_test.csv" (no "hw_" prefix) - confirm which file
# names are intended.
data_train = get_data('hw_data_train.csv')
data_test = get_data('hw_data_test.csv')

# Split each data set into features (with the constant term appended) and
# labels. X_s and Y_s are read as globals by stochastic_gradient_descent().
X_s,Y_s = arrange_data(data_train)
Xts,Yts = arrange_data(data_test)

# (HYPER-)PARAMETERS
# Module-level settings that the functions in this file read as globals.
tau = 0.1 # Bandwidth used by weight(): exp(-squared_distance / (2*tau*tau))
alpha = 0.01 # Learning rate (step size) of stochastic_gradient_descent()
max_n = 1000 # Number of full passes over the training set per stochastic_gradient_descent() call
epsilon = 0.0001 # Tolerance read by converge(); the call to converge() is commented out, so it is currently unused


# Evaluate on the test set: fit a separate model for every test point, print
# the actual and predicted labels, and accumulate the squared errors.
variance = float(0)
for x, y in zip(Xts, Yts):
	w = get_weights(X_s, x)
	prediction = get_prediction(w, x)
	print("Actual: {} Predicted: {}".format(y, prediction))
	variance += (prediction-y)**2
# Average of the squared prediction errors over the test set.
variance /= len(Xts)
print("Variance: ",variance)

# This part is used to generate the training and test data
import numpy

num_train = 100
num_test = 20


def _write_quadratic_samples(path, count):
	"""Write ``count`` lines "x,y" with y = x**2 to ``path``.

	x starts at -5 and grows by ``10/count`` after every line.
	"""
	# Opened with 'w' rather than 'a' so that running the generator again
	# replaces the file instead of appending a second copy of every row.
	with open(path, 'w') as file:
		x = -5
		for _ in range(count):
			# The Gaussian noise term is multiplied by 0, so the data is
			# noise-free; change the factor to add noise.
			y = (x)**2 + 0*numpy.random.normal(0,1)
			file.write(str(x)+","+str(y)+"\n")
			x = x + 10/count


_write_quadratic_samples("data_train.csv", num_train)
_write_quadratic_samples("data_test.csv", num_test)
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值