'''
This is the lwls.py file, based on stochastic gradient descent.
Unlike most related articles on CSDN, which solve the normal equations
(and so need a matrix inversion in the middle), no matrix inverse is
needed here. However, this brings another problem: how to tune the
hyper-parameters and find the best ones... I have not found a good
method yet... Comments are welcome.
'''
# Locally Weighted Linear Regression is very inefficient because the
# parameters are recomputed for each test case.
# But it should give good results after tuning the hyper-parameter tau.
import csv
import math
import numpy
def converge(t):
    # True if every component of the latest update is within epsilon
    for i in t:
        if abs(i) > epsilon:
            return False
    return True

def stochastic_gradient_descent(w, theta):
    # Update theta one training sample at a time; each sample's
    # contribution is scaled by its local weight w[i]
    for _ in range(max_n):
        for i in range(len(X_s)):
            x = numpy.array(X_s[i])
            t = [0] * len(theta)
            for j in range(len(theta)):
                t[j] = alpha * w[i] * (Y_s[i] - numpy.dot(numpy.array(theta), x)) * x[j]
            for j in range(len(theta)):
                theta[j] = theta[j] + t[j]
            # print(theta)
            # if converge(t):
            #     return theta
    return theta
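# For reference, the inner loop above is one SGD step on the locally weighted
# squared-error cost J(theta) = (1/2) * sum_i w_i * (y_i - theta . x_i)^2,
# whose gradient yields the update
#     theta_j <- theta_j + alpha * w_i * (y_i - theta . x_i) * x_ij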
def get_data(name):
    data = []
    with open(name, 'r') as csv_file:
        reader = csv.reader(csv_file)
        for row in reader:
            for i in range(len(row)):
                row[i] = float(row[i])
            data = data + [row]
    # Returns a 2-D list: each inner list is one sample, without the constant
    # term (1) but with the y label at the end of the sample
    return data

def arrange_data(data):
    # Split the sample set in data (features plus labels) into features and
    # labels, and return both
    Xs = [[]] * len(data)  # 2-D list
    Ys = [[]] * len(data)
    for i in range(len(data)):
        Xs[i] = data[i][:-1] + [1]  # Append the constant term
        Ys[i] = data[i][-1]
    return Xs, Ys
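# Example: a hypothetical CSV row "2.0,3.0,7.5" is read as the sample
# [2.0, 3.0, 7.5]; arrange_data then yields the feature vector [2.0, 3.0, 1]
# and the label 7.5.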
def weight(x_i, x):
    # Weight between the local test point's features x and one training
    # sample's features x_i
    x_i = numpy.array(x_i)
    x = numpy.array(x)
    temp = x_i - x
    temp = numpy.dot(temp, temp)
    return math.exp(-1.0 * temp / (2 * tau * tau))

def get_weights(Xs, x):
    # Weights between the local test point's features x and the whole training
    # set Xs, i.e. this test point's weight vector
    # return [1]*len(Xs)  # Uncomment if you want standard Linear Regression
    weights = [0] * len(Xs)
    for i in range(len(weights)):
        weights[i] = weight(Xs[i], x)
    return weights
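# weight() is the usual Gaussian kernel w_i = exp(-||x_i - x||^2 / (2*tau^2)):
# a small tau makes the fit very local (only nearby training points matter),
# while a large tau approaches ordinary unweighted linear regression.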
def get_parameters(w, n):
    # Compute the parameters theta for the subsequent get_prediction
    theta = [0] * n
    # Stochastic gradient descent updates the parameters for every single test
    # point; this is where the code differs from most other implementations!
    theta = stochastic_gradient_descent(w, theta)
    # print(theta)
    return theta

def get_prediction(w, x):
    theta = get_parameters(w, len(x))
    prediction = numpy.dot(numpy.array(theta), numpy.array(x))
    return prediction
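# Note that get_parameters restarts SGD from theta = 0 for every test point,
# which is exactly the inefficiency mentioned in the header comment.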
data_train = get_data('hw_data_train.csv')
data_test = get_data('hw_data_test.csv')
X_s, Y_s = arrange_data(data_train)
Xts, Yts = arrange_data(data_test)

# (HYPER-)PARAMETERS
tau = 0.1         # Weight parameter
alpha = 0.01      # Learning rate
max_n = 1000      # Stochastic gradient descent loops
epsilon = 0.0001  # Stochastic gradient descent tolerance [not used here, though]
variance = float(0)
for i in range(len(Xts)):
    x = Xts[i]
    y = Yts[i]
    w = get_weights(X_s, x)
    prediction = get_prediction(w, x)
    print("Actual: " + str(y) + " Predicted: " + str(prediction))
    variance = variance + (prediction - y) ** 2
variance = variance / len(Xts)  # this "variance" is really the test-set mean squared error
print("Variance: ", variance)
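One simple way to attack the tuning question raised at the top is a grid
search over tau: rerun the evaluation loop above for each candidate and keep
the tau with the lowest mean squared error. Below is a minimal sketch that
reuses the globals defined above; the evaluate_tau helper and the candidate
grid are my own additions, not part of the original script, and ideally the
search would use a separate validation split rather than the test set.

def evaluate_tau(candidates):
    # Hypothetical helper: try each candidate tau and return the one with the
    # lowest mean squared error on (Xts, Yts)
    global tau
    best_tau, best_mse = None, float('inf')
    for t in candidates:
        tau = t
        mse = 0.0
        for i in range(len(Xts)):
            w = get_weights(X_s, Xts[i])
            mse += (get_prediction(w, Xts[i]) - Yts[i]) ** 2
        mse /= len(Xts)
        if mse < best_mse:
            best_tau, best_mse = t, mse
    return best_tau, best_mse

# Example usage: best_tau, best_mse = evaluate_tau([0.01, 0.03, 0.1, 0.3, 1.0])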
# This part generates the training and test data
import numpy

num_train = 100
num_test = 20
with open("data_train.csv", 'a') as file:
    x = -5
    for i in range(num_train):
        # The 0* factor disables the Gaussian noise; raise it to make the data noisy
        y = (x) ** 2 + 0 * numpy.random.normal(0, 1)
        file.write(str(x) + "," + str(y) + "\n")
        x = x + 10 / num_train
with open("data_test.csv", 'a') as file:
    x = -5
    for i in range(num_test):
        y = (x) ** 2 + 0 * numpy.random.normal(0, 1)
        file.write(str(x) + "," + str(y) + "\n")
        x = x + 10 / num_test
# Note: lwls.py above reads 'hw_data_train.csv' and 'hw_data_test.csv', so
# rename the generated files (or the paths) accordingly