Writing a simple linear regression with numpy
First we generate some data points to serve as test data; if you already have your own test data, you can skip this step.
import numpy as np
import matplotlib.pyplot as plt

# Generate 300 samples around the line y = 0.825 * x with Gaussian noise
points = []
for i in range(300):
    x = round(np.random.uniform(0, 20), 2)
    noise = np.random.normal(0, 1)      # noise ~ N(0, 1)
    y = round(0.825 * x + noise, 2)
    points.append([x, y])

points = np.array(points)               # shape (300, 2)
plt.scatter(points[:, 0], points[:, 1], c='r')
plt.show()
Take a quick look at the scatter plot of the generated data.
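As a side note, the same data can be generated without the Python loop. The following vectorized version is only a sketch of an alternative and is not needed by the rest of the post:

# Vectorized alternative (a sketch, not used below)
xs = np.round(np.random.uniform(0, 20, size=300), 2)
ys = np.round(0.825 * xs + np.random.normal(0, 1, size=300), 2)
points = np.column_stack((xs, ys))   # shape (300, 2), same layout as above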
Now we get into the linear regression computation itself.
Step 1: implement the error (loss) function
Formula:

$$\mathrm{loss} = \frac{1}{N}\sum_{i=1}^{N}\left(y_i - (w x_i + b)\right)^2$$
Code implementation:
# Step 1: compute the mean squared error for given w and b
def compute_error_for_given_points(b, w, points):
    total_error = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        total_error += (y - (w * x + b)) ** 2   # squared error of one point
    return total_error / float(len(points))    # average over all points
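The loop above mirrors the formula term by term. A vectorized numpy equivalent computes the same value in one expression; this is only a sketch, and the name compute_error_vec is introduced here, not from the original code:

# Vectorized equivalent (a sketch): mean squared error in one expression
def compute_error_vec(b, w, points):
    points = np.array(points)
    x, y = points[:, 0], points[:, 1]
    return np.mean((y - (w * x + b)) ** 2)

At the true parameters (w = 0.825, b = 0) the loss should come out near the noise variance of roughly 1, which makes a handy sanity check.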
Step 2: compute the gradients and update
Formulas:

$$\frac{\partial \mathrm{loss}}{\partial w} = \frac{2}{N}\sum_{i=1}^{N} x_i\left((w x_i + b) - y_i\right),\qquad \frac{\partial \mathrm{loss}}{\partial b} = \frac{2}{N}\sum_{i=1}^{N}\left((w x_i + b) - y_i\right)$$

$$w \leftarrow w - lr \cdot \frac{\partial \mathrm{loss}}{\partial w},\qquad b \leftarrow b - lr \cdot \frac{\partial \mathrm{loss}}{\partial b}$$

where lr is the learning rate, and the gradients are the partial derivatives of the loss with respect to w and b.
Code implementation:
# Step 2: compute the gradients and take one update step
def compute_gradient_and_update(b_current, w_current, lr, points):
    w_gradient = 0
    b_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        w_gradient += (2 / N) * x * ((w_current * x + b_current) - y)
        b_gradient += (2 / N) * ((w_current * x + b_current) - y)
    new_w = w_current - (lr * w_gradient)   # step against the gradient
    new_b = b_current - (lr * b_gradient)
    return [new_w, new_b]
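As with the loss, the gradient loop can be vectorized. The version below is a sketch that computes the same update (the _vec name is introduced here for illustration):

# Vectorized equivalent (a sketch): same gradients without the inner loop
def compute_gradient_and_update_vec(b_current, w_current, lr, points):
    points = np.array(points)
    x, y = points[:, 0], points[:, 1]
    error = (w_current * x + b_current) - y          # residuals
    w_gradient = (2 / len(points)) * np.sum(x * error)
    b_gradient = (2 / len(points)) * np.sum(error)
    return [w_current - lr * w_gradient, b_current - lr * b_gradient]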
Step 3: loop over the training iterations
Code implementation:
# Step 3: iterate the w and b updates
def gradient_descent_runner(w_start, b_start, lr, times, points):
    b = b_start
    w = w_start
    points = np.array(points)   # convert once instead of on every iteration
    for i in range(times):
        w, b = compute_gradient_and_update(b, w, lr, points)
    return [w, b]
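To watch the loss shrink as training runs, a logging variant of the runner can help. This is only a sketch; the log_every parameter is introduced here for illustration:

# Optional (a sketch): print the loss every log_every iterations
def gradient_descent_runner_verbose(w_start, b_start, lr, times, points, log_every=300):
    w, b = w_start, b_start
    points = np.array(points)
    for i in range(times):
        w, b = compute_gradient_and_update(b, w, lr, points)
        if (i + 1) % log_every == 0:
            error = compute_error_for_given_points(b, w, points)
            print("iteration {0}: error = {1:.4f}".format(i + 1, error))
    return [w, b]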
Finally, assign the hyperparameters and run it:
def run():
    lr = 0.001        # learning rate
    initial_b = 0
    initial_w = 0
    times = 1500      # number of gradient descent iterations
    print("Starting gradient descent at w = {0}, b = {1}, error = {2}"
          .format(initial_w, initial_b,
                  compute_error_for_given_points(initial_b, initial_w, np.array(points))))
    print("Running:")
    [w, b] = gradient_descent_runner(initial_w, initial_b, lr, times, points)
    print("After {0} iterations: w = {1}, b = {2}, error = {3}"
          .format(times, w, b, compute_error_for_given_points(b, w, np.array(points))))

if __name__ == '__main__':
    run()
Run output:
Compare the final w and b against the w = 0.825 (and b = 0) used to generate the data; the error drops considerably from its starting value, and even running on CPU the whole thing finishes quickly.
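For a visual check, the fitted line can be drawn over the scatter plot. This snippet is a sketch that reuses the functions above with the same lr and iteration count as in run():

# Optional (a sketch): overlay the fitted line on the data
pts = np.array(points)
w, b = gradient_descent_runner(0, 0, 0.001, 1500, pts)
plt.scatter(pts[:, 0], pts[:, 1], c='r')
xs = np.linspace(0, 20, 2)               # two endpoints suffice for a line
plt.plot(xs, w * xs + b, c='b')          # fitted line y = w*x + b
plt.show()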