Linear Regression
'''linear_regression'''
import numpy as np
import matplotlib.pyplot as plt
# Model: y = w*x + b
# Loss: loss = (1/(2N)) * Σ_{i=1..N} (y_i - (w*x_i + b))^2
def compute_error_for_line_given_points(w, b, points):
    # Mean squared error (halved) over all points
    total_error = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        total_error += (y - (w * x + b)) ** 2
    return total_error / (2 * float(len(points)))
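The same loss can be written in vectorized NumPy, which avoids the Python loop; a minimal sketch (the helper name compute_error_vectorized is an assumption, not part of the original script):

def compute_error_vectorized(w, b, points):
    # points is an (N, 2) array: column 0 is x, column 1 is y
    x, y = points[:, 0], points[:, 1]
    # Same quantity as the loop above: (1/(2N)) * Σ (y_i - (w*x_i + b))^2
    return np.mean((y - (w * x + b)) ** 2) / 2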
# Gradient descent update rule:
# w' = w - lr * ∂loss/∂w
# b' = b - lr * ∂loss/∂b
def step_gradient(w_current, b_current, points, learning_rate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, int(N)):
        x = points[i, 0]
        y = points[i, 1]
        # ∂loss/∂w = -(1/N) * Σ (y_i - (w*x_i + b)) * x_i
        w_gradient += -(1 / N) * (y - (w_current * x + b_current)) * x
        # ∂loss/∂b = -(1/N) * Σ (y_i - (w*x_i + b))
        b_gradient += -(1 / N) * (y - (w_current * x + b_current))
    new_w = w_current - learning_rate * w_gradient
    new_b = b_current - learning_rate * b_gradient
    return [new_w, new_b]
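Both gradients can also be computed in one vectorized pass over the whole dataset; a sketch under the same (N, 2) layout (step_gradient_vectorized is a hypothetical name):

def step_gradient_vectorized(w_current, b_current, points, learning_rate):
    x, y = points[:, 0], points[:, 1]
    residual = y - (w_current * x + b_current)  # shape (N,)
    w_gradient = -np.mean(residual * x)         # ∂loss/∂w
    b_gradient = -np.mean(residual)             # ∂loss/∂b
    return [w_current - learning_rate * w_gradient,
            b_current - learning_rate * b_gradient]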
def gradient_descent_runner(points, start_w, start_b, learning_rate, num_iterations):
    w = start_w
    b = start_b
    for i in range(num_iterations):
        w, b = step_gradient(w, b, np.array(points), learning_rate)
    return [w, b]
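For a single feature, the optimal w and b also have a closed-form least-squares solution, which is a convenient cross-check on the iterative result; a sketch using np.polyfit (not part of the original script):

def least_squares_fit(points):
    # np.polyfit with degree 1 returns [slope, intercept],
    # i.e. the w and b minimizing the same squared error
    w, b = np.polyfit(points[:, 0], points[:, 1], 1)
    return w, b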
def run():
    points = np.genfromtxt("data.csv")
    print(len(points))
    learning_rate = 0.006
    initial_w = 0
    initial_b = 0
    num_iterations = 1000
    print("Starting gradient descent at w={0}, b={1}, error={2}".format(
        initial_w, initial_b,
        compute_error_for_line_given_points(initial_w, initial_b, points)))
    print("Running...")
    w, b = gradient_descent_runner(points, initial_w, initial_b, learning_rate, num_iterations)
    print("After {0} iterations w = {1} b = {2} error = {3}".format(
        num_iterations, w, b,
        compute_error_for_line_given_points(w, b, points)))
    # Scatter the data points and overlay the fitted line
    array_x = points[:, 0]
    array_y = points[:, 1]
    plt.scatter(array_x, array_y)
    plt.plot(array_x, w * array_x + b, color='r')
    plt.show()

if __name__ == '__main__':
    run()
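Since the synthetic data generated below follows y = 2.234*x + 3.765 exactly, the learned parameters should approach those values; a quick check outside run() (assuming data.csv already exists):

points = np.genfromtxt("data.csv")
w, b = gradient_descent_runner(points, 0, 0, 0.006, 1000)
print(w, b)  # expected to approach 2.234 and 3.765 for this noise-free data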
Creating synthetic data
The script above reads data.csv; the following snippet generates that file from a known line (w = 2.234, b = 3.765).
import numpy as np
import matplotlib.pyplot as plt

# 1000 x values drawn from a standard normal distribution
arrays_x = np.random.randn(1000)
# Noise-free targets on the line y = 2.234*x + 3.765
arrays_y = arrays_x * 2.234 + 3.765
# Stack into an (N, 2) array: column 0 is x, column 1 is y
res = np.hstack([arrays_x[:, np.newaxis], arrays_y[:, np.newaxis]])
print(res.dtype)
plt.scatter(arrays_x, arrays_y)
plt.show()
# Space-delimited text file, readable back with np.genfromtxt
np.savetxt('data.csv', res)
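A quick way to confirm the file round-trips correctly before running the regression script; a minimal sketch:

check = np.genfromtxt('data.csv')
print(check.shape)              # expected: (1000, 2)
print(np.allclose(check, res))  # saved and reloaded values should match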