# Problem URL: http://www.qlcoder.com/task/763b
"""
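Input: a 1000-line text file in which every line holds two floating-point
numbers, the coordinates (x, y) of one point. (All points lie in the first
quadrant.)
Task: find a straight line Y = a*X + b such that the sum of the distances from
all points in the file to that line is as small as possible. Keep two decimal
places.
Approach: model the line as f(x) = m*x + k and fit m and k by gradient descent
on the mean squared error.
Note: the number of iterations and the learning rate must be chosen with care.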
"""
import numpy as np
import matplotlib.pyplot as plt
def compute_error(m, k, points):
    """Return the mean squared error of the line ``y = m*x + k`` over *points*.

    Args:
        m: Slope of the line.
        k: Intercept of the line.
        points: Array-like of rows whose first two columns are ``x`` and ``y``
            (a NumPy array or a plain list of pairs).

    Returns:
        The average of ``(y - (m*x + k)) ** 2`` over all rows, as a float.

    Raises:
        ValueError: If *points* contains no rows, since the mean is undefined.
    """
    pts = np.asarray(points, dtype=float)
    if pts.size == 0:
        raise ValueError("points must contain at least one (x, y) row")
    x = pts[:, 0]
    y = pts[:, 1]
    # Vertical residual of every point, computed in one vectorized pass.
    residuals = y - (m * x + k)
    return float(np.mean(residuals ** 2))
def step_gradient(m_current, k_current, points, learningRate):
    """Perform one gradient-descent update of the line ``y = m*x + k``.

    The gradient is that of the mean squared error with respect to the slope
    and the intercept, evaluated at ``(m_current, k_current)``.

    Args:
        m_current: Current slope.
        k_current: Current intercept.
        points: Array-like of rows whose first two columns are ``x`` and ``y``.
        learningRate: Step size applied to the gradient.

    Returns:
        A two-element list ``[new_m, new_k]`` with the updated parameters.
    """
    pts = np.asarray(points, dtype=float)
    if pts.size == 0:
        # No data means no gradient contribution; the parameters stay put.
        m_gradient = 0.0
        k_gradient = 0.0
    else:
        x = pts[:, 0]
        y = pts[:, 1]
        residuals = y - (m_current * x + k_current)
        # d/dm and d/dk of mean((y - (m*x + k))**2).
        m_gradient = -2.0 * np.mean(x * residuals)
        k_gradient = -2.0 * np.mean(residuals)
    new_m = m_current - learningRate * m_gradient
    new_k = k_current - learningRate * k_gradient
    return [new_m, new_k]
def gradient_descent_runner(points, starting_m, starting_k, learning_rate, num_iterations):
    """Run ``num_iterations`` gradient-descent steps and return the final line.

    Args:
        points: Array-like of rows whose first two columns are ``x`` and ``y``.
        starting_m: Initial slope.
        starting_k: Initial intercept.
        learning_rate: Step size passed to every ``step_gradient`` call.
        num_iterations: Number of update steps to perform.

    Returns:
        A two-element list ``[m, k]`` holding the slope and intercept after the
        last step (the starting values when ``num_iterations`` is 0).
    """
    # Convert once up front instead of re-copying the data on every iteration.
    data = np.asarray(points)
    m, k = starting_m, starting_k
    for _ in range(num_iterations):
        m, k = step_gradient(m, k, data, learning_rate)
    return [m, k]
def run():
    """Fit a line to the points in ``data.txt`` and print the result.

    Loads the space-delimited file ``data.txt``, runs gradient descent from
    ``m = 0, k = 0`` with a fixed learning rate and iteration count, and prints
    the error before and after, followed by the fitted ``m`` and ``k`` joined
    by a dash.
    """
    points = np.genfromtxt("data.txt", delimiter=' ')
    learning_rate = 0.0001
    init_m = 0
    init_k = 0
    num_iterations = 2000
    # The labels now match the values passed in (slope m, intercept k); the
    # previous text printed the slope under "b" and the intercept under "m".
    print("Starting gradient descent at m = {0}, k = {1}, error = {2}".format(
        init_m, init_k, compute_error(init_m, init_k, points)))
    print("Running...")
    m, k = gradient_descent_runner(points, init_m, init_k, learning_rate, num_iterations)
    print("After {0} iterations m = {1}, k = {2}, error = {3}".format(
        num_iterations, m, k, compute_error(m, k, points)))
    print('----------------------')
    # NOTE(review): the task text at the top of the file asks for two decimal
    # places, but this prints full precision -- confirm whether rounding is wanted.
    print("{0}-{1}".format(m, k))
def draw():
    """Show a scatter plot of the points stored in ``data.txt``.

    Loads the space-delimited file ``data.txt`` and plots its first column
    against its second column with matplotlib.
    """
    # genfromtxt already returns an ndarray, so no extra np.array copy is needed.
    points = np.genfromtxt("data.txt", delimiter=' ')
    plt.scatter(points[:, 0], points[:, 1])
    plt.show()
# Script entry point: run the fit only when executed directly, not on import.
if __name__ == "__main__":
    run()
    # Uncomment the call below to show a scatter plot of the input points.
    # draw()