第一步
直线是y=k*x+b
用如下代码随机出两个值并赋给k和b
# Randomly initialise the slope k in [0, 50] and the intercept b in
# [-50, 50] (random.randint bounds are inclusive at both ends).
k = random.randint(0, 50)
b = random.randint(-50, 50)
最后对应求y的函数就应该是
def y_guess(k, x, b):
    """Predict y for input x on the straight line y = k*x + b.

    Works for scalar x as well as NumPy arrays (elementwise), since the
    script later passes the whole feature column X_rm as x.
    """
    return k * x + b
第二步
随机初始化的直线肯定与数据相差很大,
效果不理想,因此引入损失函数来量化预测误差
$$L_2\text{Loss} = \frac{1}{n}\sum_{i=1}^{n}\left(y_{\text{true},i} - \hat{y}_i\right)^2$$
损失函数越大代表直线预测得越差。
函数实现:
# Compute the loss
def l2_loss(y_ture, y_guess):
    """Mean squared error (L2 loss) between true values and predictions.

    NOTE(review): parameter name 'y_ture' is a typo for 'y_true'; kept
    as-is so any keyword callers are not broken.
    """
    return np.mean((np.array(y_ture) - np.array(y_guess)) ** 2)
第三步
学习过程:
对损失函数求偏导。
把k和b当作变量分别对k,b求偏导。
b求偏导:
def partial_b(y_ture, y_guess):
    """Partial derivative of the L2 loss with respect to the intercept b.

    d/db mean((y_true - (k*x + b))**2) = -2 * mean(y_true - y_hat).
    NOTE(review): 'y_ture' is a typo for 'y_true'; kept for compatibility.
    """
    return -2 * np.mean((np.array(y_ture) - np.array(y_guess)))
k求偏导:
def partial_k(y_ture, y_guess, x):
    """Partial derivative of the L2 loss with respect to the slope k.

    d/dk mean((y_true - (k*x + b))**2) = -2 * mean((y_true - y_hat) * x).
    NOTE(review): 'y_ture' is a typo for 'y_true'; kept for compatibility.
    """
    return -2 * np.mean((np.array(y_ture) - np.array(y_guess)) * np.array(x))
完整函数
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import random
import numpy as np
# Load the Boston housing dataset: X is the feature matrix, Y the target
# (median house value).
# NOTE(review): sklearn.datasets.load_boston was deprecated in scikit-learn
# 1.0 and removed in 1.2 — this script requires scikit-learn < 1.2; confirm
# the installed version.
data = load_boston()
X, Y = data['data'], data['target']
# Column 5 — presumably the average number of rooms per dwelling (RM) —
# is used as the single regression feature. TODO confirm column meaning.
room_index = 5
X_rm = X[:, room_index]
print(X.shape)
def partial_k(y_ture, y_guess, x):
    """Partial derivative of the L2 loss with respect to the slope k.

    d/dk mean((y_true - (k*x + b))**2) = -2 * mean((y_true - y_hat) * x).
    NOTE(review): 'y_ture' is a typo for 'y_true'; kept for compatibility.
    """
    return -2 * np.mean((np.array(y_ture) - np.array(y_guess)) * np.array(x))
def partial_b(y_ture, y_guess):
    """Partial derivative of the L2 loss with respect to the intercept b.

    d/db mean((y_true - (k*x + b))**2) = -2 * mean(y_true - y_hat).
    NOTE(review): 'y_ture' is a typo for 'y_true'; kept for compatibility.
    """
    return -2 * np.mean((np.array(y_ture) - np.array(y_guess)))
def l2_loss(y_ture, y_guess):
    """Mean squared error (L2 loss) between true values and predictions.

    NOTE(review): parameter name 'y_ture' is a typo for 'y_true'; kept
    as-is so any keyword callers are not broken.
    """
    return np.mean((np.array(y_ture) - np.array(y_guess)) ** 2)
def y_guess(k, x, b):
    """Predict y for input x on the straight line y = k*x + b.

    Works for scalar x as well as NumPy arrays (elementwise), since the
    training loop passes the whole feature column X_rm as x.
    """
    return k * x + b
# --- Gradient-descent training loop ---
trying_time = 20000          # number of gradient-descent iterations
min_loss = float('inf')      # lowest loss seen so far
best_k, best_b = None, None  # parameters that achieved min_loss
learning_rate = 1e-4         # step size for each parameter update

# Random initial guess for the line's slope and intercept.
k = random.randint(0, 50)
b = random.randint(-50, 50)

# Show the data together with the (random) initial line.
# Fixes: the original called the undefined name `pltfigure()` (NameError)
# and drew the same scatter plot twice.
plt.figure()
plt.scatter(X_rm, Y, color='red', alpha=0.5)
plt.plot(X_rm, y_guess(k, X_rm, b), color='green')
plt.show()

# Fix: the original `forin range(...)` was a SyntaxError.
for _ in range(trying_time):
    yhat = y_guess(k, X_rm, b)
    L2_loss = l2_loss(Y, yhat)
    # Remember the best parameters seen so far.
    if L2_loss < min_loss:
        best_k = k
        best_b = b
        min_loss = L2_loss
    # Step against the gradient to reduce the loss.
    k = k - partial_k(Y, yhat, X_rm) * learning_rate
    b = b - partial_b(Y, yhat) * learning_rate

print('L2loss=', min_loss)

# Plot the data together with the best fitted line.
plt.scatter(X_rm, Y, color='red')
plt.plot(X_rm, y_guess(best_k, X_rm, best_b), color='blue')
# Fix: the original format string omitted the '+' before the intercept.
print('y = {} * x + {}'.format(best_k, best_b))
plt.show()