数据集:
代码:
import numpy as np
from matplotlib import pylab as pl
# Training data: x holds the inputs, y the matching targets,
# so sample i is the pair (x[i], y[i]).
x = np.array([2104, 1416, 1534, 852])
y = np.array([460, 232, 315, 178])
# Least-squares fit of a straight line to the samples.
def fit(x, y):
    """Fit ``y = b0 * x + b1`` to the samples by ordinary least squares.

    Naming used by this file: ``b0`` is the slope and ``b1`` is the
    intercept.

    Args:
        x: 1-D sequence of input values.
        y: 1-D sequence of target values, same length as ``x``.

    Returns:
        Tuple ``(b0, b1)`` with the slope and the intercept.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length, are empty, or if
            all ``x`` values are equal (the slope is then undefined).
    """
    # Fail loudly: a silent ``return`` would only surface later as an
    # unpacking error at the call site.
    if len(x) != len(y):
        raise ValueError(
            'x and y must have the same length, got %d and %d'
            % (len(x), len(y))
        )
    if len(x) == 0:
        raise ValueError('x and y must not be empty')

    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)
    x_mean = x_arr.mean()
    y_mean = y_arr.mean()

    # Sum of cross-deviations and sum of squared x-deviations.
    x_dev = x_arr - x_mean
    numerator = np.sum(x_dev * (y_arr - y_mean))
    denominator = np.sum(np.square(x_dev))
    print('numerator:', numerator, 'denominator:', denominator)

    # Zero spread in x means no unique line exists; avoid dividing by zero.
    if denominator == 0:
        raise ValueError('all x values are equal; slope is undefined')

    b0 = numerator / denominator
    b1 = y_mean - b0 * x_mean
    return b0, b1
# Prediction helper: evaluate the fitted line.
def predit(x, b0, b1):
    """Return the value of the line with slope ``b0`` and intercept ``b1`` at ``x``."""
    scaled = b0 * x
    return scaled + b1
# Fit the regression line on the training data.
b0, b1 = fit(x, y)
# NOTE: '%2.0f' prints no decimals, so any slope with magnitude below 0.5
# is displayed as 0 even though b0 itself is non-zero.
print('Line is:y = %2.0fx + %2.0f' % (b0, b1))

# Predict on a second set of inputs. y_test keeps its (1, n) shape; the
# whole row is filled in one vectorised call.
x_test = np.array([2000, 2100, 1000, 1500])
y_test = np.zeros((1, len(x_test)))
y_test[0] = predit(x_test, b0, b1)

# Plot. Sample the line over the range actually covered by the data so it
# passes through the scattered points instead of the interval [0, 5].
x_low = min(x.min(), x_test.min())
x_high = max(x.max(), x_test.max())
xx = np.linspace(x_low, x_high)
yy = b0 * xx + b1
pl.plot(xx, yy, 'k-')
# cmap has no effect without per-point colour values (c=...), so it is omitted.
pl.scatter(x, y)
pl.scatter(x_test, y_test[0])
pl.show()
结果
问题:
我们发现打印出来的 b0 = 0!(实际斜率约为 0.23,只是因为 x 的数值很大、斜率很小,而格式 `%2.0f` 不保留小数,四舍五入后显示为 0。)
解决:特征缩放、均值归一化
均值归一化:
原来:
均值归一化后: