1.生成数据
# Sample data: eleven evenly spaced inputs 0..10 and their observed responses.
x = np.arange(0, 11)
y = np.array([12, 14, 18, 19, 21, 23, 24, 26, 27, 29, 31])

# Show the raw data as a scatter plot.
plt.scatter(x, y)
plt.show()
2.算术法求解
# Positional hold-out split: the first 7 samples are used for training and
# the remaining 4 for testing. The inputs are reshaped into column vectors
# of shape (n_samples, 1); the targets stay one-dimensional.
split_at = 7
x_train, x_test = x[:split_at].reshape(-1, 1), x[split_at:].reshape(-1, 1)
y_train, y_test = y[:split_at], y[split_at:]
然后利用公式对系数和截距进行计算
$$b_1 = \frac{\sum_{i=1}^{n}\left(x^{(i)}-\overline{x}\right)\left(y^{(i)}-\overline{y}\right)}{\sum_{i=1}^{n}\left(x^{(i)}-\overline{x}\right)^2}$$
$$b_0 = \overline{y} - b_1\,\overline{x}$$
# Closed-form least-squares fit of y = b0 + b1 * x on the training set:
#   b1 = sum((x_i - x_mean) * (y_i - y_mean)) / sum((x_i - x_mean) ** 2)
#   b0 = y_mean - b1 * x_mean
# x_train is indexed row by row, so when it is a column vector each
# x_train[i] is a length-1 array and b1 / b0 come out as length-1 arrays.

# The means do not change inside the loop, so compute them once up front.
x_mean = np.mean(x_train)
y_mean = np.mean(y_train)

num = 0  # numerator: accumulated co-deviation of x and y
d = 0    # denominator: accumulated squared deviation of x
n = len(x_train)
for i in range(n):
    dx = x_train[i] - x_mean
    num += dx * (y_train[i] - y_mean)
    d += dx ** 2

# Plain true division: calling float() on a one-element array is deprecated
# in recent NumPy releases and is not needed to get a float result here.
b1 = num / d
b0 = y_mean - b1 * x_mean
得到b1,b0的结果分别为
(array([2.03571429]), array([12.60714286]))
将直线绘制出来
# Evaluate the fitted line on 100 evenly spaced points across [0, 10].
x_ = np.linspace(0, 10, 100)
y_ = b1 * x_ + b0

# Overlay the fitted line (red) on the scatter plot of all observations.
plt.scatter(x, y)
plt.plot(x_, y_, c='r')
plt.show()
3.向量法求解
# Fit an ordinary least-squares model on the training set; fit() returns the
# estimator itself, so creation and fitting can be chained.
regression = linear_model.LinearRegression().fit(x_train, y_train)

# Read back the fitted intercept (b0) and coefficient array (b1).
b0 = regression.intercept_
b1 = regression.coef_
得到b0,b1的值分别为
(12.60714285714286, array([2.03571429]))
再将直线绘制出来
4.算术法向量法的性能比较
通过计算两者的SSE进行比较
# Sum of squared errors (SSE) of the fitted line on the held-out test set.
SSE = 0
pre = b0 + b1 * x_test  # predictions for the test inputs

# Pair each target with its prediction instead of hard-coding the number of
# test samples, so the loop stays correct if the split changes.
for actual, predicted in zip(y_test, pre):
    SSE += (actual - predicted) ** 2
print(SSE)
得到两者的SSE都大约为
[11.89540816]