Linear Regression
I. Theory
Linear regression is a classic model in machine learning that models the relationship between independent variables and a dependent variable. When there is a single independent variable it is called simple regression; with more than one, multiple regression. In machine learning terms, the independent variables are the features and the dependent variable is the label, i.e., we learn a model that captures the mapping from features to label, written as:
$$f(\mathbf{x}; \mathbf{w}, b) = \mathbf{w}^\mathrm{T}\mathbf{x} + b$$
where $\mathbf{w}$ and $b$ are learnable parameters, and $f(\mathbf{x}; \mathbf{w}, b)$ is called a linear model. To simplify the notation, apply the following transformation:
$$\hat{\mathbf{w}} = \mathbf{w} \oplus b \triangleq [\mathbf{w} \;\; b]^\mathrm{T} \triangleq [w_1, w_2, \ldots, w_D, b]^\mathrm{T}$$
$$\hat{\mathbf{x}} = \mathbf{x} \oplus 1 \triangleq [\mathbf{x} \;\; 1]^\mathrm{T} \triangleq [x_1, x_2, \ldots, x_D, 1]^\mathrm{T}$$
where $\hat{\mathbf{w}}$ and $\hat{\mathbf{x}}$ are the augmented weight vector and the augmented feature vector, respectively. The linear regression model can then be rewritten compactly as:
$$f(\mathbf{x}; \hat{\mathbf{w}}) = \hat{\mathbf{w}}^\mathrm{T} \hat{\mathbf{x}}$$
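As a quick illustration of the augmented form, the sketch below (with made-up example values, not part of the implementation that follows) appends the bias to the weight vector and a constant 1 to the feature vector, so the prediction reduces to a single dot product:

import numpy as np

# Hypothetical example values: D = 3 features (numbers are made up)
w = np.array([0.5, -1.2, 2.0])   # weight vector w
b = 0.7                          # bias b
x = np.array([1.0, 2.0, 3.0])    # feature vector x

w_hat = np.append(w, b)          # augmented weights [w1, ..., wD, b]
x_hat = np.append(x, 1.0)        # augmented features [x1, ..., xD, 1]

# The augmented dot product reproduces w^T x + b exactly
assert np.isclose(w_hat @ x_hat, w @ x + b)
print(w_hat @ x_hat)             # 4.8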
To train a good model, define the loss function over the $N$ training samples as:
$$\mathcal{L}(\theta) = \frac{1}{2}\sum_{n=1}^{N}\left(y^{(n)} - f(\mathbf{x}^{(n)}; \mathbf{w}, b)\right)^2$$
where $\theta = (\mathbf{w}, b)$ denotes the learnable parameters and $(\mathbf{x}^{(n)}, y^{(n)})$ is the $n$-th training sample.
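A minimal sketch of this loss for the one-dimensional case used in the code below (y = a·x + b, with toy data; the function name is my own):

import numpy as np

def squared_error_loss(a, b, x, y):
    """L = 1/2 * sum_n (y_n - (a * x_n + b))^2 for the 1-D case."""
    residual = y - (a * x + b)
    return 0.5 * np.sum(residual ** 2)

# Toy data lying exactly on y = 2x + 1, so the loss at (a, b) = (2, 1) is 0
x = np.array([0.0, 1.0, 2.0])
y = 2.0 * x + 1.0
print(squared_error_loss(2.0, 1.0, x, y))  # 0.0
print(squared_error_loss(1.0, 0.0, x, y))  # 7.0, positive for wrong parameters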
II. Code Implementation
Implementation code
"""
@File : LinearRegression.py
@Author : BaoLin Yin
@Contact : 932261247@qq.com
@License : (C)Copyright 2018-2021
@Version : V1.0
@Date : 2022/9/5
@Encoding : UTF-8
@Des : The class of linear regression
"""
import numpy as np
import torch


class LinearRegression:
    def __init__(self, data, iteration_max, learning_rate, gradient_descent_mode, batch_size):
        self.iteration_max = iteration_max
        self.gradient_descent_mode = gradient_descent_mode
        self.data = data
        self.num_data = data.shape[0]
        self.batch_size = batch_size
        self.lr = learning_rate

    def train(self):
        # 1. Initialize a and b in y = a * x + b with random values
        a = np.random.random()
        b = np.random.random()
        loss = []
        data_x = self.data[:, 0]
        data_y = self.data[:, 1]
        # 2. Iteratively update the parameters
        # 2.1 Three gradient descent modes: 1 = batch, 2 = stochastic, 3 = mini-batch
        data_train_each_iteration = None
        if self.gradient_descent_mode == 1:
            data_train_each_iteration = self.data
        for iteration_index in range(self.iteration_max):
            print("------------- Iteration " + str(iteration_index) + " starts -------------")
            if self.gradient_descent_mode == 2:
                # Stochastic: a single randomly chosen sample
                data_train_each_iteration = np.array([self.data[np.random.randint(low=0, high=self.num_data), :]])
            elif self.gradient_descent_mode == 3:
                # Mini-batch: batch_size randomly chosen samples
                data_index = np.random.randint(low=0, high=self.num_data, size=self.batch_size)
                data_train_each_iteration = self.data[data_index, :]
            # 3. Compute the gradients of the loss L = 1/2 * (y_r - y_p) ** 2
            a_gradient, b_gradient = self.get_gradient(a, b, data_train_each_iteration)
            # 4. Update the parameters
            a = a - self.lr * a_gradient
            b = b - self.lr * b_gradient
            # Track the mean loss over the full data set
            current_loss = (1 / 2 * (data_y - a * data_x - b) ** 2).mean()
            loss.append(current_loss)
            print("------------- Iteration " + str(iteration_index) + " ends, loss = " + str(current_loss) + " -------------")
        return a, b, loss

    @staticmethod
    def get_gradient(a, b, data):
        data_x = data[:, 0]
        data_y = data[:, 1]
        # Wrap the parameters as tensors so autograd can differentiate the loss
        a_tensor = torch.tensor(a, dtype=torch.float64, requires_grad=True)
        b_tensor = torch.tensor(b, dtype=torch.float64, requires_grad=True)
        data_x_tensor = torch.tensor(data_x)
        data_y_tensor = torch.tensor(data_y)
        loss = 1 / 2 * (data_y_tensor - a_tensor * data_x_tensor - b_tensor) ** 2
        # loss holds per-sample losses: back-propagating a vector of ones sums their gradients
        loss.backward(torch.ones_like(loss))
        # Average the summed gradients over the batch
        a_grad = a_tensor.grad.item() / data.shape[0]
        b_grad = b_tensor.grad.item() / data.shape[0]
        return a_grad, b_grad
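Since this loss is simple enough to differentiate by hand, the autograd result from get_gradient can be sanity-checked against the closed-form gradients ∂L/∂a = -mean(x·(y - a·x - b)) and ∂L/∂b = -mean(y - a·x - b). A sketch, assuming toy data and a helper name of my own:

import numpy as np
from LinearRegression import LinearRegression

def analytic_gradient(a, b, data):
    # Closed-form gradients of mean_n 1/2 * (y_n - a*x_n - b)^2
    x, y = data[:, 0], data[:, 1]
    residual = y - a * x - b
    return -(x * residual).mean(), -residual.mean()

rng = np.random.default_rng(0)
toy_data = rng.random((10, 2))   # made-up (x, y) pairs
a0, b0 = 0.3, -0.1
print(analytic_gradient(a0, b0, toy_data))
print(LinearRegression.get_gradient(a0, b0, toy_data))  # should match up to float error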
Test code
"""
@File : Train.py
@Author : BaoLin Yin
@Contact : 932261247@qq.com
@License : (C)Copyright 2018-2021
@Version : V1.0
@Date : 2022/9/5
@Encoding : UTF-8
@Des : Test for linear regression
"""
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from LinearRegression import LinearRegression


if __name__ == '__main__':
    print("The training starts......")
    # 1. Prepare the data set
    data_init_df = pd.read_csv("Data/train_data.csv", header=0, usecols=['x', 'y'])
    data_init_np = data_init_df.to_numpy()
    # 2. Train the model
    linearregression = LinearRegression(data=data_init_np,
                                        iteration_max=200,
                                        learning_rate=0.0001,
                                        gradient_descent_mode=3,
                                        batch_size=500)
    a, b, loss = linearregression.train()
    print("The training has finished.")
    print("The value of the parameter a:", a)
    print("The value of the parameter b:", b)
    # 3. Plot the training data and the fitted line
    plt.figure()
    plt.scatter(data_init_np[:, 0], data_init_np[:, 1])
    x = np.linspace(0, 100)
    y = a * x + b
    plt.plot(x, y, color='r')
    plt.show()
    # Plot the loss curve over the iterations
    print(loss)
    plt.figure()
    x = np.linspace(1, len(loss), len(loss))
    plt.plot(x, loss)
    plt.show()
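The script expects Data/train_data.csv, which is not included here. A minimal sketch for generating a compatible file from synthetic data (the slope 1.5, intercept 4, noise level, and sample count are all assumptions of mine):

import os
import numpy as np
import pandas as pd

os.makedirs("Data", exist_ok=True)
rng = np.random.default_rng(42)
x = rng.uniform(0, 100, size=700)              # features in [0, 100]
y = 1.5 * x + 4 + rng.normal(0, 3, size=700)   # assumed line plus Gaussian noise
pd.DataFrame({"x": x, "y": y}).to_csv("Data/train_data.csv", index=False)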
The fitted results and loss curves obtained with batch gradient descent, stochastic gradient descent, and mini-batch gradient descent (batch size = 64) are shown below: