import numpy as np
# Import the load_diabetes dataset loader
from sklearn.datasets import load_diabetes
# Import the shuffle utility
from sklearn.utils import shuffle

def linear_loss(X, y, w, b):
    """
    :param X: input feature matrix
    :param y: output label vector
    :param w: weight vector of the model parameters
    :param b: bias term
    :return: y_hat, loss, dw, db
    """
    # Number of training samples
    num_train = X.shape[0]
    # Number of features
    num_feature = X.shape[1]
    # Linear regression predictions
    y_hat = np.dot(X, w) + b
    # Mean squared loss between predictions and labels
    # (the 1/2 factor makes the gradients below the exact derivatives of this loss)
    loss = np.sum((y_hat - y) ** 2) / (2 * num_train)
    # First-order gradient of the loss with respect to the weights
    dw = np.dot(X.T, (y_hat - y)) / num_train
    # First-order gradient of the loss with respect to the bias
    db = np.sum((y_hat - y)) / num_train
    """
    y_hat: linear regression predictions;
    loss: mean squared loss;
    dw: first-order partial derivative w.r.t. the weights;
    db: first-order partial derivative w.r.t. the bias;
    """
    return y_hat, loss, dw, db

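# A minimal gradient check for linear_loss (an illustrative sketch, not part of the
# original post; the helper name _check_linear_loss_gradient is hypothetical):
# perturb one weight, re-evaluate the loss, and compare the finite-difference
# estimate with the analytic gradient returned above. The two should agree closely.
def _check_linear_loss_gradient(eps=1e-6):
    rng = np.random.RandomState(0)
    X = rng.randn(5, 3)
    y = rng.randn(5, 1)
    w = rng.randn(3, 1)
    b = 0.5
    _, loss, dw, _ = linear_loss(X, y, w, b)
    # Finite-difference estimate of the partial derivative w.r.t. the first weight
    w_plus = w.copy()
    w_plus[0, 0] += eps
    _, loss_plus, _, _ = linear_loss(X, y, w_plus, b)
    print('analytic dw[0]:', dw[0, 0], 'numerical dw[0]:', (loss_plus - loss) / eps)
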
def initialize_params(dims):
    """
    dims: number of feature dimensions of the training data
    """
    # Initialize the weight vector to zeros
    w = np.zeros((dims, 1))
    # Initialize the bias to zero
    b = 0
    """
    w: initialized weight vector
    b: initialized bias parameter
    """
    return w, b

def linear_train(X, y, learning_rate=0.01, epochs=10000):
    """
    :param X: input feature matrix
    :param y: output label vector
    :param learning_rate: learning rate
    :param epochs: number of training iterations
    """
    # List for recording the training loss
    loss_his = []
    # Initialize the model parameters
    w, b = initialize_params(dims=X.shape[1])
    params = {}
    grads = {}
    # Iterative training
    for i in range(1, epochs + 1):
        # Predictions, mean squared loss and gradients at the current step
        y_hat, loss, dw, db = linear_loss(X=X, y=y, w=w, b=b)
        # Gradient descent parameter update
        w += -learning_rate * dw
        b += -learning_rate * db
        # Record the loss of the current iteration
        loss_his.append(loss)
        # Print the current loss every 10000 iterations
        if i % 10000 == 0:
            print(f'epoch {i}, loss {loss}.')
        # Save the parameters updated at the current step in a dict
        params = {
            'w': w,
            'b': b
        }
        # Save the gradients of the current step in a dict
        grads = {
            'dw': dw,
            'db': db
        }
    return loss_his, params, grads

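# Optional visualization (a sketch that assumes matplotlib is installed; the
# original post does not import it): plot the loss_his list returned by
# linear_train to confirm that gradient descent is converging.
def plot_loss(loss_his):
    import matplotlib.pyplot as plt
    plt.plot(range(len(loss_his)), loss_his, color='blue')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()
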
def predict(X, params):
    """
    :param X: test set
    :param params: trained model parameters
    :return: model predictions
    """
    w = params['w']
    b = params['b']
    # Predict
    y_pred = np.dot(X, w) + b
    return y_pred

def r2_score(y_test, y_pred):
    y_avg = np.mean(y_test)
    # Total sum of squares
    ss_tot = np.sum((y_test - y_avg) ** 2)
    # Residual sum of squares
    ss_res = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2

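# Tiny worked example for r2_score (illustrative only, not in the original post):
# a perfect prediction gives R^2 = 1, while predicting the mean of the targets
# for every sample gives R^2 = 0.
def _demo_r2_score():
    y_true = np.array([[1.0], [2.0], [3.0]])
    print(r2_score(y_test=y_true, y_pred=y_true))                     # 1.0
    print(r2_score(y_test=y_true, y_pred=np.full_like(y_true, 2.0)))  # 0.0
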
if __name__ == '__main__':
    # Load the diabetes dataset
    diabetes = load_diabetes()
    # Get the inputs and labels
    data, target = diabetes.data, diabetes.target
    # Shuffle the dataset
    X, y = shuffle(data, target, random_state=13)
    # Split into training and test sets with an 8:2 ratio
    offset = int(X.shape[0] * 0.8)
    # Training set
    X_train, y_train = X[:offset], y[:offset]
    # Test set
    X_test, y_test = X[offset:], y[offset:]
    # Reshape the training labels into a column vector
    y_train = y_train.reshape((-1, 1))
    # Reshape the test labels into a column vector
    y_test = y_test.reshape((-1, 1))
    loss_his, params, grads = linear_train(X=X_train, y=y_train, learning_rate=0.01, epochs=200000)
    print(params)  # model parameters obtained after training
    y_pred = predict(X=X_test, params=params)
    print(r2_score(y_test=y_test, y_pred=y_pred))
    # Method 2: scikit-learn
    from sklearn import linear_model
    from sklearn.metrics import mean_squared_error, r2_score  # shadows the custom r2_score above
    # Create the model instance
    regr = linear_model.LinearRegression()
    # Fit the model on the training data
    regr.fit(X_train, y_train)
    # Model predictions
    y_pred = regr.predict(X_test)
    # Print the mean squared error of the model
    print(mean_squared_error(y_true=y_test, y_pred=y_pred))
    print(r2_score(y_true=y_test, y_pred=y_pred))
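    # A further comparison one can add here (sketch, not in the original post): the
    # coef_ and intercept_ attributes of the fitted LinearRegression hold the
    # closed-form weights and bias, which can be compared with the parameters
    # found by gradient descent.
    print('sklearn weights:', regr.coef_)
    print('sklearn intercept:', regr.intercept_)
    print('gradient-descent weights:', params['w'].ravel())
    print('gradient-descent bias:', params['b'])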
Machine Learning for Quantitative Trading - Two Ways to Implement a Linear Regression Model