import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
class Unary_Linear_Regression:
    """Unary (simple) linear regression model: y = a*x + b.

    The model is fitted with mini-batch gradient descent on Z-score
    standardized data. Call ``restore()`` after ``train()`` to map the
    fitted coefficients back to the original (un-standardized) scale.
    """
    def __init__(self, x, y):
        # --- training hyper-parameters ---
        self.EPOCH = 200          # number of training epochs
        self.Batch = 3            # mini-batch size
        self.learning_rate = 0.1  # gradient-descent step size (alpha)
        self.train_num = 0        # number of training samples (set in read_dataset)
        self.test_num = 0         # number of test samples (set in read_dataset)
        # --- standardization parameters (computed in read_dataset) ---
        self.x_mean = 0           # mean of x
        self.x_std = 0            # standard deviation of x
        self.y_mean = 0           # mean of y
        self.y_std = 0            # standard deviation of y
        # --- model parameters a, b, initialized to 0 ---
        self.a = 0                # slope (standardized space until restore())
        self.b = 0                # intercept
        # --- data set ---
        # Convert to float ndarrays so plain lists and pandas Series with
        # non-default indexes work as well as numpy arrays.
        self.x = np.asarray(x, dtype=float)
        self.y = np.asarray(y, dtype=float)
        self.train_x, self.train_y, self.test_x, self.test_y = self.read_dataset()
    def read_dataset(self):
        """Shuffle the data set and split it into train/test subsets.

        Assumes the data set is clean (no NaN values, no strings); adapt
        this method or clean the data otherwise.

        :return: train_x, train_y, test_x, test_y as numpy arrays
        """
        # 80% of the samples for training, the remaining 20% for testing.
        self.train_num = int(len(self.x) * 0.8)
        self.test_num = len(self.x) - self.train_num
        # Standardization parameters are computed on the FULL data set.
        self.x_mean = self.x.mean()
        self.x_std = self.x.std()
        self.y_mean = self.y.mean()
        self.y_std = self.y.std()
        # Pair up the points so shuffling keeps x/y aligned.
        dataset = list(zip(self.x, self.y))
        random.shuffle(dataset)
        # Split the shuffled pairs back into separate arrays.
        new_x = np.array([pair[0] for pair in dataset])
        new_y = np.array([pair[1] for pair in dataset])
        # BUGFIX: the original sliced at train_num+1, which put one extra
        # sample in the training set and made test_num disagree with
        # len(test_x) (so test() averaged by the wrong count).
        return (new_x[:self.train_num], new_y[:self.train_num],
                new_x[self.train_num:], new_y[self.train_num:])
    def z_score(self, name, t):
        """Z-score standardize value(s) t for variable 'x' or 'y'."""
        if name == 'x':
            t = (t - self.x_mean) / self.x_std
        elif name == 'y':
            t = (t - self.y_mean) / self.y_std
        return t
    def inverse_z_score(self, name, t):
        """Inverse Z-score: map standardized value(s) t back to real scale."""
        if name == 'x':
            t = t * self.x_std + self.x_mean
        elif name == 'y':
            t = t * self.y_std + self.y_mean
        return t
    def h(self, x):
        """Hypothesis (prediction) function h(x) = a*x + b."""
        return self.a * x + self.b
    def j(self, h, y):
        """Cost (loss) function J = (1/2m) * sum((h - y)^2) over the batch."""
        return (1 / (2 * self.Batch)) * np.sum((h - y) ** 2)
    def p(self, x, y, h):
        """One gradient-descent step: update parameters a and b in place."""
        # Partial derivatives of the cost J with respect to a and b.
        u1 = (1 / self.Batch) * np.sum(x * (h - y))
        u2 = (1 / self.Batch) * np.sum(h - y)
        # Simultaneous update of both parameters.
        self.a = self.a - self.learning_rate * u1
        self.b = self.b - self.learning_rate * u2
    def restore(self):
        """Convert a, b from the standardized space to the original scale.

        b must be updated BEFORE a, because the formula for b needs the
        still-standardized value of a; reversing the order corrupts a.
        """
        self.b = self.y_mean + self.b * self.y_std - (self.a * self.x_mean * self.y_std / self.x_std)
        self.a = (self.a * self.y_std) / self.x_std
    def train(self):
        """Fit a and b on the training set with mini-batch gradient descent."""
        j = 0  # BUGFIX: initialize so the print below cannot raise NameError
               # when train_num < Batch (zero full batches).
        for i in range(self.EPOCH):  # epochs (full passes over the batches)
            for k in range(int(self.train_num / self.Batch)):  # k-th mini-batch
                # Take the k-th mini-batch from the training set.
                data_x = self.train_x[k * self.Batch:k * self.Batch + self.Batch]
                data_y = self.train_y[k * self.Batch:k * self.Batch + self.Batch]
                # Z-score standardization (using full-data-set statistics).
                data_x = self.z_score('x', data_x)
                data_y = self.z_score('y', data_y)
                # Linear-regression step.
                h = self.h(data_x)           # predictions for the batch
                j = self.j(h, data_y)        # batch cost (for monitoring only)
                self.p(data_x, data_y, h)    # gradient-descent parameter update
            print(f"a:{self.a}, b:{self.b}, j = {j}")
    def test(self):
        """Return the mean squared error on the (standardized) test set.

        NOTE(review): call this BEFORE restore() — it standardizes the test
        data, so it only matches a, b while they are in standardized space.
        """
        data_x = self.z_score('x', self.test_x)
        data_y = self.z_score('y', self.test_y)
        h = self.h(data_x)
        # Vectorized sum of squared errors, averaged over the test samples
        # (test_num now equals len(self.test_x) thanks to the split fix).
        return np.sum((h - data_y) ** 2) / self.test_num
12-05
1207
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
“相关推荐”对你有帮助么?
-
非常没帮助
-
没帮助
-
一般
-
有帮助
-
非常有帮助
提交