# 手动实现 (manual implementation)
'''
使用Python语言实现线性回归(梯度下降)
'''
import numpy as np
import matplotlib.pyplot as plt
# Training samples: x holds the inputs, y the observed outputs.
x = np.array([0.5, 0.6, 0.8, 1.1, 1.4])
y = np.array([5.0, 5.5, 6.0, 6.8, 7.1])

# Initial parameters of the line y = w1 * x + w0.
w1 = 1  # weight (slope)
w0 = 1  # bias (intercept)
learning_rate = 0.01  # step size applied to each gradient
epoch = 500  # number of update rounds

# Per-round history, collected for the plots below.
w0s, w1s, losses, epoches = [], [], [], []

for i in range(epoch):
    # Residual of the current model on every sample; reused for the loss
    # and for both gradients so it is only computed once per round.
    residual = w1 * x + w0 - y

    # Report the loss and parameters as they are BEFORE this round's update.
    loss = (residual ** 2).sum() / 2
    print('轮数:{:3},w1:{:.8f},w0:{:.8f},loss:{:.8f}'.format(i + 1, w1, w0, loss))

    # Record how w0, w1 and the loss evolve.
    w0s.append(w0)
    w1s.append(w1)
    losses.append(loss)
    epoches.append(i + 1)

    # Partial derivatives of loss = sum(residual ** 2) / 2
    # with respect to w0 and w1.
    d0 = residual.sum()
    d1 = (x * residual).sum()

    # Gradient-descent step; both gradients were taken from the same
    # (pre-update) parameters above.
    w0 = w0 - learning_rate * d0
    w1 = w1 - learning_rate * d1

print('w1:{},w0:{}'.format(w1, w0))

pred_y = w1 * x + w0  # predictions of the fitted line on the training inputs

# Optional: draw the samples together with the fitted line.
# plt.scatter(x, y)
# plt.plot(x, pred_y, color='orangered')
# plt.show()

# Visualise how the parameters and the loss changed over the rounds:
# three stacked panels for w0, w1 and the loss.
plt.figure('params', figsize=(8, 6), facecolor='lightgray')

plt.subplot(3, 1, 1)
plt.plot(epoches, w0s, color='dodgerblue', label='w0')
plt.legend()

plt.subplot(3, 1, 2)
plt.plot(epoches, w1s, color='dodgerblue', label='w1')
plt.legend()

plt.subplot(3, 1, 3)
plt.plot(epoches, losses, color='orangered', label='loss')
plt.legend()

plt.show()
# 调用 scikit-learn (using the scikit-learn API)
'''
基于sklearn提供的API实现线性回归
'''
import pandas as pd
import sklearn.linear_model as lm # 线性模型
import matplotlib.pyplot as plt
import sklearn.metrics as sm # 模型评估模块
import pickle
# Load the data set.
data = pd.read_csv('../data_test/Salary_Data.csv')

# Split into inputs (2-D: every column except the last) and
# output (1-D: the last column only).
x = data.iloc[:, :-1]
y = data.iloc[:, -1]
print('x:', x)
print('y:', y)

# Build the model: y = w1*x1 + w2*x2 + ... + wn*xn + w0
model = lm.LinearRegression()

# Fit on the full data set, then predict on the same inputs.
model.fit(x, y)
pred_y = model.predict(x)

print(f'w:{model.coef_[0]}')
print(f'b:{model.intercept_}')

# Optional: visualise the fitted model.
# plt.scatter(x, y)
# plt.plot(x, pred_y, color='orangered')
# plt.show()

# Model evaluation.
# Every 4th row is taken as "test" data; the script pretends these rows
# were not part of training (they were, since the model was fit on all rows).
test_x = x.iloc[::4]  # test inputs
test_y = y.iloc[::4]  # test outputs (ground truth)
pred_test_y = model.predict(test_x)  # predictions for the test inputs

# Print, in order: mean absolute error, median absolute error, R2 score.
for metric in (sm.mean_absolute_error, sm.median_absolute_error, sm.r2_score):
    print(metric(test_y, pred_test_y))

# Saving and reloading the model (left disabled).
# with open('./model.pickle', 'wb') as f:
#     pickle.dump(model, f)
#     print('模型保存成功')
#
#
# with open('./model.pickle', 'rb') as f:
#     model = pickle.load(f)
#     model.predict()