代码如下:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import numpy as np
import matplotlib.pyplot as plt
# Feature normalization (z-score) helper
def featureNormalize(X):
    """Column-wise z-score normalize X and append a bias column of ones.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Raw feature matrix (m samples, n features).

    Returns
    -------
    ndarray of shape (m, n + 1)
        Normalized features with a trailing all-ones column (intercept term).
    """
    mu = X.mean(axis=0)
    # BUG FIX: the original used np.std(X), a single scalar std over the whole
    # matrix, while the mean was per-column. Each column must be divided by
    # its OWN standard deviation, otherwise features with different ranges
    # are scaled incorrectly.
    sigma = X.std(axis=0)
    X = (X - mu) / sigma
    # Bias (intercept) column so theta can include an offset term.
    ones = np.ones((X.shape[0], 1))
    return np.hstack((X, ones))
# Data loading helper
def getData(path=r"D:\Pycharm\code\MY_machine_learning\mydata\aqi2.csv"):
    """Load the AQI csv and return (normalized features, targets).

    Parameters
    ----------
    path : str, optional
        CSV file to load. Generalized from the original hard-coded absolute
        path; the default keeps the original behavior for existing callers.

    Returns
    -------
    X : ndarray of shape (m, n + 1)
        Normalized features with a trailing bias column (via featureNormalize).
    Y : ndarray of shape (m, 1)
        Target column reshaped to a 2-D column vector.
    """
    data = np.loadtxt(path, delimiter=",", skiprows=1, dtype=np.float32)
    X = data[:, 1:]          # feature matrix (all columns after the first)
    Y = data[:, 0]           # target/label vector (first column)
    Y = Y.reshape(-1, 1)     # column vector so matrix ops broadcast correctly
    # Feature scaling + bias column
    X = featureNormalize(X)
    return X, Y
# Cost function for linear regression
def lossFunction(X, Y, theta):
    """Return the mean-squared-error cost J(theta) = ||X·theta - Y||² / (2m).

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Feature matrix (with bias column).
    Y : ndarray of shape (m, 1)
        Target column vector.
    theta : ndarray of shape (n, 1)
        Parameter vector.

    Returns
    -------
    float
        Scalar cost. (The original used built-in sum() over a 2-D array,
        which yields a one-element ndarray and prints as "[x]"; np.sum
        returns a proper scalar.)
    """
    m = X.shape[0]  # number of samples
    loss = np.sum((np.dot(X, theta) - Y) ** 2) / (2 * m)
    return loss
# Batch gradient descent for linear regression
def batchGradientDecent(X, Y, theta, alpha, num_iters):
    """Iteratively update theta with full-batch gradient steps.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Feature matrix (with bias column).
    Y : ndarray of shape (m, 1)
        Target column vector.
    theta : ndarray of shape (n, 1)
        Initial parameter vector.
    alpha : float
        Learning rate.
    num_iters : int
        Number of gradient steps.

    Returns
    -------
    (theta, loss_history)
        Final parameters and the cost recorded after every update.
    """
    sample_count = X.shape[0]
    loss_history = []
    for step in range(num_iters):
        # Full-batch gradient of the MSE cost, averaged over all samples.
        gradient = np.dot(X.T, np.dot(X, theta) - Y) / sample_count
        theta = theta - alpha * gradient
        current_loss = lossFunction(X, Y, theta)
        loss_history.append(current_loss)
        print("第{}次的损失值为:{}".format(step + 1, current_loss))
    return theta, loss_history
if __name__ == "__main__":
    # 1. Load the dataset (features already normalized, bias column appended)
    X, Y = getData()

    # 2. Initialize parameters and hyperparameters
    theta = np.ones((X.shape[1], 1))
    num_iters = 500
    alpha = 0.01

    # 3. Fit via batch gradient descent
    theta, loss_all = batchGradientDecent(X, Y, theta, alpha, num_iters)
    print(theta)

    # 4. Predict on the training set
    y_predict = np.dot(X, theta)
    print('打印预测值:\n', y_predict)

    # 5. Visualize: loss vs. iteration count
    # (optional scatter of true vs. predicted values for the first 100 samples)
    # plt.scatter(np.arange(100), Y[:100], c="red")
    # plt.scatter(np.arange(100), y_predict[:100], c="green")
    plt.plot(np.arange(num_iters), loss_all, c='r')
    plt.show()

    # 6. Report regression metrics
    print("mae:", mean_absolute_error(Y, y_predict))
    print("mse:", mean_squared_error(Y, y_predict))
    print("r2:", r2_score(Y, y_predict))
运行结果如下:
附:aqi2.csv 数据集的网盘提取码为 uuyp。