小白学Pytorch使用（2-1）：气温预测回归-手动构建网络模型

悸惓

已于 2024-04-07 15:17:42 修改

阅读量456

点赞数 9

分类专栏： AI学习、Pytorch框架　｜　文章标签： pytorch、人工智能

于 2024-03-18 11:34:40 首次发布

本文链接：https://blog.csdn.net/qq_43737512/article/details/136803857

版权

AI学习同时被 2 个专栏收录

12 篇文章 0 订阅

订阅专栏

Pytorch框架

9 篇文章 1 订阅

订阅专栏

任务背景

利用年、月、日、周几、前天的最高温度值、昨天的最高温度值、历史这天平均最高温度值来预测当天的实际温度值，数据如下：
气温预测数据集

一、导入库

# 矩阵计算库
import numpy as np
# 数据基本处理库，可读取csv文件
import pandas as pd
# 画图展示库
import matplotlib.pyplot as plt
# pytorch框架
import torch
# 优化器
import torch.optim as optim
# 处理时间数据
import datetime
# 归一化数据
from sklearn import preprocessing
# 警告
import warnings
warnings.filterwarnings("ignore")

二、处理数据集

# Load the temperature dataset; change the path to wherever your copy lives.
features = pd.read_csv(
    'D:/咕泡人工智能-配套资料/配套资料/4.第四章 深度学习核⼼框架PyTorch/第二,三章：神经网络实战分类与回归任务/神经网络实战分类与回归任务/temps.csv'
)
# Output of print(features.head()):
#    year  month  day  week  temp_2  temp_1  average  actual  friend
# 0  2016      1    1   Fri      45      45     45.6      45      29
# 1  2016      1    2   Sat      44      45     45.7      44      61
# 2  2016      1    3   Sun      45      44     45.8      41      56
# 3  2016      1    4   Mon      44      41     45.9      40      53
# 4  2016      1    5  Tues      41      40     46.0      44      41
#
# Column meanings:
#   year, month, day, week : the date of the observation
#   temp_2  : maximum temperature two days earlier
#   temp_1  : maximum temperature one day earlier
#   average : historical average maximum temperature for this calendar day
#   actual  : the label - the true maximum temperature on this day
#   friend  : a friend's guess at the value (treated as noise by the author)
#
# print(features.shape) gives (348, 9).

# Keep the three date columns around under their own names.
years = features['year']
months = features['month']
days = features['day']

# One datetime per row, parsed from a 'year-month-day' string.
# Example: dates[:1] == [datetime.datetime(2016, 1, 1, 0, 0)]
dates = [
    datetime.datetime.strptime(f'{int(y)}-{int(m)}-{int(d)}', '%Y-%m-%d')
    for y, m, d in zip(years, months, days)
]

三、数据展示

# Use the 'fivethirtyeight' matplotlib style sheet for the figure.
plt.style.use('fivethirtyeight')

# A single 10x10 figure holding a 2x2 grid of panels.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
# Tilt the date tick labels by 45 degrees.
fig.autofmt_xdate(rotation=45)

# (axes, dataframe column, x-axis label, panel title) for each panel:
# the label, yesterday's max, the max two days ago, and the friend's guess.
panels = (
    (ax1, 'actual', '', 'Max Temp'),
    (ax2, 'temp_1', '', 'Previous Max Temp'),
    (ax3, 'temp_2', 'Date', 'Two Days Prior Max Temp'),
    (ax4, 'friend', 'Date', 'Friend Estimate'),
)
for axis, column, x_label, title in panels:
    axis.plot(dates, features[column])
    axis.set_xlabel(x_label)
    axis.set_ylabel('Temperature')
    axis.set_title(title)

# Re-pack the panels with pad=3, then display the figure.
plt.tight_layout(pad=3)
plt.show()

数据展示图：
（原文此处为一张 2×2 的折线图：四个子图分别展示 actual、temp_1、temp_2、friend 四列随日期的变化。）
四、数据预处理

# One-hot encode the string-valued 'week' column: each weekday becomes its
# own indicator column (week_Fri, week_Mon, ...).
features = pd.get_dummies(features)
# Output of print(features.head(5)):
#    year  month  day  temp_2  ...  week_Sun  week_Thurs  week_Tues  week_Wed
# 0  2016      1    1      45  ...     False       False      False     False
# 1  2016      1    2      44  ...     False       False      False     False
# 2  2016      1    3      45  ...      True       False      False     False
# 3  2016      1    4      44  ...     False       False      False     False
# 4  2016      1    5      41  ...     False       False       True     False

# Separate the target (y) from the inputs (x) before scaling.
labels = features['actual'].to_numpy(copy=True)
# Only 'actual' is removed here; every other column, including 'friend',
# remains an input feature.
features = features.drop(columns='actual')
# Remember the column names, since the ndarray conversion below loses them.
features_list = features.columns.tolist()
# From here on the features are a plain array; shape is (348, 14).
features = features.to_numpy()

# The columns live on very different scales, so standardize each one to
# (x - mean) / std; otherwise large-magnitude columns dominate training.
scaler = preprocessing.StandardScaler()
input_features = scaler.fit_transform(features)
# print(type(input_features), input_features.shape, input_features[0]) gives:
# <class 'numpy.ndarray'> , (348, 14) [ 0.         -1.5678393  -1.65682171 -1.48452388 -1.49443549 -1.3470703
#  -1.98891668  2.44131112 -0.40482045 -0.40961596 -0.40482045 -0.40482045
#  -0.41913682 -0.40482045]

五、构建网络模型

# Convert the standardized features and the labels to tensors.
# dtype=float is mapped by torch to float64.
x = torch.tensor(input_features, dtype=float)
y = torch.tensor(labels, dtype=float)
# Column-shaped view of the labels, (N, 1), matching the network output.
# Subtracting the 1-D `y` from the (N, 1) predictions would broadcast to an
# (N, N) matrix and the "MSE" would average every prediction against every
# label instead of its own.
targets = y.reshape(-1, 1)

# Randomly initialised parameters of a one-hidden-layer network.
# requires_grad=True makes autograd track gradients for them.
hidden_size = 128
weight = torch.randn((x.shape[1], hidden_size), dtype=float, requires_grad=True)
biases = torch.randn(hidden_size, dtype=float, requires_grad=True)
weight2 = torch.randn((hidden_size, 1), dtype=float, requires_grad=True)
biases2 = torch.randn(1, dtype=float, requires_grad=True)
params = (weight, biases, weight2, biases2)

# Learning rate for plain gradient descent.
lr = 0.001
# Loss recorded at every iteration (0-d numpy arrays, ready for plotting).
losses = []

# Train for 1000 full-batch iterations.
for i in range(1000):
    # Hidden layer: affine transform followed by ReLU.
    hidden = torch.relu(x.mm(weight) + biases)
    # Output layer: one predicted temperature per row, shape (N, 1).
    predictions = hidden.mm(weight2) + biases2
    # Mean squared error against the column-shaped labels.
    loss = torch.mean((predictions - targets) ** 2)
    # Store a detached numpy copy so the history does not keep the graph alive.
    losses.append(loss.detach().numpy())

    # Report progress every 100 iterations.
    if i % 100 == 0:
        print('loss is', loss)

    # Backpropagate to populate .grad on every parameter.
    loss.backward()

    # Gradient-descent step; no_grad keeps the in-place updates out of the
    # autograd graph. Gradients accumulate across backward() calls, so they
    # are zeroed right after use.
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad
            param.grad.zero_()