前言
案例代码:https://github.com/2012Netsky/pytorch_cnn/blob/main/4_time_series_bikes.ipynb
一、tensor打印配置
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import torch
# Shorten tensor printouts: summarize any tensor with more than 50 elements
# and show only 2 items at each end of every summarized dimension.
torch.set_printoptions(edgeitems=2, threshold=50)
# precision: number of digits of precision for floating-point output
#   (default 4).
# threshold: total number of elements above which a tensor is printed in
#   summarized form instead of in full (default 1000).
# edgeitems: number of items shown at the beginning and end of each dimension
#   when a tensor is summarized (default 3).
# linewidth: number of characters per line before a line break is inserted
#   (default 80).
# profile: selects a preset bundle of the options above ("default", "short"
#   or "full"); individual options passed alongside it still take precedence.

二、读取CSV文件
# Read the hourly bike-sharing CSV into a float32 NumPy array (the header row
# is skipped), then wrap the array as a tensor.
def _day_of_month(date_field):
    # Column 1 is a date string; keep only characters 8-9 (the day) and drop
    # the year and month so the field can be stored as a number.
    return float(date_field[8:10])


bikes_numpy = np.loadtxt(
    "../data/p1ch4/bike-sharing-dataset/hour-fixed.csv",
    delimiter=",",
    skiprows=1,
    dtype=np.float32,
    converters={1: _day_of_month},
)
bikes = torch.from_numpy(bikes_numpy)
print(bikes[:, 1])
# Notebook-style inspection of shape and strides; a no-op when run as a script.
bikes.shape, bikes.stride()

三、 重构维度
# Regroup the hourly rows into days: (rows, columns) -> (days, 24, columns).
# Passing -1 lets view() infer the number of days from the remaining sizes.
hours_per_day = 24
n_columns = bikes.shape[1]
daily_bikes = bikes.view(-1, hours_per_day, n_columns)
# With 17 columns the strides are (408 = 17 * 24, 17, 1).
daily_bikes.shape, daily_bikes.stride()
# Swap dims 1 and 2 so the layout becomes (days, columns, 24). Dim 0 is left
# alone; transpose returns a view, so only the shape and strides change.
daily_bikes = daily_bikes.transpose(2, 1)
daily_bikes.shape, daily_bikes.stride()

四、提取相关数据
# Take the first 24 rows (the first day) as integers and allocate an all-zero
# matrix with one row per hour and 4 columns; it receives the one-hot weather
# encoding in the next step.
first_day = bikes[:24].long()
weather_onehot = torch.zeros(first_day.shape[0], 4)
print(bikes.shape)
print(first_day.shape)
# Column index 9 (i.e. the tenth column) is the one used as the weather
# category for the one-hot encoding.
print(first_day[:, 9])
print(weather_onehot.shape)
print(weather_onehot)

五、独热编码
# One-hot encode the weather category of the first day.
# The category in column 9 is shifted by -1 to obtain 0-based column indices
# (presumably the categories start at 1); unsqueeze(1) turns the vector into
# the (rows, 1) index shape that scatter_ needs along dim 1.
weather_index = first_day[:, 9].unsqueeze(1).long() - 1
# scatter_ writes in place and returns the same tensor, so weather_onehot1 is
# another name for weather_onehot rather than a copy.
weather_onehot1 = weather_onehot.scatter_(dim=1, index=weather_index, value=1.0)
# Switch to the "full" print profile so tensors are printed without
# summarization from here on.
torch.set_printoptions(profile="full")
print(weather_onehot1.shape)
print(weather_onehot1)

六、组合与拆分tensor
# torch.cat joins tensors along an existing dimension ("cat" = concatenate):
#   torch.cat((A, B), 0) joins along dim 0 (rows are appended),
#   torch.cat((A, B), 1) joins along dim 1 (columns are appended).
# Append the one-hot weather columns to the first day's rows. The result is
# built once and reused instead of being recomputed for every print.
first_day_with_onehot = torch.cat((bikes[:24], weather_onehot), 1)
print(bikes.shape)
print()
print(bikes[:24].shape)
print(weather_onehot.shape)
print(first_day_with_onehot.shape)
print()
# [:1] keeps only the first row, with all of its columns.
print(first_day_with_onehot[:1].shape)

# Repeat the encoding for every day at once. daily_bikes is laid out as
# (days, columns, hours), so the one-hot tensor is (days, 4, hours).
daily_weather_onehot = torch.zeros(daily_bikes.shape[0], 4,
                                   daily_bikes.shape[2])
print(daily_bikes.shape[0], daily_bikes.shape[2])
print(daily_bikes[:, 9, :].shape)
# One-hot encode along dim 1; the "- 1" shifts the category values in
# channel 9 to 0-based indices, and unsqueeze(1) inserts the channel dim.
daily_weather_onehot.scatter_(
    1, daily_bikes[:, 9, :].long().unsqueeze(1) - 1, 1.0)
# Report the two input shapes before daily_bikes is rebound, then the shape of
# the concatenated result, so each printed shape belongs to a real tensor and
# the one-hot channels are appended exactly once.
print(daily_bikes.shape)
print(daily_weather_onehot.shape)
daily_bikes = torch.cat((daily_bikes, daily_weather_onehot), dim=1)
print(daily_bikes.shape)

七、数据归一化与标准化
# Rescale the weather category in channel 9 by (x - 1) / 3, which maps the
# categories 1..4 onto the range 0..1 (treating them as an ordinal value).
daily_bikes[:, 9, :] = (daily_bikes[:, 9, :] - 1.0) / 3.0

# Rescaling a variable helps training. Two common options are shown for
# channel 10; they are alternatives, so only one of them is written back.
temp = daily_bikes[:, 10, :]

# Option 1: min-max scaling, which maps the values onto the range [0, 1].
temp_min = torch.min(temp)
temp_max = torch.max(temp)
temp_minmax = (temp - temp_min) / (temp_max - temp_min)

# Option 2: standardization - subtract the mean and divide by the standard
# deviation.
temp_standardized = (temp - torch.mean(temp)) / torch.std(temp)

# Store the standardized values. Standardizing data that has already been
# min-max scaled yields the same result, so chaining both steps would add
# nothing. `temp` is a view of daily_bikes and reflects the new values after
# this assignment.
daily_bikes[:, 10, :] = temp_standardized


本文介绍了如何使用PyTorch进行时间序列数据预处理,包括tensor打印配置、读取CSV文件、维度重构、独热编码、数据组合与拆分,以及数据归一化与标准化技巧。通过实例演示了如何对bike-sharing数据集进行特征工程和编码,适合初学者理解PyTorch库在时间序列分析中的应用。

2903

被折叠的 条评论
为什么被折叠?



