import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import datetime
from sklearn import preprocessing
from torch import nn
features = pd.read_csv('temps.csv')
#print(type(features))
# Take a look at what the data looks like
#print(features.head(n = 5))
#print("data shape", features.shape)
years = features["year"]
months = features["month"]
days = features["day"]
# Build "year-month-day" strings and parse them into datetime objects
dates = [str(int(year)) + '-' + str(int(month)) + '-' + str(int(day))
         for year, month, day in zip(years, months, days)]
dates = [datetime.datetime.strptime(date, '%Y-%m-%d') for date in dates]
#print(dates[:5])
# One-hot encoding
features = pd.get_dummies(features)  # get_dummies() turns each string column into 0/1 indicator columns
#print(features.head(5))
# Labels
labels = np.array(features['actual'])
# Remove the label column from the features
#print(features.shape)
features = features.drop('actual', axis = 1)  # drop the 'actual' column
#
#print(features.shape)
#features = features.drop(0, axis = 0)  # drop the first row
#print(features.shape)
# Save the column names separately for later use
feature_list = list(features.columns)
# Convert to a suitable format
features = np.array(features)  # convert from a DataFrame to a NumPy array
input_features = preprocessing.StandardScaler().fit_transform(features)
######## Don't forget to standardize the features!
# Build tensors from the full data set
x = torch.tensor(input_features, dtype = torch.float)  # convert from a NumPy array to a torch tensor
y = torch.tensor(labels, dtype = torch.float)
# Build the network model
input_size = input_features.shape[1]
#print(input_size)
hidden_size = 128
output_size = 1
batch_size = 16
my_nn = torch.nn.Sequential(
    torch.nn.Linear(input_size, hidden_size),
    torch.nn.ReLU(),  # note the exact capitalization "ReLU", otherwise you'll get an error
    torch.nn.Linear(hidden_size, output_size),
)
cost = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(my_nn.parameters(), lr = 0.001)
# Train the network
losses = []
for i in range(1000):
    batch_loss = []
    # Mini-batch training
    for start in range(0, len(input_features), batch_size):
        end = min(start + batch_size, len(input_features))
        xx = torch.tensor(input_features[start:end], dtype=torch.float)
        yy = torch.tensor(labels[start:end], dtype=torch.float).reshape(-1, 1)  # match the (batch, 1) output shape
        prediction = my_nn(xx)
        loss = cost(prediction, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_loss.append(loss.item())
    # Record and print the loss every 100 epochs
    if i % 100 == 0:
        losses.append(np.mean(batch_loss))
        print(i, np.mean(batch_loss))
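
The lines below are a minimal sketch, not part of the original script, showing how the trained model could be checked against the actual temperatures; they only reuse matplotlib (already imported above) and the my_nn, input_features and labels defined earlier:

# Sketch: compare the trained network's predictions with the actual values
with torch.no_grad():
    predict = my_nn(torch.tensor(input_features, dtype=torch.float)).numpy().flatten()
plt.plot(labels, 'b-', label='actual')
plt.plot(predict, 'r-', label='prediction')
plt.xlabel('sample')
plt.ylabel('temperature')
plt.legend()
plt.show()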
Notes:
1. ReLU and Sigmoid are interchangeable here (two different activation functions), but watch the capitalization: in ReLU the R, L, and U are uppercase and the e is lowercase; in Sigmoid the S must also be uppercase.
2. In general a smaller learning rate works better than a larger one, and the deeper the network, the smaller the learning rate should be, otherwise the gradients can be driven to 0.
3. The key point is the use of torch.nn.Sequential(): see the CSDN post "Pytorch系列1: torch.nn.Sequential()讲解_xddwz的博客-CSDN博客".
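
To make notes 1-3 concrete, here is a minimal sketch of the same model with the activation swapped from ReLU to Sigmoid and a smaller learning rate; the names sigmoid_nn and optimizer_sigmoid are illustrative and not from the original post:

# Same architecture with Sigmoid as the activation (note the capital "S")
sigmoid_nn = torch.nn.Sequential(
    torch.nn.Linear(input_size, hidden_size),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hidden_size, output_size),
)
# A smaller learning rate, following note 2
optimizer_sigmoid = torch.optim.Adam(sigmoid_nn.parameters(), lr = 0.0001)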