PyTorch 学习笔记
注:本篇博文是博主学习过程中保存的个人笔记,并非完善、系统的教学内容,仅供参考。
1、线性回归
1.1 手动定义网络
基本顺序
+ 准备数据:将数据分割为feature和label,保存为tensor格式。数据归一化、PCA
+ 构建网络模型:手动定义网络结构、Sequential、继承Module。
+ 训练网络:定义反向传播、loss、优化器
+ 测试应用
import pandas as pd
import numpy as np
import torch
# Define the network by hand: explicit weight/bias tensors, no torch.nn layers.

# 1 ------------------ Prepare the data ------------------
from sklearn import preprocessing

data_file = "./data/count.csv"
raw_data = pd.read_csv(data_file, index_col=0)

# The "id" column holds the labels.
labels = np.array(raw_data['id'])

# Everything except "id" is a feature column.
features = raw_data.drop("id", axis=1)

# Keep the column names for later use.
features_list = list(features.columns)
features = np.array(features)  # convert to ndarray

# Standardize the features (the author notes this converges faster with a
# smaller loss).
input_features = preprocessing.StandardScaler().fit_transform(features)

# 2 ------------------ Build the network model ------------------
x = torch.tensor(input_features, dtype=torch.float)
y = torch.tensor(labels, dtype=torch.float)
# print(x.shape)

# Weight initialization. The input width is taken from the data instead of
# being hard-coded, so the first matrix multiply always matches the number
# of feature columns in the CSV.
n_features = x.shape[1]
hidden_size = 512
weights = torch.randn((n_features, hidden_size), dtype=torch.float, requires_grad=True)
biases = torch.randn(hidden_size, dtype=torch.float, requires_grad=True)
weights2 = torch.randn((hidden_size, 1), dtype=torch.float, requires_grad=True)
biases2 = torch.randn(1, dtype=torch.float, requires_grad=True)
learning_rate = 0.000001
# 3 ------------------ Train the network ------------------
losses = []

# The network output has shape (N, 1). Reshape the labels to a column once so
# that `predictions - targets` is element-wise; subtracting a 1-D (N,) label
# vector would broadcast to an (N, N) matrix and give a meaningless loss.
targets = y.reshape(-1, 1)

for i in range(10000):
    # Forward pass: linear -> ReLU -> linear.
    hidden = x.mm(weights) + biases
    hidden = torch.relu(hidden)
    predictions = hidden.mm(weights2) + biases2

    # Mean squared error.
    loss = torch.mean((predictions - targets) ** 2)
    losses.append(loss.item())

    # Print the loss every 100 iterations.
    if i % 100 == 0:
        print('第', i, '轮:loss:', loss)

    loss.backward()

    # Plain gradient-descent step. The update itself must not be tracked by
    # autograd, and the gradients are cleared afterwards because backward()
    # accumulates into .grad.
    with torch.no_grad():
        for param in (weights, biases, weights2, biases2):
            param -= learning_rate * param.grad
            param.grad.zero_()
2、使用函数定义网络
"""
使用pytorch内置方法快速构建网络
"""
import pandas as pd
import numpy as np
import torch
# 1 ------------------ Prepare the data ------------------
# Load the CSV file, using its first column as the index.
data_file = "./data/count.csv"
raw_data = pd.read_csv(data_file, index_col=0)

# Labels come from the "id" column (0-43 according to the author's note).
labels = np.array(raw_data["id"])

# The remaining columns are the features (pixel data per the author's note).
features = raw_data.drop(columns="id")

# Standardization is deliberately switched off in this variant; the raw
# feature values are used as-is. The disabled alternative was:
#   from sklearn import preprocessing
#   input_features = preprocessing.StandardScaler().fit_transform(features)
input_features = np.array(features)

# Full-dataset tensors for features and labels.
x = torch.tensor(input_features, dtype=torch.float)
y = torch.tensor(labels, dtype=torch.float)
# 2 ------------------ Build the network model ------------------
# Hyper-parameters.
BATCH_SIZE = 11
LEARNING_RATE = 0.0005

# Fully connected network: a single linear layer from 256 inputs to 1 output.
my_nn = torch.nn.Sequential(
    torch.nn.Linear(in_features=256, out_features=1),
)

# Loss function (mean squared error, averaged) and optimizer (Adam over all
# model parameters).
cost = torch.nn.MSELoss(reduction="mean")
optimizer = torch.optim.Adam(params=my_nn.parameters(), lr=LEARNING_RATE)
# 3 ------------------ Train the network ------------------
# Histories of loss and accuracy values collected during training.
losses, acces = [], []
from sklearn.metrics import accuracy_score
def metric_func(y_pred, y_true):
    """Return the accuracy of the rounded predictions against the labels.

    Predictions are rounded to the nearest integer with ``np.rint`` and then
    compared to the labels with ``accuracy_score``.

    Args:
        y_pred: Predicted values as a torch tensor, NumPy array or sequence.
        y_true: Ground-truth labels as a torch tensor, NumPy array or
            sequence.

    Returns:
        Whatever ``accuracy_score(y_true, rounded_y_pred)`` returns.
    """

    def _as_array(values):
        # Tensors may carry autograd history or live on another device, so
        # detach and move them to the CPU before handing them to NumPy.
        if isinstance(values, torch.Tensor):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    y_true_array = _as_array(y_true)
    y_pred_array = np.rint(_as_array(y_pred))
    return accuracy_score(y_true_array, y_pred_array)
metric_name = "accuracy"
print("Start Training......")
epochs = 2000
num_samples = len(input_features)

for epoch in range(epochs):
    batch_loss = []
    batch_acc = []

    # Mini-batch training over the whole dataset.
    for start in range(0, num_samples, BATCH_SIZE):
        end = min(start + BATCH_SIZE, num_samples)

        # xx: input features, yy: labels. Slice the tensors built during data
        # preparation instead of re-creating them for every batch; neither
        # inputs nor labels need gradients.
        xx = x[start:end]
        yy = y[start:end]

        optimizer.zero_grad()

        # NOTE: prediction and label must have the same shape when the loss is
        # computed (both 1-D or both 2-D). Per the original author's
        # observation, newer PyTorch versions complain about a mismatch while
        # older ones silently broadcast and produce a completely wrong loss.
        # reshape(-1) flattens (batch, 1) to (batch,) and, unlike
        # reshape(BATCH_SIZE), also works for a smaller final batch.
        prediction = my_nn(xx).reshape(-1)

        loss = cost(prediction, yy)
        loss.backward()
        optimizer.step()
        batch_loss.append(loss.item())

        # metric_func rounds the predictions itself; pass a detached tensor so
        # the metric computation is not tracked by autograd.
        acc = metric_func(prediction.detach(), yy)
        batch_acc.append(acc)

    # Record exactly one loss/accuracy value per epoch.
    losses.append(np.mean(batch_loss))
    acces.append(np.mean(batch_acc))

    # Log every 100 epochs.
    if epoch % 100 == 0:
        print("第{}轮,loss is {}, acc is {}:".format(epoch, np.mean(batch_loss), np.mean(batch_acc)))
# Run the trained model on the full feature matrix with gradient tracking
# disabled, then show the raw outputs as a NumPy array.
with torch.no_grad():
    x = torch.tensor(input_features, dtype=torch.float)
    outputs = my_nn(x)
    predict = outputs.data.numpy()
print(predict)