# -*- coding: utf-8 -*-
"""
在pytorch里,对于多分类问题我们使用
nn.CrossEntropyLoss()和
nn.NLLLoss等来计算softmax交叉熵
"""
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch.nn.functional as F
# Preprocessing: load the iris CSV, encode the labels, split, and wrap in loaders.
data = pd.read_csv("./dataset/iris.csv")
print("data.head():\t", data.head())
# Show which distinct classes are present.
print("data.Species.unique():\t", data.Species.unique())
# Encode the string labels as integers. pd.factorize returns a tuple
# (codes, uniques); only the integer codes are kept.
data['Species'] = pd.factorize(data.Species)[0]
print("pd.factorize(data.Species):\t", data["Species"])
print("data['Species']",data)
# Feature matrix as a NumPy ndarray: every column except the first and the
# last (the last column is the Species label extracted below).
X = data.iloc[:, 1:-1].values
print("X.shape:\t", X.shape)
Y = data.Species.values
print("Y.shape:\t", Y.shape)
from sklearn.model_selection import train_test_split
# No random_state is passed, so the split differs from run to run.
train_x, test_x, train_y, test_y = train_test_split(X, Y)
# Features become float32 and labels int64 tensors.
train_x = torch.from_numpy(train_x).type(torch.float32)
train_y = torch.from_numpy(train_y).type(torch.int64)
test_x = torch.from_numpy(test_x).type(torch.float32)
test_y = torch.from_numpy(test_y).type(torch.int64)
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
lr = 0.0001
batch = 8
train_ds = TensorDataset(train_x, train_y)
# Only the training loader is shuffled.
train_dl = DataLoader(train_ds, batch_size=batch, shuffle=True)
test_ds = TensorDataset(test_x, test_y)
test_dl = DataLoader(test_ds, batch_size=batch)
# Model definition
class Model(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    The network returns raw, un-activated scores (logits) for each class.

    Args:
        in_features: Number of input features per sample (default 4).
        hidden_features: Width of both hidden layers (default 32).
        num_classes: Number of output classes (default 3).
    """

    def __init__(self, in_features=4, hidden_features=32, num_classes=3):
        super().__init__()
        # Attribute names are kept as-is: they are the state_dict keys.
        self.liner_1 = nn.Linear(in_features, hidden_features)
        self.liner_2 = nn.Linear(hidden_features, hidden_features)
        self.liner_3 = nn.Linear(hidden_features, num_classes)

    def forward(self, input):
        """Return the logits computed by the three linear layers for ``input``."""
        x = F.relu(self.liner_1(input))
        x = F.relu(self.liner_2(x))
        # The output layer is deliberately left without an activation.
        x = self.liner_3(x)
        return x
model = Model()
print("model:\t", model)
loss_fn = nn.CrossEntropyLoss()
# Sanity check: push a single training batch through the untrained model.
input_batch, label_batch = next(iter(train_dl))
print("input_batch.shape:{}\t label_batch.shape:{}\t".format(input_batch.shape, label_batch.shape))
y_pred = model(input_batch)
print("y_pred.shape:\t", y_pred.shape)
print("y_pred:\t", y_pred)
# The index of the largest value along dim=1 is the predicted class.
print("torch.argmax(y_pred, dim=1):\t", torch.argmax(y_pred, dim=1))
# Accuracy metric used when evaluating the model
def accuracy(y_pred, y_true):
    """Return the fraction of rows whose arg-max over dim 1 equals the label.

    Args:
        y_pred: Tensor of per-class scores; the arg-max is taken over dim 1.
        y_true: Tensor of class indices compared element-wise with the
            predicted indices.

    Returns:
        A 0-dimensional float tensor holding the mean of the matches.
    """
    predicted = torch.argmax(y_pred, dim=1)  # the model's actual class choice
    return (predicted == y_true).float().mean()
# Per-epoch metric history, stored as plain Python floats for plotting.
train_loss = []
train_acc = []
test_loss = []
test_acc = []
epochs = 20
# Optimizer
optim = torch.optim.Adam(model.parameters(), lr=lr)
for epoch in range(epochs):
    # One pass of mini-batch updates over the training loader.
    for x, y in train_dl:
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    # Evaluate on the full train and test tensors without tracking gradients;
    # each forward pass is computed once and reused for loss and accuracy.
    with torch.no_grad():
        train_logits = model(train_x)
        test_logits = model(test_x)
        epoch_accuracy = accuracy(train_logits, train_y).item()
        epoch_loss = loss_fn(train_logits, train_y).item()
        epoch_test_accuracy = accuracy(test_logits, test_y).item()
        epoch_test_loss = loss_fn(test_logits, test_y).item()
    print('epoch: ', epoch, 'loss: ', round(epoch_loss, 3),
          'accuracy:', round(epoch_accuracy, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          'test_accuracy:', round(epoch_test_accuracy, 3)
          )
    train_loss.append(epoch_loss)
    train_acc.append(epoch_accuracy)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_accuracy)
# Loss curves
plt.plot(range(1, epochs+1), train_loss, label = "train_loss")
plt.plot(range(1, epochs+1), test_loss, label = "test_loss")
plt.legend()
plt.show()
# Accuracy curves
plt.plot(range(1, epochs+1), train_acc, label = "train_acc")
plt.plot(range(1, epochs+1), test_acc, label = "test_acc")
plt.legend()
plt.show()
# fit: train the model for one epoch on trainloader, evaluate it on
# testloader, and return the loss/accuracy of both.
def fit(epoch, model, trainloader, testloader, criterion=None, optimizer=None):
    """Run one training epoch followed by one evaluation pass.

    Args:
        epoch: Epoch index; only used in the printed progress line.
        model: Callable model whose parameters ``optimizer`` updates.
        trainloader: Iterable of ``(x, y)`` batches used for training.
        testloader: Iterable of ``(x, y)`` batches used for evaluation.
        criterion: Loss callable. Defaults to the module-level ``loss_fn``.
            Assumed to return the mean loss over the batch, as
            ``nn.CrossEntropyLoss()`` does by default.
        optimizer: Optimizer to step. Defaults to the module-level ``optim``.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc)`` of
        Python floats: per-sample mean loss and accuracy on the training
        batches (measured while training) and on the test batches.
    """
    if criterion is None:
        criterion = loss_fn
    if optimizer is None:
        optimizer = optim

    correct = 0
    total = 0
    running_loss = 0.0
    for x, y in trainloader:
        y_pred = model(x)
        loss = criterion(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            batch_size = y.size(0)
            correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
            total += batch_size
            # The criterion yields a batch mean; weight it by the batch size
            # so dividing by the sample count gives a true per-sample mean.
            running_loss += loss.item() * batch_size
    epoch_loss = running_loss / total
    epoch_acc = correct / total

    test_correct = 0
    test_total = 0
    test_running_loss = 0.0
    with torch.no_grad():
        for x, y in testloader:
            y_pred = model(x)
            loss = criterion(y_pred, y)
            batch_size = y.size(0)
            test_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
            test_total += batch_size
            test_running_loss += loss.item() * batch_size
    # Test metrics must come from the test accumulators, not the train ones.
    epoch_test_loss = test_running_loss / test_total
    epoch_test_acc = test_correct / test_total

    print("epoch:\t{} loss:\t{} accuracy:\t{} test_loss:\t{} test_accuracy:\t{}".format(epoch, round(epoch_loss, 3), round(epoch_acc, 3), round(epoch_test_loss, 3), round(epoch_test_acc, 3) ) )
    return epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc
# Start again from a fresh model and optimizer, this time training via fit().
model = Model()
optim = torch.optim.Adam(model.parameters(), lr=lr)
epochs = 20
# Per-epoch metric history returned by fit().
train_loss = []
train_acc = []
test_loss = []
test_acc = []
for epoch in range(epochs):
    epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc = fit(epoch, model, train_dl, test_dl)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    test_loss.append(epoch_test_loss)
    # Test accuracy belongs in test_acc, not in the test loss history.
    test_acc.append(epoch_test_acc)
data.head(): Unnamed: 0 Sepal.Length Sepal.Width Petal.Length Petal.Width Species
0 1 5.1 3.5 1.4 0.2 setosa
1 2 4.9 3.0 1.4 0.2 setosa
2 3 4.7 3.2 1.3 0.2 setosa
3 4 4.6 3.1 1.5 0.2 setosa
4 5 5.0 3.6 1.4 0.2 setosa
data.Species.unique(): ['setosa' 'versicolor' 'virginica']
pd.factorize(data.Species): 0 0
1 0
2 0
3 0
4 0
..
145 2
146 2
147 2
148 2
149 2
Name: Species, Length: 150, dtype: int64
data['Species'] Unnamed: 0 Sepal.Length Sepal.Width Petal.Length Petal.Width Species
0 1 5.1 3.5 1.4 0.2 0
1 2 4.9 3.0 1.4 0.2 0
2 3 4.7 3.2 1.3 0.2 0
3 4 4.6 3.1 1.5 0.2 0
4 5 5.0 3.6 1.4 0.2 0
.. ... ... ... ... ... ...
145 146 6.7 3.0 5.2 2.3 2
146 147 6.3 2.5 5.0 1.9 2
147 148 6.5 3.0 5.2 2.0 2
148 149 6.2 3.4 5.4 2.3 2
149 150 5.9 3.0 5.1 1.8 2
[150 rows x 6 columns]
X.shape: (150, 4)
Y.shape: (150,)
model: Model(
(liner_1): Linear(in_features=4, out_features=32, bias=True)
(liner_2): Linear(in_features=32, out_features=32, bias=True)
(liner_3): Linear(in_features=32, out_features=3, bias=True)
)
input_batch.shape:torch.Size([8, 4]) label_batch.shape:torch.Size([8])
y_pred.shape: torch.Size([8, 3])
y_pred: tensor([[ 0.1636, 0.3386, -0.1905],
[ 0.2741, 0.3774, -0.1597],
[ 0.1756, 0.3636, -0.1717],
[ 0.3066, 0.3792, -0.1657],
[ 0.3369, 0.4197, -0.2274],
[ 0.4031, 0.4753, -0.1934],
[ 0.2937, 0.4121, -0.1834],
[ 0.1579, 0.3304, -0.1846]], grad_fn=<AddmmBackward>)
torch.argmax(y_pred, dim=1): tensor([1, 1, 1, 1, 1, 1, 1, 1])
epoch: 0 loss: 1.131 accuracy: 0.384 test_loss: 1.17 test_accuracy: 0.184
epoch: 1 loss: 1.116 accuracy: 0.384 test_loss: 1.158 test_accuracy: 0.184
epoch: 2 loss: 1.103 accuracy: 0.384 test_loss: 1.146 test_accuracy: 0.184
epoch: 3 loss: 1.09 accuracy: 0.384 test_loss: 1.138 test_accuracy: 0.184
epoch: 4 loss: 1.079 accuracy: 0.384 test_loss: 1.126 test_accuracy: 0.184
epoch: 5 loss: 1.068 accuracy: 0.384 test_loss: 1.116 test_accuracy: 0.184
epoch: 6 loss: 1.058 accuracy: 0.384 test_loss: 1.106 test_accuracy: 0.184
epoch: 7 loss: 1.049 accuracy: 0.384 test_loss: 1.095 test_accuracy: 0.184
epoch: 8 loss: 1.04 accuracy: 0.384 test_loss: 1.086 test_accuracy: 0.184
epoch: 9 loss: 1.032 accuracy: 0.384 test_loss: 1.077 test_accuracy: 0.184
epoch: 10 loss: 1.024 accuracy: 0.384 test_loss: 1.068 test_accuracy: 0.184
epoch: 11 loss: 1.017 accuracy: 0.384 test_loss: 1.061 test_accuracy: 0.184
epoch: 12 loss: 1.01 accuracy: 0.384 test_loss: 1.052 test_accuracy: 0.184
epoch: 13 loss: 1.003 accuracy: 0.384 test_loss: 1.045 test_accuracy: 0.184
epoch: 14 loss: 0.997 accuracy: 0.384 test_loss: 1.037 test_accuracy: 0.184
epoch: 15 loss: 0.99 accuracy: 0.384 test_loss: 1.028 test_accuracy: 0.184
epoch: 16 loss: 0.984 accuracy: 0.393 test_loss: 1.02 test_accuracy: 0.184
epoch: 17 loss: 0.978 accuracy: 0.402 test_loss: 1.012 test_accuracy: 0.211
epoch: 18 loss: 0.972 accuracy: 0.438 test_loss: 1.005 test_accuracy: 0.289
epoch: 19 loss: 0.966 accuracy: 0.473 test_loss: 0.998 test_accuracy: 0.316
epoch: 0 loss: 0.142 accuracy: 0.286 test_loss: 0.142 test_accuracy: 0.286
epoch: 1 loss: 0.14 accuracy: 0.286 test_loss: 0.14 test_accuracy: 0.286
epoch: 2 loss: 0.139 accuracy: 0.277 test_loss: 0.139 test_accuracy: 0.277
epoch: 3 loss: 0.137 accuracy: 0.357 test_loss: 0.137 test_accuracy: 0.357
epoch: 4 loss: 0.136 accuracy: 0.491 test_loss: 0.136 test_accuracy: 0.491
epoch: 5 loss: 0.134 accuracy: 0.5 test_loss: 0.134 test_accuracy: 0.5
epoch: 6 loss: 0.133 accuracy: 0.509 test_loss: 0.133 test_accuracy: 0.509
epoch: 7 loss: 0.131 accuracy: 0.5 test_loss: 0.131 test_accuracy: 0.5
epoch: 8 loss: 0.13 accuracy: 0.5 test_loss: 0.13 test_accuracy: 0.5
epoch: 9 loss: 0.129 accuracy: 0.5 test_loss: 0.129 test_accuracy: 0.5
epoch: 10 loss: 0.128 accuracy: 0.518 test_loss: 0.128 test_accuracy: 0.518
epoch: 11 loss: 0.126 accuracy: 0.509 test_loss: 0.126 test_accuracy: 0.509
epoch: 12 loss: 0.125 accuracy: 0.518 test_loss: 0.125 test_accuracy: 0.518
epoch: 13 loss: 0.124 accuracy: 0.527 test_loss: 0.124 test_accuracy: 0.527
epoch: 14 loss: 0.123 accuracy: 0.554 test_loss: 0.123 test_accuracy: 0.554
epoch: 15 loss: 0.122 accuracy: 0.598 test_loss: 0.122 test_accuracy: 0.598
epoch: 16 loss: 0.121 accuracy: 0.652 test_loss: 0.121 test_accuracy: 0.652
epoch: 17 loss: 0.12 accuracy: 0.705 test_loss: 0.12 test_accuracy: 0.705
epoch: 18 loss: 0.119 accuracy: 0.795 test_loss: 0.119 test_accuracy: 0.795
epoch: 19 loss: 0.118 accuracy: 0.893 test_loss: 0.118 test_accuracy: 0.893