0、怎么计算程序运行时间
from timeit import default_timer as timer
# Take a timestamp right before the measured section.
start_time = timer()
# --- measured code starts ---
print((1+2))
# --- measured code ends ---
# Take a second timestamp; the difference is the elapsed time in seconds.
end_time = timer()
elapsed = end_time - start_time
print(f"[程序总运行时间:{elapsed:.3f} seconds")
3
[程序总运行时间:0.000 seconds
1、什么是分类问题以及分类问题的分类?
import torch
# ^^^ pyforest auto-imports - don't write above this line
分类问题的分类:
二分类:目标是两个选项之一,例子:是否患有心脏病
多分类:目标可以是两个以上选项之一,例子:是食物,人,还是狗。
多标签分类:可以为目标分配多个选项。预测一本书:分配到哪些类别:(数学、科学和哲学)
2、分类神经网络的架构
超参数 二进制分类 多类分类
输入层形状(in_features) 与特征数量相同(例如:心脏病预测中的年龄、性别、身高、体重) 与二分类相同
隐藏层(hidden layers) 视具体问题而定,最小值=1,最大值=无限制 与二分类相同
每个隐藏层的神经元 具体问题,一般为10到512个 与二分类相同
输出层形状(out_features) 一类或者另一类 每类一个
隐藏层激活 ReLU线性单元。 与二分类相同
输出激活 sigmoid(torch.sigmoid) Softmax(torch.softmax)
损失函数 二元交叉熵(PyTorch 中为 torch.nn.BCELoss) 交叉熵(torch.nn.CrossEntropyLoss)
优化器 SGD(随机梯度下降),Adam()
# Optimizer examples (illustrative only).
# NOTE(review): `optim`, `model`, `var1` and `var2` are not defined in this snippet —
# presumably `from torch import optim` plus an existing model/tensors; confirm before running.
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)
optimizer = optim.Adam([var1, var2], lr = 0.0001)
3、准备好二进制分类数据
from sklearn.datasets import make_circles
# Number of points to generate.
n_samples = 1000
# Build the circles dataset: a little noise is added to the points, and the
# fixed random_state keeps the result identical on every run.
X, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)
first_features = X[:5]
first_labels = y[:5]
print(f"\n前 5个 X 特征:\n{first_features}")
print("\n")
print(f"\n前 5个 y 标签:\n{first_labels}")
前 5个 X 特征:
[[ 0.75424625 0.23148074]
[-0.75615888 0.15325888]
[-0.81539193 0.17328203]
[-0.39373073 0.69288277]
[ 0.44220765 -0.89672343]]
前 5个 y 标签:
[1 1 1 1 0]
import pandas as pd
# Put both feature columns next to the label so the data can be inspected as a table.
columns = {
    "X1": X[:, 0],  # first feature column
    "X2": X[:, 1],  # second feature column
    "标签": y,  # label column
}
circles = pd.DataFrame(columns)
circles.head(10)
X1 | X2 | 标签 | |
---|---|---|---|
0 | 0.754246 | 0.231481 | 1 |
1 | -0.756159 | 0.153259 | 1 |
2 | -0.815392 | 0.173282 | 1 |
3 | -0.393731 | 0.692883 | 1 |
4 | 0.442208 | -0.896723 | 0 |
5 | -0.479646 | 0.676435 | 1 |
6 | -0.013648 | 0.803349 | 1 |
7 | 0.771513 | 0.147760 | 1 |
8 | -0.169322 | -0.793456 | 1 |
9 | -0.121486 | 1.021509 | 0 |
circles['标签'].value_counts()
1 500
0 500
Name: 标签, dtype: int64
# Visualise the points, coloured by their label.
import matplotlib.pyplot as plt
feature_1, feature_2 = X[:, 0], X[:, 1]
plt.scatter(x=feature_1, y=feature_2, c=y, cmap=plt.cm.RdYlBu);
让我们构建一个 PyTorch 神经网络,将这些点分类为红色(0)和蓝色(1)。
输入输出形状
X.shape, y.shape
((1000, 2), (1000,))
# Look at one sample: its values first, then its shapes.
X_sample, y_sample = X[0], y[0]
print(f"一个X采样的值 X: {X_sample} and the same for y: {y_sample}")
print(f"一个X采样的值的形状X: {X_sample.shape} and the same for y: {y_sample.shape}")
一个X采样的值 X: [0.75424625 0.23148074] and the same for y: 1
一个X采样的值的形状X: (2,) and the same for y: ()
4、将数据转换为张量并创建拆分训练集和测试集
import torch
# Convert both NumPy arrays into float tensors.
X, y = (torch.from_numpy(array).float() for array in (X, y))
# Show the first five samples
X[:5], y[:5]
(tensor([[ 0.7542, 0.2315],
[-0.7562, 0.1533],
[-0.8154, 0.1733],
[-0.3937, 0.6929],
[ 0.4422, -0.8967]]),
tensor([1., 1., 1., 1., 0.]))
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing (80% train); the seed fixes the random split.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split
len(X_train), len(X_test), len(y_train), len(y_test)
(800, 200, 800, 200)
5、构建 PyTorch 分类模型
1.设置与设备无关的代码(如果可用,我们的模型可以在 CPU 或 GPU 上运行)。
2.通过子类化构建模型nn.Module。
3.定义损失函数和优化器。
4.创建一个训练循环。
# 1. Device-agnostic setup
import torch
from torch import nn
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device
'cpu'
class CircleModelV0(nn.Module):
    """Purely linear binary classifier: 2 input features -> 5 hidden units -> 1 output."""

    def __init__(self):
        super().__init__()
        # Two linear layers: the first expands the 2 input features (X) to
        # 5 hidden units, the second reduces those 5 units to 1 feature (y).
        self.layer_1 = nn.Linear(in_features=2, out_features=5)
        self.layer_2 = nn.Linear(in_features=5, out_features=1)

    # 3. Forward pass
    def forward(self, x):
        """Run ``x`` through layer_1 and then layer_2, returning layer_2's output."""
        return self.layer_2(self.layer_1(x))
# 4. 创建一个模型的实例,把他放在目标设备
model_0 = CircleModelV0().to(device)
model_0
CircleModelV0(
(layer_1): Linear(in_features=2, out_features=5, bias=True)
(layer_2): Linear(in_features=5, out_features=1, bias=True)
)
6、可视化神经网络模型的结构
# https://playground.tensorflow.org/
# nn.Sequential is another way to build the same model. It is not always the
# right tool because it can only run its layers one after another, in order.
layers = [
    nn.Linear(in_features=2, out_features=5),
    nn.Linear(in_features=5, out_features=1),
]
model_0 = nn.Sequential(*layers).to(device)
model_0
Sequential(
(0): Linear(in_features=2, out_features=5, bias=True)
(1): Linear(in_features=5, out_features=1, bias=True)
)
# Run the not-yet-trained model on the test features and compare the
# prediction shape with the label shape.
untrained_preds = model_0(X_test.to(device))
n_preds, n_labels = len(untrained_preds), len(y_test)
print(f"Length of predictions: {n_preds}, Shape: {untrained_preds.shape}")
print(f"Length of test samples: {n_labels}, Shape: {y_test.shape}")
print(f"\nFirst 10 predictions:\n{untrained_preds[:10]}")
print(f"\nFirst 10 test labels:\n{y_test[:10]}")
Length of predictions: 200, Shape: torch.Size([200, 1])
Length of test samples: 200, Shape: torch.Size([200])
First 10 predictions:
tensor([[-0.1415],
[-0.1357],
[-0.0911],
[-0.1561],
[ 0.0132],
[ 0.0160],
[-0.0502],
[-0.0144],
[-0.0956],
[-0.1341]], grad_fn=<SliceBackward>)
First 10 test labels:
tensor([1., 0., 1., 0., 1., 1., 0., 0., 1., 0.])
7、设置损失函数和优化器
优化器名称 问题类型 PyTorch代码
随机梯度下降优化器 分类、回归等等 torch.optim.SGD()
Adam优化器 分类、回归等等 torch.optim.Adam()
二元交叉熵损失 二进制分类 torch.nn.BCEWithLogitsLoss或者torch.nn.BCELoss
交叉熵损失 多类分类 torch.nn.CrossEntropyLoss
平均绝对误差(MAE)或L1损失 回归 torch.nn.L1Loss
均方误差(MSE)或L2损失 回归 torch.nn.MSELoss
损失函数:是衡量模型预测的错误程度的方法,损失越高,模型越差。
PyTorch 文档指出:torch.nn.BCEWithLogitsLoss() 内置了 sigmoid,比在 nn.Sigmoid 层之后再使用 torch.nn.BCELoss() 在数值上更稳定。
# Create a loss function and an optimizer.
# Alternative: nn.BCELoss(), which has no sigmoid built in.
loss_fn = nn.BCEWithLogitsLoss()  # sigmoid is built in
# Stochastic gradient descent over every parameter of model_0.
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)
# An evaluation metric measures how right the model's predictions are.
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their labels.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels; must have the same shape as ``y_true``.

    Returns:
        The accuracy as a percentage, or 0.0 when there are no predictions.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different shapes.
    """
    # torch.eq broadcasts its inputs, so e.g. (N,) against (N, 1) would compare
    # every pair and silently inflate the count. Reject mismatched shapes instead.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"shape mismatch: y_true {tuple(y_true.shape)} vs y_pred {tuple(y_pred.shape)}"
        )
    total = len(y_pred)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = torch.eq(y_true, y_pred).sum().item()  # number of matching positions
    return (correct / total) * 100
8、将模型拟合到数据(训练)
从原始模型输出到预测标签
y_logits = model_0(X_test.to(device))[:5]
y_logits
tensor([[-0.1415],
[-0.1357],
[-0.0911],
[-0.1561],
[ 0.0132]], grad_fn=<SliceBackward>)
y_pred_probs = torch.sigmoid(y_logits)
y_pred_probs
tensor([[0.4647],
[0.4661],
[0.4772],
[0.4611],
[0.5033]], grad_fn=<SigmoidBackward>)
# Rounding the sigmoid output turns prediction probabilities into predicted labels.
y_preds = torch.round(y_pred_probs)
# The same conversion in full, starting from the model output.
first_logits = model_0(X_test.to(device))[:5]
y_pred_labels = torch.round(torch.sigmoid(first_logits))
# Check that both routes give equal results
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))
# Get rid of the extra dimension
y_preds.squeeze()
tensor([True, True, True, True, True])
tensor([0., 0., 0., 0., 1.], grad_fn=<SqueezeBackward0>)
y_test[:5]
tensor([1., 0., 1., 0., 1.])
9、建立一个训练和测试的循环
torch.manual_seed(42)
# Number of passes over the training data
epochs = 100
# Put the data on the target device (model and data must be on the same device)
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)
# Training and evaluation loop
for epoch in range(epochs):
    ### Training
    model_0.train()
    # 1. Forward pass: the model outputs raw logits; squeeze removes size-1 dimensions
    y_logits = model_0(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))  # logits -> pred probs -> pred labels
    # 2. Loss/accuracy. nn.BCEWithLogitsLoss works on raw logits
    #    (with nn.BCELoss you would pass torch.sigmoid(y_logits) instead).
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)
    # 3. Zero the gradients
    optimizer.zero_grad()
    # 4. Backpropagate the loss
    loss.backward()
    # 5. Optimizer step
    optimizer.step()
    ### Testing
    model_0.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_0(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        # 2. Loss/accuracy on the test set
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)
    # Report progress every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")
Epoch: 0 | Loss: 0.69443, Accuracy: 45.00% | Test loss: 0.69336, Test acc: 45.50%
Epoch: 10 | Loss: 0.69395, Accuracy: 47.38% | Test loss: 0.69309, Test acc: 48.00%
Epoch: 20 | Loss: 0.69367, Accuracy: 48.00% | Test loss: 0.69302, Test acc: 48.50%
Epoch: 30 | Loss: 0.69349, Accuracy: 48.38% | Test loss: 0.69303, Test acc: 47.00%
Epoch: 40 | Loss: 0.69337, Accuracy: 48.38% | Test loss: 0.69308, Test acc: 47.50%
Epoch: 50 | Loss: 0.69328, Accuracy: 49.00% | Test loss: 0.69316, Test acc: 46.00%
Epoch: 60 | Loss: 0.69321, Accuracy: 49.62% | Test loss: 0.69324, Test acc: 45.00%
Epoch: 70 | Loss: 0.69316, Accuracy: 49.38% | Test loss: 0.69332, Test acc: 43.50%
Epoch: 80 | Loss: 0.69313, Accuracy: 48.62% | Test loss: 0.69340, Test acc: 49.00%
Epoch: 90 | Loss: 0.69310, Accuracy: 48.62% | Test loss: 0.69347, Test acc: 48.50%
10、做出预测和评估模型
import requests
from pathlib import Path
# Download helper functions from the Learn PyTorch repo (if not already downloaded)
HELPER_URL = "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py"
helper_path = Path("helper_functions.py")
if helper_path.is_file():
    print("helper_functions.py already exists, skipping download")
else:
    print("Downloading helper_functions.py")
    # The timeout keeps the cell from hanging forever, and raise_for_status()
    # stops an HTTP error page from being saved as if it were the module.
    response = requests.get(HELPER_URL, timeout=30)
    response.raise_for_status()
    helper_path.write_bytes(response.content)
from helper_functions import plot_predictions, plot_decision_boundary
helper_functions.py already exists, skipping download
# Draw model_0's decision boundary on the training and test sets, side by side.
plt.figure(figsize=(12, 6))
panels = [("Train", X_train, y_train), ("Test", X_test, y_test)]
for position, (panel_title, panel_X, panel_y) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)
    plot_decision_boundary(model_0, panel_X, panel_y)
11、改进模型(从模型的角度)
# 尝试解决模型的欠拟合问题。
模型改进技术:
1.添加更多图层
2.添加更多隐藏单元。
3.更长时间训练
4.更改激活函数
5.改变学习率
6.改变损失函数
7.使用迁移学习
# 可以手动调整的参数,他们被称为:超参数
class CircleModelV1(nn.Module):
    """Linear classifier with one extra layer and wider hidden layers: 2 -> 10 -> 10 -> 1."""

    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=2, out_features=10)
        self.layer_2 = nn.Linear(in_features=10, out_features=10)  # the extra layer
        self.layer_3 = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):  # note: always make sure forward is spelt correctly!
        """Run ``x`` through layer_1, layer_2 and layer_3 in turn and return the result."""
        # Equivalent to assigning z = self.layer_1(x), z = self.layer_2(z),
        # z = self.layer_3(z) step by step and returning z.
        return self.layer_3(self.layer_2(self.layer_1(x)))
model_1 = CircleModelV1().to(device)
model_1
CircleModelV1(
(layer_1): Linear(in_features=2, out_features=10, bias=True)
(layer_2): Linear(in_features=10, out_features=10, bias=True)
(layer_3): Linear(in_features=10, out_features=1, bias=True)
)
loss_fn = nn.BCEWithLogitsLoss()  # takes raw logits, so no sigmoid is applied to its input
optimizer = torch.optim.SGD(model_1.parameters(), lr=0.1)
torch.manual_seed(42)
epochs = 1000  # train for longer
# Put the data on the target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)
for epoch in range(epochs):
    ### Training
    # Switch back to training mode: the testing phase below leaves the model in eval mode.
    model_1.train()
    # 1. Forward pass
    y_logits = model_1(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))  # logits -> prediction probabilities -> prediction labels
    # 2. Calculate loss/accuracy
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)
    # 3. Zero the gradients
    optimizer.zero_grad()
    # 4. Backpropagate the loss
    loss.backward()
    # 5. Optimizer step
    optimizer.step()
    ### Testing
    model_1.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_1(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        # 2. Calculate loss/accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)
    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")
Epoch: 0 | Loss: 0.69396, Accuracy: 50.88% | Test loss: 0.69261, Test acc: 51.00%
Epoch: 100 | Loss: 0.69305, Accuracy: 50.38% | Test loss: 0.69379, Test acc: 48.00%
Epoch: 200 | Loss: 0.69299, Accuracy: 51.12% | Test loss: 0.69437, Test acc: 46.00%
Epoch: 300 | Loss: 0.69298, Accuracy: 51.62% | Test loss: 0.69458, Test acc: 45.00%
Epoch: 400 | Loss: 0.69298, Accuracy: 51.12% | Test loss: 0.69465, Test acc: 46.00%
Epoch: 500 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69467, Test acc: 46.00%
Epoch: 600 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69468, Test acc: 46.00%
Epoch: 700 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69468, Test acc: 46.00%
Epoch: 800 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69468, Test acc: 46.00%
Epoch: 900 | Loss: 0.69298, Accuracy: 51.00% | Test loss: 0.69468, Test acc: 46.00%
12、准备数据,看看我们的模型是否可以对直线进行建模
# Known parameters of the straight line
weight, bias = 0.7, 0.3
# Range and spacing of the x values
start, end, step = 0, 1, 0.01
# Generate the data: x as a column, y from the linear regression formula
X_regression = torch.arange(start, end, step).unsqueeze(dim=1)
y_regression = weight * X_regression + bias
# Inspect the data
n_points = len(X_regression)
print(n_points)
X_regression[:5], y_regression[:5]
100
(tensor([[0.0000],
[0.0100],
[0.0200],
[0.0300],
[0.0400]]),
tensor([[0.3000],
[0.3070],
[0.3140],
[0.3210],
[0.3280]]))
# Split into training and test sets: the first 80% of the samples train, the rest test.
train_split = int(0.8 * len(X_regression))
X_train_regression, X_test_regression = X_regression[:train_split], X_regression[train_split:]
y_train_regression, y_test_regression = y_regression[:train_split], y_regression[train_split:]
# Check the length of each split
split_sizes = [
    len(part)
    for part in (X_train_regression, y_train_regression, X_test_regression, y_test_regression)
]
print(*split_sizes)
80 80 20 20
# Plot the training and test splits of the straight-line data.
regression_splits = dict(
    train_data=X_train_regression,
    train_labels=y_train_regression,
    test_data=X_test_regression,
    test_labels=y_test_regression,
)
plot_predictions(**regression_splits);
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-BgidV99C-1663317439054)(output_59_0.png)]
13、调整model_1以适应直线
# Same layer layout as model_1, but with a single input feature.
regression_layers = [
    nn.Linear(in_features=1, out_features=10),
    nn.Linear(in_features=10, out_features=10),
    nn.Linear(in_features=10, out_features=1),
]
model_2 = nn.Sequential(*regression_layers).to(device)
model_2
Sequential(
(0): Linear(in_features=1, out_features=10, bias=True)
(1): Linear(in_features=10, out_features=10, bias=True)
(2): Linear(in_features=10, out_features=1, bias=True)
)
# Use nn.L1Loss() (the same as mean absolute error) as the loss and torch.optim.SGD() as the optimizer.
# Loss function
loss_fn = nn.L1Loss()
# Optimizer
optimizer = torch.optim.SGD(model_2.parameters(), lr=0.1)
torch.manual_seed(42)
# Number of passes over the training data
epochs = 1000
# Put the data on the target device
X_train_regression, y_train_regression = X_train_regression.to(device), y_train_regression.to(device)
X_test_regression, y_test_regression = X_test_regression.to(device), y_test_regression.to(device)
for epoch in range(epochs):
    ### Training
    # Switch back to training mode: the testing phase below leaves the model in eval mode.
    model_2.train()
    # 1. Forward pass
    y_pred = model_2(X_train_regression)
    # 2. Calculate loss (no accuracy since it's a regression problem, not classification)
    loss = loss_fn(y_pred, y_train_regression)
    # 3. Zero the gradients
    optimizer.zero_grad()
    # 4. Backpropagate the loss
    loss.backward()
    # 5. Optimizer step
    optimizer.step()
    ### Testing
    model_2.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_pred = model_2(X_test_regression)
        # 2. Calculate the loss
        test_loss = loss_fn(test_pred, y_test_regression)
    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Train loss: {loss:.5f}, Test loss: {test_loss:.5f}")
Epoch: 0 | Train loss: 0.75986, Test loss: 0.54143
Epoch: 100 | Train loss: 0.09309, Test loss: 0.02901
Epoch: 200 | Train loss: 0.07376, Test loss: 0.02850
Epoch: 300 | Train loss: 0.06745, Test loss: 0.00615
Epoch: 400 | Train loss: 0.06107, Test loss: 0.02004
Epoch: 500 | Train loss: 0.05698, Test loss: 0.01061
Epoch: 600 | Train loss: 0.04857, Test loss: 0.01326
Epoch: 700 | Train loss: 0.06109, Test loss: 0.02127
Epoch: 800 | Train loss: 0.05600, Test loss: 0.01425
Epoch: 900 | Train loss: 0.05571, Test loss: 0.00603
# 好的,与model_1分类数据不同,看起来model_2损失实际上正在下降。
model_2.eval()
# Make predictions (inference) without tracking gradients
with torch.no_grad():
    y_preds = model_2(X_test_regression)
# Plot data and predictions with everything moved to the CPU (matplotlib can't handle data on the GPU)
# (try removing .cpu() from one of the below and see what happens)
plot_predictions(train_data=X_train_regression.cpu(),
                 train_labels=y_train_regression.cpu(),
                 test_data=X_test_regression.cpu(),
                 test_labels=y_test_regression.cpu(),
                 predictions=y_preds.cpu());
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-PalCcxOz-1663317439055)(output_66_0.png)]
14、非线性
我们已经看到我们的模型可以绘制直线(线性),这要归功于它的线性层。
但是我们赋予它绘制非直线(非线性)线的能力怎么样?
如何?
让我们来了解一下。
15、重建非线性数据(红色和蓝色圆圈)
# Generate and visualise the data
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles
n_samples = 1000
circle_options = dict(n_samples=n_samples, noise=0.03, random_state=42)
X, y = make_circles(**circle_options)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdBu);
# Use 80% of the data for training and 20% for testing.
# Convert to tensors first, then split into training and test sets.
import torch
from sklearn.model_selection import train_test_split
# Turn data into float tensors
X = torch.from_numpy(X).float()
y = torch.from_numpy(y).float()
# Split into train and test sets
split_options = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
X_train[:5], y_train[:5]
(tensor([[ 0.6579, -0.4651],
[ 0.6319, -0.7347],
[-1.0086, -0.1240],
[-0.9666, -0.2256],
[-0.1666, 0.7994]]),
tensor([1., 0., 0., 0., 1.]))
16、建立非线性函数
# 用非线性激活函数创建一个模型
from torch import nn
class CircleModelV2(nn.Module):
    """Classifier with ReLU between its linear layers: 2 -> 10 -> ReLU -> 10 -> ReLU -> 1."""

    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=2, out_features=10)
        self.layer_2 = nn.Linear(in_features=10, out_features=10)
        self.layer_3 = nn.Linear(in_features=10, out_features=1)
        self.relu = nn.ReLU()  # the non-linear activation function
        # A sigmoid (nn.Sigmoid) could also be put in the model, which would
        # mean it does not need to be applied to the predictions afterwards.

    def forward(self, x):
        """Return layer_3's output for ``x``, with ReLU applied after layer_1 and layer_2."""
        hidden = self.relu(self.layer_1(x))
        hidden = self.relu(self.layer_2(hidden))
        return self.layer_3(hidden)
model_3 = CircleModelV2().to(device)
print(model_3)
CircleModelV2(
(layer_1): Linear(in_features=2, out_features=10, bias=True)
(layer_2): Linear(in_features=10, out_features=10, bias=True)
(layer_3): Linear(in_features=10, out_features=1, bias=True)
(relu): ReLU()
)
# Loss function and optimizer for model_3
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model_3.parameters(), lr=0.1)
17、训练非线性模型
torch.manual_seed(42)
epochs = 1000
# Put all data on the target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)
for epoch in range(epochs):
    ### Training
    # Switch back to training mode: the testing phase below leaves the model in eval mode.
    model_3.train()
    # 1. Forward pass
    y_logits = model_3(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))  # logits -> prediction probabilities -> prediction labels
    # 2. Calculate loss and accuracy (BCEWithLogitsLoss calculates loss using logits)
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)
    # 3. Zero the gradients
    optimizer.zero_grad()
    # 4. Backpropagate the loss
    loss.backward()
    # 5. Optimizer step
    optimizer.step()
    ### Testing
    model_3.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_3(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))  # logits -> prediction probabilities -> prediction labels
        # 2. Calculate loss and accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)
    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")
Epoch: 0 | Loss: 0.69295, Accuracy: 50.00% | Test Loss: 0.69319, Test Accuracy: 50.00%
Epoch: 100 | Loss: 0.69115, Accuracy: 52.88% | Test Loss: 0.69102, Test Accuracy: 52.50%
Epoch: 200 | Loss: 0.68977, Accuracy: 53.37% | Test Loss: 0.68940, Test Accuracy: 55.00%
Epoch: 300 | Loss: 0.68795, Accuracy: 53.00% | Test Loss: 0.68723, Test Accuracy: 56.00%
Epoch: 400 | Loss: 0.68517, Accuracy: 52.75% | Test Loss: 0.68411, Test Accuracy: 56.50%
Epoch: 500 | Loss: 0.68102, Accuracy: 52.75% | Test Loss: 0.67941, Test Accuracy: 56.50%
Epoch: 600 | Loss: 0.67515, Accuracy: 54.50% | Test Loss: 0.67285, Test Accuracy: 56.00%
Epoch: 700 | Loss: 0.66659, Accuracy: 58.38% | Test Loss: 0.66322, Test Accuracy: 59.00%
Epoch: 800 | Loss: 0.65160, Accuracy: 64.00% | Test Loss: 0.64757, Test Accuracy: 67.50%
Epoch: 900 | Loss: 0.62362, Accuracy: 74.00% | Test Loss: 0.62145, Test Accuracy: 79.00%
18、评估使用非线性激活函数训练的模型
# Make predictions
model_3.eval()
with torch.no_grad():
    y_preds = torch.round(torch.sigmoid(model_3(X_test))).squeeze()
# Compare with y_test, the labels that belong to X_test. `y` is the full,
# unsplit label tensor, so its first rows are not the samples predicted here.
y_preds[:10], y_test[:10]  # want preds in same format as truth labels
(tensor([1., 0., 1., 0., 0., 1., 0., 0., 1., 0.]),
tensor([1., 1., 1., 1., 0., 1., 1., 1., 1., 0.]))
# Decision boundaries side by side: the linear model on the training set and
# the non-linear model on the test set.
plt.figure(figsize=(12, 6))
panels = [
    ("Train", model_1, X_train, y_train),  # model_1 = no non-linearity
    ("Test", model_3, X_test, y_test),  # model_3 = has non-linearity
]
for position, (panel_title, panel_model, panel_X, panel_y) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)
    plot_decision_boundary(panel_model, panel_X, panel_y)
好的!不完美,但仍然比以前好得多.
您是否可以尝试一些技巧来提高模型的测试准确性!
19、复制非线性激活函数
我们之前看到了如何向我们的模型添加非线性激活函数可以帮助它对非线性数据进行建模
A = torch.arange(-10, 10, 1, dtype=torch.float32)
A
tensor([-10., -9., -8., -7., -6., -5., -4., -3., -2., -1., 0., 1.,
2., 3., 4., 5., 6., 7., 8., 9.])
plt.plot(A);
# Hand-written ReLU function
def relu(x):
    """Return the element-wise maximum of 0 and ``x``.

    Args:
        x: Input tensor (torch.maximum needs tensors, so a plain number will not work).

    Returns:
        A tensor in which every negative element of ``x`` is replaced by 0.
    """
    zero = torch.tensor(0)
    return torch.maximum(zero, x)
# 调用这个函数
relu(A)
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 3., 4., 5., 6., 7.,
8., 9.])
plt.plot(relu(A));
# Hand-written sigmoid function
def sigmoid(x):
    """Return ``1 / (1 + exp(-x))`` computed element-wise on the tensor ``x``."""
    return 1 / (1 + torch.exp(-x))
sigmoid(A)
tensor([4.5398e-05, 1.2339e-04, 3.3535e-04, 9.1105e-04, 2.4726e-03, 6.6929e-03,
1.7986e-02, 4.7426e-02, 1.1920e-01, 2.6894e-01, 5.0000e-01, 7.3106e-01,
8.8080e-01, 9.5257e-01, 9.8201e-01, 9.9331e-01, 9.9753e-01, 9.9909e-01,
9.9966e-01, 9.9988e-01])
plt.plot(sigmoid(A));
20、通过构建多分类Pytorch模型将所有东西放在一起
创建多分类数据
# 1.使用 .创建一些多类数据make_blobs()。
# 2.将数据转换为张量(默认make_blobs()是使用 NumPy 数组)。
# 3.使用 . 将数据拆分为训练集和测试集train_test_split()。
# 4.可视化数据。
# Import dependencies
import torch
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
# Hyperparameters for data creation
NUM_CLASSES = 4  # number of classes
NUM_FEATURES = 2  # number of features
RANDOM_SEED = 42  # random seed
# 1. Create the multi-class data
blob_options = dict(
    n_samples=1000,  # number of samples
    n_features=NUM_FEATURES,  # X features
    centers=NUM_CLASSES,  # y labels
    cluster_std=1.5,  # spreads the clusters a little (try the default, 1.0)
    random_state=RANDOM_SEED,
)
X_blob, y_blob = make_blobs(**blob_options)
# 2. Turn the data into tensors
X_blob = torch.from_numpy(X_blob).type(torch.float)
y_blob = torch.from_numpy(y_blob).type(torch.LongTensor)
print(X_blob[:5], y_blob[:5])  # first five samples
# 3. Split into training and test sets
blob_split = train_test_split(X_blob, y_blob, test_size=0.2, random_state=RANDOM_SEED)
X_blob_train, X_blob_test, y_blob_train, y_blob_test = blob_split
# 4. Visualise the data
plt.figure(figsize=(10, 7))
plt.scatter(X_blob[:, 0], X_blob[:, 1], c=y_blob, cmap=plt.cm.RdYlBu);
tensor([[-8.4134, 6.9352],
[-5.7665, -6.4312],
[-6.0421, -6.7661],
[ 3.9508, 0.6984],
[ 4.2505, -0.2815]]) tensor([3, 2, 2, 1, 1])
21、在 PyTorch 中构建多类分类模型
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device
'cpu'
from torch import nn
# 创建模型
class BlobModel(nn.Module):
    """Multi-class classification model built from a stack of three linear layers."""

    def __init__(self, input_features, output_features, hidden_units=8):
        """Initializes all required hyperparameters for a multi-class classification model.

        Args:
            input_features (int): Number of input features to the model.
            output_features (int): Number of output features of the model
                (i.e. the number of classes).
            hidden_units (int): Number of units in each hidden layer, default 8.
        """
        super().__init__()
        # Three linear layers. An nn.ReLU() can be inserted after the first
        # and/or second layer to test whether the data needs non-linearity.
        self.linear_layer_stack = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.Linear(in_features=hidden_units, out_features=output_features),  # one output per class
        )

    def forward(self, x):
        """Return the output of the layer stack for ``x``."""
        return self.linear_layer_stack(x)
# 生成一个BlobModel的实例并把它转换到目标设备上。
model_4 = BlobModel(input_features=NUM_FEATURES,
output_features=NUM_CLASSES,
hidden_units=8).to(device)
model_4
BlobModel(
(linear_layer_stack): Sequential(
(0): Linear(in_features=2, out_features=8, bias=True)
(1): Linear(in_features=8, out_features=8, bias=True)
(2): Linear(in_features=8, out_features=4, bias=True)
)
)
22、为多分类Pytorch模型创建损失函数和优化器
# Loss function for multi-class classification
loss_fn = nn.CrossEntropyLoss()
# SGD optimizer over every parameter of model_4
optimizer = torch.optim.SGD(params=model_4.parameters(), lr=0.1)
23、获取多类 PyTorch 模型的预测概率
model_4(X_blob_train.to(device))[:5]
tensor([[-1.2711, -0.6494, -1.4740, -0.7044],
[ 0.2210, -1.5439, 0.0420, 1.1531],
[ 2.8698, 0.9143, 3.3169, 1.4027],
[ 1.9576, 0.3125, 2.2244, 1.1324],
[ 0.5458, -1.2381, 0.4441, 1.1804]], grad_fn=<SliceBackward>)
# 检查一下特征的维度和分类的类别
model_4(X_blob_train.to(device))[0].shape, NUM_CLASSES
(torch.Size([4]), 4)
# logits -(softmax)-> prediction probabilities -(argmax(dim=1))-> prediction labels,
# i.e. torch.softmax(y_logits, dim=1).argmax(dim=1)
# Raw logits for the blob test set. X_blob_test is used rather than X_test:
# X_test holds the circles data and only ran here because it also has 2 features.
y_logits = model_4(X_blob_test.to(device))
# Turn the logits into prediction probabilities
y_pred_probs = torch.softmax(y_logits, dim=1)
print(y_logits[:5])
print(y_pred_probs[:5])
tensor([[ 0.2341, -0.3357, 0.2307, 0.2534],
[ 0.1198, -0.3702, 0.0998, 0.1887],
[ 0.3790, -0.2037, 0.4095, 0.2689],
[ 0.1936, -0.3733, 0.1807, 0.2496],
[ 0.1338, -0.1378, 0.1487, 0.0247]], grad_fn=<SliceBackward>)
tensor([[0.2792, 0.1579, 0.2782, 0.2846],
[0.2729, 0.1672, 0.2675, 0.2924],
[0.2869, 0.1602, 0.2958, 0.2570],
[0.2769, 0.1571, 0.2733, 0.2928],
[0.2722, 0.2075, 0.2763, 0.2441]], grad_fn=<SliceBackward>)
# 每一类的概率相加起来等于1.
torch.sum(y_pred_probs[0])
tensor(1., grad_fn=<SumBackward0>)
# 这些预测概率本质上是在说明模型认为目标X样本(输入)映射到每个类的程度
print(y_pred_probs[0])
print(torch.argmax(y_pred_probs[0]))
tensor([0.2792, 0.1579, 0.2782, 0.2846], grad_fn=<SelectBackward>)
tensor(3)
# 对于多类分类问题,要将 logits 转换为预测概率,您可以使用 softmax 激活函数 ( torch.softmax)。
24、为多分类Pytorch模型创建训练和测试循环
# Set the random seed
torch.manual_seed(42)
# Number of passes over the dataset
epochs = 100
# Put the data on the target device
X_blob_train, y_blob_train = X_blob_train.to(device), y_blob_train.to(device)
X_blob_test, y_blob_test = X_blob_test.to(device), y_blob_test.to(device)
for epoch in range(epochs):
    ### Training
    model_4.train()
    # 1. Forward pass: the model outputs raw logits
    y_logits = model_4(X_blob_train)
    # logits -> prediction probabilities -> prediction labels
    y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1)
    # 2. Calculate loss and accuracy
    loss = loss_fn(y_logits, y_blob_train)
    acc = accuracy_fn(y_true=y_blob_train, y_pred=y_pred)
    # 3. Zero the gradients
    optimizer.zero_grad()
    # 4. Backpropagate the loss
    loss.backward()
    # 5. Optimizer step
    optimizer.step()
    ### Testing
    model_4.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_4(X_blob_test)
        test_pred = torch.softmax(test_logits, dim=1).argmax(dim=1)
        # 2. Calculate test loss and accuracy
        test_loss = loss_fn(test_logits, y_blob_test)
        test_acc = accuracy_fn(y_true=y_blob_test, y_pred=test_pred)
    # Report progress every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Acc: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Acc: {test_acc:.2f}%")
Epoch: 0 | Loss: 0.02564, Acc: 99.25% | Test Loss: 0.01499, Test Acc: 99.50%
Epoch: 10 | Loss: 0.02555, Acc: 99.25% | Test Loss: 0.01485, Test Acc: 99.50%
Epoch: 20 | Loss: 0.02547, Acc: 99.25% | Test Loss: 0.01472, Test Acc: 99.50%
Epoch: 30 | Loss: 0.02539, Acc: 99.25% | Test Loss: 0.01460, Test Acc: 99.50%
Epoch: 40 | Loss: 0.02531, Acc: 99.25% | Test Loss: 0.01448, Test Acc: 99.50%
Epoch: 50 | Loss: 0.02524, Acc: 99.25% | Test Loss: 0.01437, Test Acc: 99.50%
Epoch: 60 | Loss: 0.02517, Acc: 99.25% | Test Loss: 0.01427, Test Acc: 99.50%
Epoch: 70 | Loss: 0.02510, Acc: 99.25% | Test Loss: 0.01417, Test Acc: 99.50%
Epoch: 80 | Loss: 0.02504, Acc: 99.25% | Test Loss: 0.01407, Test Acc: 99.50%
Epoch: 90 | Loss: 0.02498, Acc: 99.25% | Test Loss: 0.01398, Test Acc: 99.50%
25、使用 PyTorch 多类模型进行预测和评估
model_4.eval()  # evaluation mode
# Predict on the blob test set without tracking gradients
with torch.no_grad():
    y_logits = model_4(X_blob_test)
# Look at the first ten raw predictions
y_logits[:10]
tensor([[ 6.1082, 15.6307, -20.3789, -13.3291],
[ 7.1512, -18.6590, 5.0168, 14.9331],
[ -8.2533, -19.4368, 28.1576, 16.7717],
[ 2.4859, 11.6492, -11.9552, -9.7569],
[ 11.5548, 4.4087, -19.5344, -4.4759],
[ 7.9877, -23.1785, 7.3417, 18.6040],
[ -8.8218, -14.5741, 25.1713, 12.7964],
[ 10.1165, -1.6657, -12.6617, 0.6567],
[ -8.2441, -27.4645, 34.4467, 23.4065],
[ 10.4595, 0.5678, -14.9170, -1.2161]])
# Turn the raw predictions (y_logits) into prediction probabilities
y_pred_probs = torch.softmax(y_logits, dim=1)
# Turn the prediction probabilities into predicted labels
y_preds = torch.argmax(y_pred_probs, dim=1)
# Compare the first 10 predictions with the test labels and report test accuracy
test_accuracy = accuracy_fn(y_true=y_blob_test, y_pred=y_preds)
print(f"Predictions: {y_preds[:10]}\nLabels: {y_blob_test[:10]}")
print(f"Test accuracy: {test_accuracy}%")
Predictions: tensor([1, 3, 2, 1, 0, 3, 2, 0, 2, 0])
Labels: tensor([1, 3, 2, 1, 0, 3, 2, 0, 2, 0])
Test accuracy: 99.5%
# Draw model_4's decision boundary on the training and test sets, side by side.
plt.figure(figsize=(12, 6))  # figure size
blob_panels = [
    ("Train", X_blob_train, y_blob_train),  # 1 row, 2 columns, first plot
    ("Test", X_blob_test, y_blob_test),  # 1 row, 2 columns, second plot
]
for position, (panel_title, panel_X, panel_y) in enumerate(blob_panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)  # plot title
    plot_decision_boundary(model_4, panel_X, panel_y)
26、更多分类评价指标
# 准确率 torchmetrics.Accuracy()或者sklearn.metrics.accuracy_score()
# 精确率
1.torchmetrics.Precision()
2.sklearn.metrics.precision_score()
# 召回率
1.torchmetrics.Recall()
2.sklearn.metrics.recall_score()
# F1分数
1.torchmetrics.F1Score()
2.sklearn.metrics.f1_score()
# 混淆矩阵
1.torchmetrics.ConfusionMatrix
2.sklearn.metrics.ConfusionMatrixDisplay.from_predictions()(旧版为 sklearn.metrics.plot_confusion_matrix(),已在 scikit-learn 1.2 中移除)
# 分类报告
# 1.sklearn.metrics.classification_report()
# 安装torchmetrics
!pip -q install torchmetrics
from torchmetrics import Accuracy
# 创建一个矩阵确保他们在目标设备中
torchmetrics_accuracy = Accuracy().to(device)
# 计算准确率
torchmetrics_accuracy(y_preds, y_blob_test)
tensor(0.9950)