写在前面
理论部分不再赘述,请参考之前的文章。本文只是在此基础上增加了多分类的可视化。
请看之前的文章:各个深度模型解读(3) softmax 回归 (多分类)
实战
包导入与超参数设置
import torchsnooper as torchsnooper
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import numpy as np
import torch
import torchvision
# %%
# Run configuration and hyperparameters.
#
# NOTE: the original expression `'cpu' if torch.cuda.is_available() else "cpu"`
# selected the CPU in both branches, so the script has always run on the CPU.
# That is now stated directly. Moving to a GPU would also require changes to
# the visualization wrapper later in this file, which builds its inputs on the
# CPU and converts predictions with `.numpy()`.
device = torch.device('cpu')
random_seed = 0        # passed to torch.manual_seed
learning_rate = 0.05   # step size handed to the optimizer
num_epochs = 20        # number of passes over the training loader
batch_size = 8         # samples per mini-batch in both data loaders
num_features = 2       # input width of the linear layer
num_classes = 3        # output width of the linear layer
数据生成
# Download the iris data set and parse every field as a string, so the label
# column can be read together with the numeric columns.
data = np.genfromtxt('http://garden-lu-oss.oss-cn-beijing.aliyuncs.com/data/iris.data', delimiter=',', dtype=str)
# The file has 5 columns: columns 3 and 4 (indices 2, 3) are the input
# features and column 5 (index 4) is the label.
X, y = data[:, [2, 3]], data[:, 4]
X = X.astype(float)  # string fields -> floating point
# Map the label strings used by the data set to integer class ids.
d = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
y = np.array([d[x] for x in y])  # strings -> integer class ids
# `np.int` was only an alias of the builtin `int` and was removed in
# NumPy 1.24, where it raises AttributeError; use the builtin directly.
y = y.astype(int)
# bincount counts how often each value from 0 to max(y) occurs.
print('Class label counts:', np.bincount(y))
print("X.shape:", X.shape)
print("Y.shape:", y.shape)

# Shuffle with a dedicated, fixed-seed generator so the order is reproducible.
shuffle_idx = np.arange(y.shape[0])
shuffle_rng = np.random.RandomState(123)
shuffle_rng.shuffle(shuffle_idx)
X, y = X[shuffle_idx], y[shuffle_idx]

# NOTE(review): X and y were already permuted on the previous line, so indexing
# with shuffle_idx again applies the permutation a second time. It is kept
# as-is to preserve the resulting split. The visible dataset construction later
# in this file slices X and y directly and does not use these standardized
# splits - confirm the intent.
X_train, X_test = X[shuffle_idx[:70]], X[shuffle_idx[70:]]
y_train, y_test = y[shuffle_idx[:70]], y[shuffle_idx[70:]]

# Standardize both splits with statistics computed on the training split only.
mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma
加载数据
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
class MyDataset(Dataset):
    """In-memory dataset pairing feature rows with integer class labels.

    The original text of this class had lost its indentation and could not be
    parsed; the structure is restored here.

    Args:
        X: array-like of features, converted to a ``float32`` tensor.
        y: array-like of labels, converted to an ``int64`` tensor.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.int64)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of labels, i.e. the number of samples."""
        return self.y.shape[0]
# Two dataset containers: the first 100 samples are used for training and the
# remaining samples (101-150 per the original note) for testing.
train_dataset = MyDataset(X[:100], y[:100])
test_dataset = MyDataset(X[100:], y[100:])

# Two data loaders; no transform-based data augmentation is applied here.
# Only the training loader reshuffles its samples.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)
定义模型
class SoftmaxRegression(torch.nn.Module):
    """Single linear layer followed by a softmax over the class dimension.

    The original text of this class had lost its indentation and could not be
    parsed; the structure is restored here.

    Args:
        num_features: input width of the linear layer.
        num_classes: output width of the linear layer.
    """

    def __init__(self, num_features, num_classes):
        super().__init__()
        self.linear = torch.nn.Linear(num_features, num_classes)
        # detach() returns a tensor sharing storage with the parameter, so
        # zero_() resets the weights and bias in place to all zeros.
        self.linear.weight.detach().zero_()
        self.linear.bias.detach().zero_()

    def forward(self, x):
        """Return ``(logits, probas)`` for the input batch ``x``.

        ``probas`` is the softmax of ``logits`` along dim 1, so each row sums
        to 1 (dim 0 would normalize each column instead).
        """
        logits = self.linear(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
# Build the classifier and place it on the configured device; Module.to()
# returns the module itself, so the call can be chained.
model = SoftmaxRegression(num_features, num_classes).to(device)

# The optimizer defines the parameter-update strategy.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
训练与评估
# Fix the PyTorch seed so later runs can be reproduced. Per the original note,
# setting it once here is enough; it does not need to be touched again.
torch.manual_seed(random_seed)
def compute_accuracy(model, data_loader):
    """Return the accuracy of ``model`` over ``data_loader`` as a percentage.

    The original text of this function had lost its indentation and could not
    be parsed; the structure is restored here.

    Args:
        model: callable returning ``(logits, probas)`` for a feature batch.
        data_loader: iterable yielding ``(features, targets)`` tensor batches.

    Returns:
        A 0-dim float tensor holding ``100 * correct / total``. For an empty
        loader the result is NaN (0 / 0); the original raised AttributeError
        in that case because its counter was still a plain ``int``.
    """
    # A tensor accumulator keeps `.float()` valid even when no batch is seen.
    correct_pred = torch.zeros((), dtype=torch.int64, device=device)
    num_examples = 0
    for features, targets in data_loader:
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        # torch.max(a, 1) returns, for every row, the largest value and its
        # column index; the index is the predicted class
        # (torch.max(a, 0) would reduce over rows and work per column).
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float() / num_examples * 100
# Training loop. The original text had lost its indentation and could not be
# parsed; the nesting is restored here.
for epoch in range(num_epochs):
    for batch_idx, (features, targets) in enumerate(train_loader):
        features = features.to(device)
        targets = targets.to(device)

        logits, probas = model(features)
        # cross_entropy takes the raw logits, not the softmax output probas.
        cost = F.cross_entropy(logits, targets)

        optimizer.zero_grad()  # clear gradients left from the previous step
        cost.backward()        # back-propagate: compute gradients of the cost
        optimizer.step()       # update the parameters

        # Log every 50th batch, starting with batch 0.
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch + 1, num_epochs, batch_idx,
                     len(train_dataset) // batch_size, cost))

    # Evaluation needs no gradients, so tracking is switched off here.
    with torch.set_grad_enabled(False):
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
            epoch + 1, num_epochs,
            compute_accuracy(model, train_loader)))

print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
mlxtend 可视化展示
class ModelWrapper():
    """Adapter exposing a ``predict`` method on top of a trained model.

    The original text of this class had lost its indentation and could not be
    parsed; the structure is restored here.

    Args:
        model: callable returning ``(logits, probas)`` for a feature tensor.
        device: torch device on which the input tensor is created.
    """

    def __init__(self, model, device):
        self.model = model
        self.device = device

    def predict(self, X):
        """Return the predicted class index for every row of ``X``.

        Args:
            X: array-like of features, converted to a ``float32`` tensor.

        Returns:
            A NumPy array with one predicted label per input row.
        """
        features = torch.tensor(X, dtype=torch.float32, device=self.device)
        # Prediction needs no gradients, so tracking is switched off.
        with torch.no_grad():
            logits, probas = self.model(features)
        _, predicted_labels = torch.max(probas, 1)
        # .cpu() is a no-op on CPU tensors and makes .numpy() valid for
        # tensors living on another device.
        return predicted_labels.cpu().numpy()
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions

# Wrap the already-trained model once more so that it offers a predict()
# method.
mymodel = ModelWrapper(model=model, device=torch.device('cpu'))

# Let mlxtend's plotting helper draw the decision regions directly.
plot_decision_regions(X=X, y=y, clf=mymodel)
plt.show()
![](https://img-blog.csdnimg.cn/img_convert/7efcc7cb190dceae864504ff63fd259b.png)