对cifar100进行图像分类,并且进行可视化分析
cifar图像进行预处理
数据集地址:https://www.kaggle.com/datasets/fedesoriano/cifar100
1、读取数据集元信息(反序列化pickle文件)
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object. ``encoding='bytes'`` is passed to
        ``pickle.load``; the callers in this file index the result with
        ``bytes`` keys such as ``b'data'``.
    """
    import pickle

    # SECURITY: unpickling can execute arbitrary code, so only load files
    # that come from a trusted source.
    with open(file, 'rb') as fo:
        # Named `data` rather than `dict` so the builtin is not shadowed.
        data = pickle.load(fo, encoding='bytes')
    return data
# Location of the CIFAR-100 "meta" pickle; change this path for your setup.
metadata_path = '/kaggle/input/cifar100/meta'
metadata = unpickle(metadata_path)
# Map each superclass index to its coarse label name. dict() consumes the
# enumerate iterator directly, so no intermediate list is required.
superclass_dict = dict(enumerate(metadata[b'coarse_label_names']))
# Bare expression: presumably kept so a notebook cell displays the mapping.
superclass_dict
2、对数据集进行读取
# Directory that holds the CIFAR-100 pickle files; change this path.
data_pre_path = '/kaggle/input/cifar100/'

# Paths of the two split files.
data_train_path = f'{data_pre_path}train'
data_test_path = f'{data_pre_path}test'

# Training split: read the dictionary, then pull out pixel rows and labels.
# (Change b'coarse_labels' if you want to use the 100 classes.)
data_train_dict = unpickle(data_train_path)
data_train = data_train_dict[b'data']
label_train = np.array(data_train_dict[b'coarse_labels'])

# Test split, handled the same way.
data_test_dict = unpickle(data_test_path)
data_test = data_test_dict[b'data']
label_test = np.array(data_test_dict[b'coarse_labels'])
这里训练集中图像数据的维度是(50000,3072),即每幅维度为(3,32,32)的图像被展平成长度为3072的1维numpy数组。
3、还原图像
# Rebuild the first training image from its flat row of pixel values.
img, label = data_train[0], label_train[0]
# The row is read as three consecutive planes (named r, g, b here); each
# plane is reshaped to 32x32 and the three are stacked depth-wise.
img_r, img_g, img_b = (
    plane.reshape(32, 32)
    for plane in (img[:1024], img[1024:2048], img[2048:])
)
img = np.dstack((img_r, img_g, img_b))
print(img.shape)

# Display the image with its label as the title.
plt.figure()
plt.imshow(img)
plt.title(label)
plt.show()
建立数据加载格式
class cifar_dataset(Dataset):
    """Dataset over flattened CIFAR image rows and their labels.

    Each row of ``img_ls`` is read as three consecutive 1024-value planes,
    each of which is a flattened 32x32 image.
    """

    def __init__(self, img_ls, label_ls):
        """Store the image rows and labels.

        Args:
            img_ls: Array whose first axis indexes images (``shape[0]`` is
                used as the dataset length).
            label_ls: Indexable collection of labels aligned with ``img_ls``.
        """
        self.img_ls = img_ls
        self.label_ls = label_ls

    def __len__(self):
        """Return the number of images."""
        return self.img_ls.shape[0]

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        The image is a ``(3, 32, 32)`` tensor whose values are the stored
        values divided by 255.
        """
        img = self.img_ls[index] / 255.0
        label = self.label_ls[index]
        # The row is already laid out plane by plane, so a single reshape
        # yields the channel-first image. Splitting into three planes,
        # stacking them depth-wise and transposing back gives the same
        # values, but as a non-contiguous tensor.
        img = torch.from_numpy(img.reshape(3, 32, 32))
        return img, label
自定义一个继承自Dataset抽象类的数据集类,并重写__len__和__getitem__方法。__len__方法返回数据集的长度,__getitem__方法根据索引返回数据集中的一个样本。
数据可视化
from matplotlib import pyplot as plt

# Pull one batch from the training loader and report its shapes and labels.
img, label = next(iter(train_dl))
print(img.shape, label.shape)
print(label)

plt.figure(figsize=(12, 8))
# Show the first three samples side by side. The per-sample names differ
# from `img` / `label` so the loop does not overwrite the batch tensors.
for i, (sample_img, sample_label) in enumerate(zip(img[0:3], label[0:3])):
    # Move the first axis last so imshow receives a channels-last array.
    sample_img = sample_img.squeeze().numpy().transpose(1, 2, 0)
    sample_label = sample_label.squeeze().numpy()
    print(sample_img.shape)
    print(sample_img.max(), sample_img.min())
    plt.subplot(1, 3, i + 1)
    plt.imshow(sample_img)
    plt.title(sample_label)
plt.show()
建立网络
# Load ResNet-34 with pretrained weights.
model = models.resnet34(pretrained=True)
# Replace the final fully connected layer with a fresh 100-output layer that
# keeps the same input width.
# NOTE(review): the labels loaded earlier in this file come from
# b'coarse_labels'; confirm that 100 outputs is the intended head size.
fc_in_features = model.fc.in_features
model.fc = nn.Linear(fc_in_features, 100)
使用pytorch中torchvision自带的网络模型,使用预训练权重,并且修改全连接层,使之符合目标任务。
训练和测试网络
# Cross-entropy loss, moved to the GPU.
criterion = nn.CrossEntropyLoss().cuda()
# Adam is given only the parameters of the final fully connected layer.
optimizer = optim.Adam(model.fc.parameters(), lr=1e-3)
# Cyclic learning-rate schedule between base_lr and max_lr; momentum cycling
# is switched off.
scheduler = optim.lr_scheduler.CyclicLR(
    optimizer,
    base_lr=1e-4,
    max_lr=5e-3,
    step_size_up=400,
    step_size_down=200,
    cycle_momentum=False,
)
CIFAR100图像分类任务中的损失函数通常是交叉熵损失函数(Cross-entropy Loss),也称为对数损失函数(Log Loss)。该损失函数用来度量模型预测输出与实际输出之间的差异性,从而优化模型参数,使得模型的预测输出与实际输出更加接近。
优化器使用Adam;为了加快训练速度,本文只训练最后的全连接层。
学习率调度器这里使用CyclicLR,它使学习率在base_lr和max_lr之间周期性地变化;默认的triangular模式下,学习率曲线是线性上升、线性下降的三角波。
def model_train(model, optimizer, scheduler, train_dl, epoch, Epoch):
    """Train ``model`` for one pass over ``train_dl``.

    The loss is computed with the module-level ``criterion``. Inputs and
    labels are moved to the GPU with ``.cuda()``. Running averages of loss
    and accuracy, plus the current learning rate, are shown on the progress
    bar.

    Args:
        model: Network to train; switched to training mode here.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler, stepped once every 10 batches.
        train_dl: Iterable yielding ``(images, labels)`` batches.
        epoch: Zero-based index of the current epoch (display only).
        Epoch: Total number of epochs (display only).
    """
    total_loss = 0
    total_accuracy = 0

    print('Start Train')
    pbar = tqdm(train_dl)
    model.train()
    # enumerate supplies the batch index, so no counter is kept by hand.
    for index, (images, labels) in enumerate(pbar):
        images = images.float().cuda()
        labels = labels.cuda()

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The scheduler advances on every 10th batch, not on every batch.
        if index % 10 == 9:
            scheduler.step()

        with torch.no_grad():
            # softmax is monotonic, so argmax over the raw outputs selects
            # the same class without the extra computation.
            predictions = torch.argmax(outputs, dim=-1)
            accuracy = (predictions == labels).float().mean()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        pbar.set_description(f'Epoch [{epoch+1}/{Epoch}]')
        pbar.set_postfix(**{'total_loss': total_loss / (index + 1),
                            'accuracy'  : total_accuracy / (index + 1),
                            'lr'        : optimizer.param_groups[0]['lr']})
    print('Finish Train')
这是模型的训练
def model_test(model, test_dl, epoch, Epoch):
    """Evaluate ``model`` on ``test_dl`` and show running metrics.

    Inputs and labels are moved to the GPU with ``.cuda()``. Running
    averages of loss and accuracy are shown on the progress bar. The
    displayed learning rate is read from the module-level ``optimizer``.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        test_dl: Iterable yielding ``(images, labels)`` batches.
        epoch: Zero-based index of the current epoch (display only).
        Epoch: Total number of epochs (display only).
    """
    val_total_loss = 0
    val_total_accuracy = 0

    print('Start Validation')
    # Wrap the loader itself (not enumerate(loader)) so tqdm can read its
    # length and display a total.
    pbar_val = tqdm(test_dl)
    model.eval()
    for iteration, (images, labels) in enumerate(pbar_val):
        # No gradients are needed for evaluation, so nothing is zeroed and
        # no autograd graph is built.
        with torch.no_grad():
            images = images.float().cuda()
            labels = labels.cuda()

            outputs = model(images)
            # Cross-entropy on the raw outputs: the same quantity as
            # NLLLoss applied to log_softmax, and the same loss used for
            # training in this file.
            loss = F.cross_entropy(outputs, labels)
            # softmax is monotonic, so argmax over the raw outputs selects
            # the same class.
            predictions = torch.argmax(outputs, dim=-1)
            accuracy = (predictions == labels).float().mean()

        val_total_loss += loss.item()
        val_total_accuracy += accuracy.item()

        pbar_val.set_description(f'Epoch [{epoch+1}/{Epoch}]')
        pbar_val.set_postfix(**{'total_loss': val_total_loss / (iteration + 1),
                                'accuracy'  : val_total_accuracy / (iteration + 1),
                                'lr'        : optimizer.param_groups[0]['lr']})
这是模型的测试
结果可视化
import random

# Pick one test sample at random. randrange excludes its upper bound, so the
# index always stays inside the dataset; random.randint(0, n) can also
# return n, which is one past the last valid index.
tmp = random.randrange(len(test_ds))
img, label = test_ds[tmp]
print(img.shape, label)

# Add a leading batch axis of size 1 and move the input to the GPU.
img = img.float().unsqueeze(0).cuda()
print(img.shape)

model.eval()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    pre = model(img)
print(pre.shape)
# softmax is monotonic, so argmax over the raw outputs selects the same class.
pre = torch.argmax(pre, dim=-1)
print(pre.item())

plt.figure(figsize=(12, 8))
# Back to a channels-last NumPy array on the CPU for imshow.
img = img.cpu().squeeze().numpy().transpose(1, 2, 0)
pre = pre.cpu().squeeze().numpy()
plt.imshow(img)
title = 'label is {},predict is {}'.format(label, pre)
plt.title(title)
plt.show()
随机在测试图像中选取一幅图像,利用模型进行预测,将预测结果进行可视化。
总结
全部内容的话,可看https://www.kaggle.com/code/yipengzhou3/cifar100-pytorch/notebook
有用的话,可以三连鼓励一下!