由于ResNet等以全局平均池化和单个全连接层结尾的CNN不需要修改网络结构就可以直接使用CAM,为了演示需要改造网络结构的情况,本文基于AlexNet进行搭建。使用Kaggle的猫狗数据集,代码如下:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset
from PIL import Image
import glob
from torch.utils.data import DataLoader
import numpy as np
from torchvision.models import alexnet
from datetime import datetime
import time
import os
class train_ImageDataset(Dataset):
    """Training dataset for the Kaggle cats-vs-dogs images.

    Expects files named like ``cat.123.jpg`` / ``dog.456.jpg`` directly
    under *root*; the class is taken from the filename prefix.
    Returns ``(image_tensor, one_hot_label)`` pairs, where the one-hot
    encoding is dog=[1, 0], cat=[0, 1].
    """

    def __init__(self, root):
        # Light augmentation + resize + ImageNet normalization
        # (the backbone is an ImageNet-pretrained AlexNet).
        self.transform = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.3),
            transforms.RandomVerticalFlip(p=0.3),
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        self.files = sorted(glob.glob(root + "/*.*"))

    def __getitem__(self, index):
        path = self.files[index % len(self.files)]
        # Derive the label from the file NAME only. The original code used
        # path.index("."), which matches the first dot anywhere in the path
        # (e.g. the "." in "./train") and therefore produced a wrong/empty
        # label.
        label_str = os.path.basename(path).split(".")[0]
        # Force RGB so grayscale/palette images don't break the 3-channel
        # Normalize transform.
        img = Image.open(path).convert("RGB")
        img = self.transform(img)
        if label_str == "cat":
            label = [0, 1]
        elif label_str == "dog":
            label = [1, 0]
        else:
            # Fail loudly instead of passing a string to torch.Tensor.
            raise ValueError("unrecognized class in filename: %s" % path)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.files)
class test_ImageDataset(Dataset):
    """Evaluation dataset for the Kaggle cats-vs-dogs images.

    Same contract as ``train_ImageDataset`` (one-hot labels, dog=[1, 0],
    cat=[0, 1]) but without random augmentation.
    """

    def __init__(self, root):
        # Deterministic pipeline: resize + ImageNet normalization only.
        self.transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        self.files = sorted(glob.glob(root + "/*.*"))

    def __getitem__(self, index):
        path = self.files[index % len(self.files)]
        # Take the class prefix from the file NAME. The original used
        # path.index("."), which hits the first dot anywhere in the path
        # (e.g. in "./test") and produced a wrong/empty label.
        label_str = os.path.basename(path).split(".")[0]
        # Force RGB so grayscale/palette images don't break Normalize.
        img = Image.open(path).convert("RGB")
        img = self.transform(img)
        if label_str == "cat":
            label = [0, 1]
        elif label_str == "dog":
            label = [1, 0]
        else:
            raise ValueError("unrecognized class in filename: %s" % path)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.files)
def train(model, device, train_loader, optimizer, epoch, loss):
    """Run one training epoch; print and (every 5th epoch) log metrics.

    Args:
        model: network to train (already moved to *device* by the caller).
        device: torch.device for inputs/targets.
        train_loader: iterable of (image_batch, one_hot_label_batch).
        optimizer: optimizer stepping *model*'s parameters.
        epoch: 1-based epoch index.
        loss: loss callable, e.g. nn.CrossEntropyLoss (targets here are
            one-hot float tensors, which CrossEntropyLoss accepts as
            class probabilities).

    NOTE(review): on every 5th epoch this appends to the module-level
    ``Log_txt`` path defined under ``__main__`` — confirm it is set
    before importing this function from another module.
    """
    epoch_start = time.time()
    model.train()
    error_num = 0
    sample_num = 0
    loss_sum = 0.0
    for batch, label in train_loader:
        batch = batch.to(device)  # renamed: `input` shadowed the builtin
        target = label.to(device)
        optimizer.zero_grad()
        output = model(batch)
        batch_loss = loss(output, target)
        batch_loss.backward()
        optimizer.step()
        # Labels are one-hot, so compare argmax indices of output/target.
        pred = output.max(1, keepdim=True)[1].squeeze(1).cpu().numpy()
        truth = target.max(1, keepdim=True)[1].detach().squeeze(1).cpu().numpy()
        # With 2 classes, |pred - truth| is exactly 1 on each misclassified
        # sample, so the sum counts errors.
        error_num += np.sum(np.abs(pred - truth))
        sample_num += pred.shape[0]
        loss_sum += batch_loss.item()
    acc = 1.0 - error_num / sample_num
    loss_avg = loss_sum / len(train_loader)
    print("Train:[Epoch %d] [Loss: %f] [Acc: %f]" % (epoch, loss_avg, acc))
    elapsed = time.time() - epoch_start
    print(elapsed)
    if epoch % 5 == 0:
        with open(Log_txt, "a") as f:
            f.write("Train:[Iterations %d] [Loss: %f] [Acc: %f] [Epoch Time: %f]\n"
                    % (epoch, loss_avg, acc, elapsed))
# Evaluation
def test(model, device, test_loader, epoch, loss):
    """Evaluate *model* on *test_loader*; print and (every 5th epoch) log
    average loss and accuracy.

    Fix: the original body called ``optimizer.zero_grad()``, which relied
    on a module-level ``optimizer`` global (NameError if imported
    elsewhere) and is meaningless under ``torch.no_grad()`` in eval mode;
    the call is removed.
    """
    model.eval()
    with torch.no_grad():
        error_num = 0
        sample_num = 0
        loss_sum = 0.0
        for batch, label in test_loader:
            batch = batch.to(device)  # renamed: `input` shadowed the builtin
            target = label.to(device)
            output = model(batch)
            loss_sum += loss(output, target).item()
            # One-hot labels: compare argmax indices; with 2 classes the
            # absolute difference counts misclassified samples.
            pred = output.max(1, keepdim=True)[1].squeeze(1).cpu().numpy()
            truth = target.max(1, keepdim=True)[1].detach().squeeze(1).cpu().numpy()
            error_num += np.sum(np.abs(pred - truth))
            sample_num += pred.shape[0]
        acc = 1.0 - error_num / sample_num
        loss_avg = loss_sum / len(test_loader)
        print("Test:[Epoch %d] [Loss: %f] [Acc: %f]" % (epoch, loss_avg, acc))
        if epoch % 5 == 0:
            with open(Log_txt, "a") as f:
                f.write("Test:[Iterations %d] [Loss: %f] [Acc: %f]\n" % (epoch, loss_avg, acc))
class cnn(nn.Module):
    """AlexNet-based classifier wired for CAM.

    Conv features -> global average pooling -> one fully-connected layer,
    so the FC weights can later be projected back onto the feature maps.
    Swapping ``alexnet`` for ``vgg16`` works the same way.
    """

    def __init__(self):
        super(cnn, self).__init__()
        self.model = alexnet(pretrained=True)
        # Keep only the convolutional feature extractor (the first child
        # of torchvision's AlexNet).
        self.encoder = nn.Sequential(*list(self.model.children())[0])
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 2)

    def forward(self, x):
        features = self.encoder(x)       # N x 256 x 7 x 7 for 256x256 input
        pooled = self.avg(features)      # N x 256 x 1 x 1
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc(flat)
if __name__ == '__main__':
    os.makedirs("log", exist_ok=True)
    os.makedirs("saved_model", exist_ok=True)
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Timestamped log file; spaces and colons are replaced so the name is
    # valid on Windows as well. train()/test() append to this path.
    Log_txt = "./log/" + str(datetime.now()).replace(" ", "_").replace(":", "_") + "_result.txt"
    start_all_time = time.time()

    train_loader = DataLoader(train_ImageDataset("./train"),
                              batch_size=256, shuffle=True, num_workers=0)
    test_loader = DataLoader(test_ImageDataset("./test"),
                             batch_size=256, shuffle=True, num_workers=0)

    model = cnn().to(DEVICE)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    loss = torch.nn.CrossEntropyLoss().to(DEVICE)

    EPOCH = 50
    for epoch in range(1, EPOCH + 1):
        train(model, DEVICE, train_loader, optimizer, epoch, loss)
        test(model, DEVICE, test_loader, epoch, loss)
        # Snapshot after every epoch so intermediate models can be inspected.
        torch.save(model.state_dict(),
                   './saved_model/AlexNet_oriCAM_CAT&DOG_iteration_' + str(epoch) + '.pth')

    with open(Log_txt, "a") as f:
        f.write("All Time: %f" % (time.time() - start_all_time))
如果需要输出类激活热力图,代码如下:
import os
from torch.utils.data import Dataset
import glob
from torch.utils.data import DataLoader
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torchvision.models import alexnet
from torchvision import datasets, transforms
import numpy as np
from PIL import Image
# ImageNet per-channel statistics, identical to the values passed to
# transforms.Normalize in the dataset; denormalize() inverts that
# normalization using the same numbers.
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
class test_ImageDataset(Dataset):
    """Evaluation dataset for the Kaggle cats-vs-dogs images.

    Returns ``(image_tensor, one_hot_label)`` with dog=[1, 0], cat=[0, 1];
    no random augmentation is applied.
    """

    def __init__(self, root):
        # Deterministic pipeline: resize + ImageNet normalization only.
        self.transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        self.files = sorted(glob.glob(root + "/*.*"))

    def __getitem__(self, index):
        path = self.files[index % len(self.files)]
        # Take the class prefix from the file NAME. The original used
        # path.index("."), which hits the first dot anywhere in the path
        # (e.g. in "./test") and produced a wrong/empty label.
        label_str = os.path.basename(path).split(".")[0]
        # Force RGB so grayscale/palette images don't break Normalize.
        img = Image.open(path).convert("RGB")
        img = self.transform(img)
        if label_str == "cat":
            label = [0, 1]
        elif label_str == "dog":
            label = [1, 0]
        else:
            raise ValueError("unrecognized class in filename: %s" % path)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.files)
def denormalize(tensors, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Invert transforms.Normalize on a 3xHxW image tensor, IN PLACE.

    Args:
        tensors: CHW float tensor produced by Normalize; modified in place
            via mul_/add_.
        mean, std: per-channel statistics. Defaults are the ImageNet values
            used throughout this file (previously read from module globals;
            now parameters with the same values, so behavior is unchanged).

    Returns:
        The tensor clamped to [0, 1]. The original clamped to [0, 255],
        but denormalized values live in the 0-1 range (callers scale by
        255 afterwards), so the 255 upper bound never clipped anything;
        1.0 is the intended bound.
    """
    for c in range(3):
        tensors[c, :, :].mul_(std[c]).add_(mean[c])
    return torch.clamp(tensors, 0, 1)
class cnn(nn.Module):
    """AlexNet feature extractor + GAP + linear head.

    ``forward`` also returns the last convolutional feature maps so the
    class-activation map can be computed from them with the FC weights.
    """

    def __init__(self):
        super(cnn, self).__init__()
        self.model = alexnet(pretrained=True)
        # Only the convolutional part of AlexNet serves as the encoder.
        self.encoder = nn.Sequential(*list(self.model.children())[0])
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 2)

    def forward(self, x):
        feature_maps = self.encoder(x)   # N x 256 x 7 x 7 for 256x256 input
        pooled = self.avg(feature_maps)
        logits = self.fc(pooled.view(pooled.shape[0], -1))
        return logits, feature_maps
# CAM visualization over the test set
def test(model, device, test_loader):
    """For each test image, write the original image, the class-activation
    map, and their weighted overlay under ./oriCAM_output/test/.

    Fix: ``denormalize`` works in place (mul_/add_) and ``data.squeeze()``
    is a view of ``data``, so the original code denormalized the very
    tensor that was then fed to the model — the network saw un-normalized
    input. We ``clone()`` before denormalizing so the model input stays
    normalized.
    """
    model.eval()
    # Final FC weights: one 256-dim vector per class; the CAM is the
    # feature maps weighted by the predicted class's vector.
    for name, param in model.named_parameters():
        if name == "fc.weight":
            w = param.detach().cpu().numpy()
            break
    with torch.no_grad():
        index = 1
        for data, target in test_loader:
            # clone() keeps the model input normalized (see docstring)
            data_de = denormalize(data.squeeze().clone())
            data, target = data.to(device), target.to(device)
            true_output, featuremap = model(data)
            featuremap = featuremap.detach().squeeze().cpu().numpy()
            # Argmax indices of prediction and one-hot target.
            pred = true_output.max(1, keepdim=True)[1].squeeze(1).cpu().numpy()
            target = target.max(1, keepdim=True)[1].detach().squeeze(1).cpu().numpy()
            bo = (pred == target)  # correctness flag, encoded in the dir name
            out_dir = "./oriCAM_output/test/%d_attentionMap_%d_%s" % (index, int(pred[0]), str(bo))
            os.makedirs(out_dir, exist_ok=True)
            # RGB -> BGR (cv2 convention), CHW -> HWC, [0,1] -> [0,255]
            ori_img = np.array(data_de)[::-1, :, :].transpose(1, 2, 0) * 255.0
            ori_img = cv2.resize(ori_img, (500, 500))
            original_img = ori_img.astype(np.uint8)
            cv2.imwrite("%s/%d_ORIImg.jpg" % (out_dir, index), original_img)
            prediction = int(pred[0])
            weight = w[prediction, :]
            # CAM: weighted sum of the 256 feature maps for the predicted class.
            activation_img = np.zeros((featuremap.shape[1], featuremap.shape[2]))
            for ch in range(featuremap.shape[0]):
                activation_img += featuremap[ch, :, :] * weight[ch]
            # Min-max normalize to [0, 255] before colormapping.
            activation_img = (activation_img - np.min(activation_img)) / (
                np.max(activation_img) - np.min(activation_img)) * 255.0
            activation_img = cv2.resize(activation_img, (500, 500))
            activation_img = activation_img.astype(np.uint8)
            heatmap = cv2.applyColorMap(activation_img, cv2.COLORMAP_JET)
            add_img = cv2.addWeighted(heatmap, 0.7, original_img, 0.3, 0)
            cv2.imwrite("%s/%d_AttImg.jpg" % (out_dir, index), heatmap)
            cv2.imwrite("%s/%d_AddImg.jpg" % (out_dir, index), add_img)
            index += 1
if __name__ == '__main__':
    # Batch of 1 keeps the squeeze()-based CAM code simple; needs ~2 GB of
    # GPU memory. GPU is recommended (much faster) but CPU works too.
    BATCH_SIZE = 1
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Test-set loader; shuffle=False keeps output folder indices stable.
    test_loader = DataLoader(
        test_ImageDataset("./test"),
        batch_size=BATCH_SIZE,
        num_workers=0,
        shuffle=False,
    )

    model = cnn().to(DEVICE)
    # Load the final checkpoint produced by the training script.
    model.load_state_dict(torch.load("./saved_model/AlexNet_oriCAM_CAT&DOG_iteration_50.pth"))
    test(model, DEVICE, test_loader)
由上图可见,效果还是不错的。