Hung-Yi Lee homework[5]: Explainable ML
(1) Assignment description
This assignment consists of 3 tasks: Saliency Map, Filter Explanation (filter visualization and activations), and LIME.
(2) Implementation
The code is organized as follows:
- food-11: the homework-3 dataset folder; only its /training subfolder is used in this experiment.
- define_class.py: defines the network architecture, image preprocessing, and dataset classes (taken from homework 3 with minor modifications).
- saliency_map.py: implements the first task.
- filter_explanation.py: implements the second task.
- lime_map.py: implements the third task.
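All three task scripts load the trained classifier with torch.load('model.pt'), which implies the whole Classifier module (not just a state dict) was saved after training. A minimal sketch of how such a checkpoint is assumed to be produced (the training loop itself is the homework-3 code and is omitted here):

# Sketch only (assumption): how model.pt is produced by the homework-3 training code.
import torch
from define_class import Classifier

model = Classifier().cuda()
# ... homework-3 training loop (omitted) ...
# Save the full module so that torch.load('model.pt') returns a ready-to-use Classifier
torch.save(model, 'model.pt')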
1. define_class.py
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
# Image files in the dataset are named [class]_[index].jpg
# Read the images with OpenCV and store them in a numpy array
def readfile(path, label):
    # label is a boolean flag indicating whether to also return the labels y
print('list_dir:', os.listdir(path))
image_dir = sorted(os.listdir(path))
print('image_dir:', image_dir)
x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
y = np.zeros((len(image_dir)), dtype=np.uint8)
for i, file in enumerate(image_dir):
img = cv2.imread(os.path.join(path, file))
x[i, :, :] = cv2.resize(img, (128, 128))
if label:
y[i] = int(file.split("_")[0])
if label:
return x, y
else:
return x
# Define the model
class Classifier(nn.Module):
def __init__(self):
super(Classifier, self).__init__()
        # input dimensions: [3, 128, 128]
self.cnn = nn.Sequential(
nn.Conv2d(3, 64, 3, 1, 1), # [64, 128, 128]
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [64, 64, 64]
nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [128, 32, 32]
nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [256, 16, 16]
nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [512, 8, 8]
nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [512, 4, 4]
)
self.fc = nn.Sequential(
nn.Linear(512*4*4, 1024),
nn.ReLU(),
nn.Linear(1024, 512),
nn.ReLU(),
nn.Linear(512, 11)
)
def forward(self, x):
out = self.cnn(x)
out = out.view(out.size()[0], -1)
return self.fc(out)
# Define the dataset class
class ImgDataset(Dataset):
def __init__(self, x, y=None, transform=None):
self.x = x
# label is required to be a LongTensor
self.y = y
if y is not None:
self.y = torch.LongTensor(y)
self.transform = transform
def __len__(self):
return len(self.x)
def __getitem__(self, index):
X = self.x[index]
if self.transform is not None:
X = self.transform(X)
if self.y is not None:
Y = self.y[index]
return X, Y
else:
return X
def getbatch(self, indices):
images = []
labels = []
for index in indices:
image, label = self.__getitem__(index)
images.append(image)
labels.append(label)
return torch.stack(images), torch.tensor(labels)
def get_paths_labels(path):
imgnames = os.listdir(path)
imgnames.sort()
imgpaths = []
labels = []
for name in imgnames:
imgpaths.append(os.path.join(path, name))
labels.append(int(name.split('_')[0]))
return imgpaths, labels
2. Saliency Map
import os
import sys
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from skimage.segmentation import slic
from lime import lime_image
from define_class import *
def normalize(image):
return (image - image.min()) / (image.max() - image.min())
def compute_saliency_maps(x, y, model):
model.eval()
x = x.cuda()
    # We want the gradient of the loss w.r.t. the input image, so tell PyTorch that this input requires a gradient
x.requires_grad_()
y_pred = model(x)
loss_func = torch.nn.CrossEntropyLoss()
loss = loss_func(y_pred, y.cuda())
loss.backward()
    # saliencies: (batch, channels, height, width)
saliencies = x.grad.abs().detach().cpu()
    # The gradient scale can differ a lot from image to image; with a single shared color scale some saliency maps would show very little contrast, so normalize each map individually
saliencies = torch.stack([normalize(item) for item in saliencies])
return saliencies
if __name__ == '__main__':
args = {
'dataset_dir': './food-11/'
}
args = argparse.Namespace(**args)
    # Load the trained model
model = torch.load('model.pt')
train_x, train_y = readfile(os.path.join(args.dataset_dir, 'training'), True)
train_transform = transforms.Compose([
transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),  # randomly flip the image horizontally
        transforms.RandomRotation(15),  # randomly rotate the image
        transforms.ToTensor(),  # convert the image to a Tensor and scale values to [0, 1]
])
train_set = ImgDataset(train_x, train_y, train_transform)
    # indices of the images to visualize
img_indices = [83, 4218, 4707, 8598]
images, labels = train_set.getbatch(img_indices)
saliencies = compute_saliency_maps(images, labels, model)
    # plot the images and their saliency maps with matplotlib
fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
for row, target in enumerate([images, saliencies]):
for column, img in enumerate(target):
axs[row][column].imshow(img.permute(1, 2, 0).numpy())
plt.show()
plt.close()
The resulting saliency maps are shown below. The model does pick out the rough outline of the food, which shows it really is making its prediction from the food itself.
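As an optional check (not part of the original script), each saliency map can be overlaid on its image, which makes it easier to compare the highlighted region with the food outline. This sketch simply appends to the script above and reuses its images, saliencies and img_indices variables:

# Optional sketch: overlay each saliency map on its image.
fig, axs = plt.subplots(1, len(img_indices), figsize=(15, 4))
for i, (img, sal) in enumerate(zip(images, saliencies)):
    axs[i].imshow(img.permute(1, 2, 0).numpy())
    # collapse the 3 gradient channels into one heatmap and draw it semi-transparently
    axs[i].imshow(sal.max(dim=0)[0].numpy(), cmap='jet', alpha=0.4)
    axs[i].axis('off')
plt.show()
plt.close()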
3. Filter Explanation
import os
import sys
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from skimage.segmentation import slic
from lime import lime_image
from define_class import *
def normalize(image):
return (image - image.min()) / (image.max() - image.min())
def filter_explaination(x, model, cnnid, filterid, iteration=100, lr=1):
    # x: the images used to observe which regions activate the specified filter
    # cnnid, filterid: which filter of which layer in model.cnn to inspect
model.eval()
def hook(model, input, output):
global layer_activations
layer_activations = output
hook_handle = model.cnn[cnnid].register_forward_hook(hook)
    # run a forward pass; the hook fills layer_activations with the output of model.cnn[cnnid]
    model(x.cuda())
    filter_activations = layer_activations[:, filterid, :, :].detach().cpu()
    # Gradient ascent: compute the gradient of the filter activation w.r.t. x and let the
    # optimizer gradually modify the input images so that the activation becomes larger and larger
    x = x.cuda()
    x.requires_grad_()
optimizer = Adam([x], lr=lr)
for iter in range(iteration):
optimizer.zero_grad()
model(x)
objective = -layer_activations[:, filterid, :, :].sum()
objective.backward()
optimizer.step()
filter_visualization = x.detach().cpu().squeeze()[0]
hook_handle.remove()
return filter_activations, filter_visualization
if __name__ == '__main__':
args = {
'dataset_dir': './food-11/'
}
args = argparse.Namespace(**args)
    # Load the trained model
    model = torch.load('model.pt')
    train_x, train_y = readfile(os.path.join(args.dataset_dir, 'training'), True)
train_transform = transforms.Compose([
transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),  # randomly flip the image horizontally
        transforms.RandomRotation(15),  # randomly rotate the image
        transforms.ToTensor(),  # convert the image to a Tensor and scale values to [0, 1]
])
train_set = ImgDataset(train_x, train_y, train_transform)
layer_activations = None
img_indices = [83, 4218, 4707, 8598]
images, labels = train_set.getbatch(img_indices)
filter_activations, filter_visualization = filter_explaination(images, model, cnnid=14, filterid=0, iteration=100,
lr=0.1)
plt.imshow(normalize(filter_visualization.permute(1, 2, 0)))
plt.show()
plt.close()
fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
for i, img in enumerate(images):
axs[0][i].imshow(img.permute(1, 2, 0))
for i, img in enumerate(filter_activations):
axs[1][i].imshow(normalize(img))
plt.show()
plt.close()
The results are shown below; this filter (filterid=0 of model.cnn[14]) appears to detect edges:
| filter visualization | filter activations |
| --- | --- |
| ![]() | ![]() |
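To check whether other filters in the same layer respond to different patterns (edges, textures, colors), the same procedure can be repeated for a few more filter indices. A short sketch that appends to the script above and reuses filter_explaination, images and normalize; the filter indices 0-3 are an arbitrary choice:

# Optional sketch: compare several filters of model.cnn[14] side by side.
filter_ids = [0, 1, 2, 3]  # arbitrary filters to inspect
fig, axs = plt.subplots(2, len(filter_ids), figsize=(15, 8))
for col, fid in enumerate(filter_ids):
    acts, vis = filter_explaination(images, model, cnnid=14, filterid=fid,
                                    iteration=100, lr=0.1)
    axs[0][col].imshow(normalize(vis.permute(1, 2, 0)))  # pattern that maximizes this filter
    axs[1][col].imshow(normalize(acts[0]))               # its activation on the first image
plt.show()
plt.close()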
4. LIME
import os
import sys
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from skimage.segmentation import slic
from lime import lime_image
from define_class import *
def predict(input):
# input: numpy array, (batches, height, width, channels)
model.eval()
input = torch.FloatTensor(input).permute(0, 3, 1, 2)
output = model(input.cuda())
return output.detach().cpu().numpy()
def segmentation(input):
return slic(input, n_segments=100, compactness=1, sigma=1)
if __name__ == '__main__':
args = {
'dataset_dir': './food-11/'
}
args = argparse.Namespace(**args)
    # Load the trained model
    model = torch.load('model.pt')
    train_x, train_y = readfile(os.path.join(args.dataset_dir, 'training'), True)
train_transform = transforms.Compose([
transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),  # randomly flip the image horizontally
        transforms.RandomRotation(15),  # randomly rotate the image
        transforms.ToTensor(),  # convert the image to a Tensor and scale values to [0, 1]
])
train_set = ImgDataset(train_x, train_y, train_transform)
img_indices = [83, 4218, 4707, 8598]
images, labels = train_set.getbatch(img_indices)
fig, axs = plt.subplots(1, 4, figsize=(15, 8))
np.random.seed(16)
for idx, (image, label) in enumerate(zip(images.permute(0, 2, 3, 1).numpy(), labels)):
x = image.astype(np.double)
explainer = lime_image.LimeImageExplainer()
explaination = explainer.explain_instance(image=x, classifier_fn=predict, segmentation_fn=segmentation)
lime_img, mask = explaination.get_image_and_mask(
label=label.item(),
positive_only=False,
hide_rest=False,
num_features=11,
min_weight=0.05
)
axs[idx].imshow(lime_img)
plt.show()
plt.close()
The resulting LIME figures are shown below.
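As an optional touch-up (not part of the assignment code), skimage's mark_boundaries can draw the superpixel outlines on top of the LIME output, which makes the highlighted segments easier to read. A sketch that repeats the loop above with the boundaries marked, reusing predict, segmentation, images and labels from the script:

# Optional sketch: same LIME loop as above, but with superpixel boundaries drawn.
from skimage.segmentation import mark_boundaries

fig, axs = plt.subplots(1, len(img_indices), figsize=(15, 8))
for idx, (image, label) in enumerate(zip(images.permute(0, 2, 3, 1).numpy(), labels)):
    explainer = lime_image.LimeImageExplainer()
    explaination = explainer.explain_instance(image=image.astype(np.double),
                                              classifier_fn=predict,
                                              segmentation_fn=segmentation)
    lime_img, mask = explaination.get_image_and_mask(label=label.item(),
                                                     positive_only=False,
                                                     hide_rest=False,
                                                     num_features=11,
                                                     min_weight=0.05)
    # mask labels each pixel by the sign of its segment's weight; outline those segments
    axs[idx].imshow(mark_boundaries(lime_img, mask))
    axs[idx].axis('off')
plt.show()
plt.close()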