Hung-Yi Lee homework[5]:Explainable ML


(一) 作业描述

  本次作业分为3个任务:
在这里插入图片描述

(二) 实现过程

  代码组成结构如下:
在这里插入图片描述
  其中,用到了

  1. food-11文件夹,里面是homework-3的数据集,本次实验中用到其中的/training文件夹;
  2. define_class.py,定义了网络结构,图片预处理过程,数据集组成方式等【从homework-3中直接拿来进行了些许修改即可】;
  3. saliency_map.py,实现第一个task的代码;
  4. filter_explanation.py,实现第二个task的代码;
  5. lime_map.py,实现第三个task的代码。

1. define

import os
import numpy as np
import cv2
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# 数据集中图片命名格式:[类别]_[编号].jpg
# 利用opencv读取照片并存放在numpy array中
def readfile(path, label):
    """Load every image under `path` as a 128x128 BGR uint8 array.

    Dataset filenames follow the pattern [class]_[index].jpg.

    Args:
        path: directory containing the image files.
        label: bool; when True, also parse and return the class label
            encoded in each filename (the digits before the first '_').

    Returns:
        x, or (x, y) when label is True, where
        x: uint8 array of shape (N, 128, 128, 3) and
        y: uint8 array of shape (N,) holding class indices.
    """
    # Sort so the index -> file mapping is deterministic across runs.
    image_dir = sorted(os.listdir(path))

    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img, (128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    return x
# 定义模型
class Classifier(nn.Module):
    """CNN classifier for [3, 128, 128] food images with 11 output classes."""

    def __init__(self):
        super(Classifier, self).__init__()
        # Five conv stages; each halves the spatial size:
        # 128 -> 64 -> 32 -> 16 -> 8 -> 4.
        channel_plan = [3, 64, 128, 256, 512, 512]
        stages = []
        for c_in, c_out in zip(channel_plan[:-1], channel_plan[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, 1, 1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),
            ]
        self.cnn = nn.Sequential(*stages)

        # Classification head over the flattened [512, 4, 4] feature map.
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        features = self.cnn(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

# 定义数据集
class ImgDataset(Dataset):
    """Wrap image arrays (and optional labels) as a torch Dataset."""

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # CrossEntropyLoss requires the labels to be a LongTensor.
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]

    def getbatch(self, indices):
        """Collect the labeled items at `indices` into stacked tensors."""
        pairs = [self.__getitem__(i) for i in indices]
        images = [img for img, _ in pairs]
        labels = [lab for _, lab in pairs]
        return torch.stack(images), torch.tensor(labels)

def get_paths_labels(path):
    """Return sorted image paths under `path` and labels parsed from filenames."""
    names = sorted(os.listdir(path))
    # Filenames follow [class]_[index].jpg: the label precedes the first '_'.
    imgpaths = [os.path.join(path, name) for name in names]
    labels = [int(name.split('_')[0]) for name in names]
    return imgpaths, labels

2. Saliency Map

import os
import sys
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from skimage.segmentation import slic
from lime import lime_image
from define_class import *


def normalize(image):
  """Min-max normalize `image` to [0, 1]; a constant image maps to all zeros.

  Guarding the zero-range case avoids a division by zero that would fill
  the result with NaNs and break downstream plotting.
  """
  span = image.max() - image.min()
  if span == 0:
    return image - image.min()
  return (image - image.min()) / span


def compute_saliency_maps(x, y, model):
  """Per-pixel saliency |d loss / d input| for a batch of images.

  Args:
    x: image batch tensor; moved to the GPU inside this function.
    y: ground-truth label tensor matching `x`.
    model: trained classifier (switched to eval mode here).

  Returns:
    CPU tensor, same shape as `x`, of per-image min-max-normalized
    absolute input gradients.
  """
  model.eval()
  x = x.cuda()

  # We need d(loss)/d(x), so the input itself must track gradients.
  x.requires_grad_()

  y_pred = model(x)
  loss_func = torch.nn.CrossEntropyLoss()
  loss = loss_func(y_pred, y.cuda())
  loss.backward()

  saliencies = x.grad.abs().detach().cpu()
  # saliencies: (batches, channels, height, weight)

  # Normalize each image independently: gradient scales differ a lot per
  # image, and a shared color scale would make most maps look low-contrast.
  saliencies = torch.stack([normalize(item) for item in saliencies])
  return saliencies

if __name__ == '__main__':
  args = {
    'dataset_dir': './food-11/'
  }
  args = argparse.Namespace(**args)

  # Load the trained classifier (a whole model saved with torch.save(model, ...)).
  model = torch.load('model.pt')

  train_x, train_y = readfile(os.path.join(args.dataset_dir, 'training'), True)

  train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),  # randomly flip images horizontally
    transforms.RandomRotation(15),      # randomly rotate images up to 15 degrees
    transforms.ToTensor(),              # to Tensor, values normalized to [0, 1]
  ])
  train_set = ImgDataset(train_x, train_y, train_transform)

  # Indices of the images to visualize.
  img_indices = [83, 4218, 4707, 8598]
  images, labels = train_set.getbatch(img_indices)
  saliencies = compute_saliency_maps(images, labels, model)

  # Plot the originals (top row) over their saliency maps (bottom row).
  fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
  for row, target in enumerate([images, saliencies]):
    for column, img in enumerate(target):
      axs[row][column].imshow(img.permute(1, 2, 0).numpy())
  plt.show()
  plt.close()

import os
import sys
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from skimage.segmentation import slic
from lime import lime_image
from define_class import *


def normalize(image):
  """Min-max normalize `image` to [0, 1]; a constant image maps to all zeros.

  Guarding the zero-range case avoids a division by zero that would fill
  the result with NaNs and break downstream plotting.
  """
  span = image.max() - image.min()
  if span == 0:
    return image - image.min()
  return (image - image.min()) / span


def compute_saliency_maps(x, y, model):
  """Per-pixel saliency |d loss / d input| for a batch of images.

  Args:
    x: image batch tensor; moved to the GPU inside this function.
    y: ground-truth label tensor matching `x`.
    model: trained classifier (switched to eval mode here).

  Returns:
    CPU tensor, same shape as `x`, of per-image min-max-normalized
    absolute input gradients.
  """
  model.eval()
  x = x.cuda()

  # We differentiate the loss w.r.t. the input image, so we must tell
  # PyTorch that this input needs a gradient.
  x.requires_grad_()

  y_pred = model(x)
  loss_func = torch.nn.CrossEntropyLoss()
  loss = loss_func(y_pred, y.cuda())
  loss.backward()

  # saliencies: (batches, channels, height, weight)
  saliencies = x.grad.abs().detach().cpu()

  # Each image's gradient scale can differ wildly; drawing every saliency
  # map with one shared color scale would yield low-contrast images, so
  # normalize each one independently.
  saliencies = torch.stack([normalize(item) for item in saliencies])
  return saliencies

if __name__ == '__main__':
  args = {
    'dataset_dir': './food-11/'
  }
  args = argparse.Namespace(**args)

  # Load the trained classifier (a whole model saved with torch.save(model, ...)).
  model = torch.load('model.pt')

  train_x, train_y = readfile(os.path.join(args.dataset_dir, 'training'), True)

  train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),  # randomly flip images horizontally
    transforms.RandomRotation(15),  # randomly rotate images up to 15 degrees
    transforms.ToTensor(),  # convert to Tensor and normalize values to [0, 1]
  ])
  train_set = ImgDataset(train_x, train_y, train_transform)

  # Indices of the images to visualize.
  img_indices = [83, 4218, 4707, 8598]
  images, labels = train_set.getbatch(img_indices)
  saliencies = compute_saliency_maps(images, labels, model)

  # Plot with matplotlib: originals on top, saliency maps below.
  fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
  for row, target in enumerate([images, saliencies]):
    for column, img in enumerate(target):
      axs[row][column].imshow(img.permute(1, 2, 0).numpy())
  plt.show()
  plt.close()

  结果如下图所示,可以发现模型确实是找出了食物的大致轮廓,也就是说模型确实是从食物本身进行辨认的:
在这里插入图片描述

3. filter explanation

import os
import sys
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from skimage.segmentation import slic
from lime import lime_image
from define_class import *

def normalize(image):
  """Min-max normalize `image` to [0, 1]; a constant image maps to all zeros.

  Guarding the zero-range case avoids a division by zero that would fill
  the result with NaNs and break downstream plotting.
  """
  span = image.max() - image.min()
  if span == 0:
    return image - image.min()
  return (image - image.min()) / span

def filter_explaination(x, model, cnnid, filterid, iteration=100, lr=1):
  """Explain one CNN filter: its activations on `x` and a maximizing input.

  Args:
    x: image batch used to probe where the filter activates.
    model: trained classifier whose `model.cnn[cnnid]` layer is inspected.
    cnnid: index of the layer inside `model.cnn`.
    filterid: index of the filter (output channel) within that layer.
    iteration: number of gradient-ascent steps for the visualization.
    lr: learning rate for the gradient ascent.

  Returns:
    (filter_activations, filter_visualization):
      filter_activations: CPU tensor of the filter's response to `x`.
      filter_visualization: first image of the batch after gradient ascent,
        i.e. an input adjusted to maximize the filter's total activation.
  """
  model.eval()

  # Capture the layer's output on every forward pass through a hook.
  # A closure cell replaces the module-level global the hook previously
  # mutated, so the function no longer leaks state into the module.
  captured = {}

  def hook(model, input, output):
    captured['activations'] = output

  hook_handle = model.cnn[cnnid].register_forward_hook(hook)

  # Forward pass: record how strongly the filter fires on the real images.
  model(x.cuda())
  filter_activations = captured['activations'][:, filterid, :, :].detach().cpu()

  # Gradient ascent starting FROM THE INPUT IMAGES (not random noise):
  # repeatedly nudge x so the filter's summed activation grows.
  x = x.cuda()
  x.requires_grad_()
  optimizer = Adam([x], lr=lr)
  for _ in range(iteration):
      optimizer.zero_grad()
      model(x)

      # Minimizing the negative sum == maximizing the activation.
      objective = -captured['activations'][:, filterid, :, :].sum()
      objective.backward()
      optimizer.step()
  filter_visualization = x.detach().cpu().squeeze()[0]

  # Always detach the hook, otherwise it stays registered on the model.
  hook_handle.remove()

  return filter_activations, filter_visualization
if __name__ == '__main__':
  args = {
    'dataset_dir': './food-11/'
  }
  args = argparse.Namespace(**args)

  # Load the trained classifier (a whole model saved with torch.save(model, ...)).
  # model = Classifier().cuda()
  model = torch.load('model.pt')
  # checkpoint = torch.load(args.ckptpath)
  # model.load_state_dict(checkpoint['model_state_dict'])

  train_x, train_y = readfile(os.path.join(args.dataset_dir, 'training'), True)
  # print(train_x)
  train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),  # randomly flip images horizontally
    transforms.RandomRotation(15),  # randomly rotate images up to 15 degrees
    transforms.ToTensor(),  # convert to Tensor and normalize values to [0, 1]
  ])
  train_set = ImgDataset(train_x, train_y, train_transform)

  # NOTE(review): placeholder for the module-level name the forward hook in
  # filter_explaination assigns via `global` — presumably defensive; confirm.
  layer_activations = None
  img_indices = [83, 4218, 4707, 8598]
  images, labels = train_set.getbatch(img_indices)
  filter_activations, filter_visualization = filter_explaination(images, model, cnnid=14, filterid=0, iteration=100,
                                                                 lr=0.1)
  # The synthesized input that maximizes the chosen filter's activation.
  plt.imshow(normalize(filter_visualization.permute(1, 2, 0)))
  plt.show()
  plt.close()
  # Top row: original images; bottom row: the filter's activation maps.
  fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
  for i, img in enumerate(images):
      axs[0][i].imshow(img.permute(1, 2, 0))
  for i, img in enumerate(filter_activations):
      axs[1][i].imshow(normalize(img))
  plt.show()
  plt.close()

  结果如下图所示,看起来检测的是边缘【cnn第14层的第1个filter】:

filter visualizationfilter activations
在这里插入图片描述在这里插入图片描述

4. Lime

import os
import sys
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from skimage.segmentation import slic
from lime import lime_image
from define_class import *


def predict(input):
    """Classifier wrapper in the shape LIME's `classifier_fn` expects.

    Args:
        input: numpy array, (batches, height, width, channels).

    Returns:
        numpy array of model outputs, one row per image.

    NOTE(review): relies on a module-level `model` (loaded in __main__)
    and on CUDA being available — confirm before reusing elsewhere.
    """
    model.eval()
    # HWC numpy batch -> NCHW float tensor, the layout the CNN expects.
    input = torch.FloatTensor(input).permute(0, 3, 1, 2)

    output = model(input.cuda())
    return output.detach().cpu().numpy()


def segmentation(input):
    """Split an image into ~100 SLIC superpixels for LIME's perturbations."""
    return slic(input, n_segments=100, compactness=1, sigma=1)

if __name__ == '__main__':
    args = {
    'dataset_dir': './food-11/'
    }
    args = argparse.Namespace(**args)

    # Load the trained classifier (a whole model saved with torch.save(model, ...)).
    # model = Classifier().cuda()
    model = torch.load('model.pt')
    # checkpoint = torch.load(args.ckptpath)
    # model.load_state_dict(checkpoint['model_state_dict'])

    train_x, train_y = readfile(os.path.join(args.dataset_dir, 'training'), True)
    # print(train_x)
    train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),  # randomly flip images horizontally
    transforms.RandomRotation(15),  # randomly rotate images up to 15 degrees
    transforms.ToTensor(),  # convert to Tensor and normalize values to [0, 1]
    ])
    train_set = ImgDataset(train_x, train_y, train_transform)
    # Images to explain; one subplot per image.
    img_indices = [83, 4218, 4707, 8598]
    images, labels = train_set.getbatch(img_indices)
    fig, axs = plt.subplots(1, 4, figsize=(15, 8))
    # Fix the seed so LIME's random perturbations are reproducible.
    np.random.seed(16)

    # LIME expects HWC numpy images, hence the permute from NCHW.
    for idx, (image, label) in enumerate(zip(images.permute(0, 2, 3, 1).numpy(), labels)):
        x = image.astype(np.double)

        explainer = lime_image.LimeImageExplainer()
        explaination = explainer.explain_instance(image=x, classifier_fn=predict, segmentation_fn=segmentation)

        # Overlay the superpixels that most influenced this label's score.
        lime_img, mask = explaination.get_image_and_mask(
            label=label.item(),
            positive_only=False,
            hide_rest=False,
            num_features=11,
            min_weight=0.05
        )
        axs[idx].imshow(lime_img)

    plt.show()
    plt.close()

  结果如下图所示:
在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值