Task Introduction
Task 1: Glaucoma grading based on multi-modality fundus images
In this task, the goal is to analyze clinical data from two modalities, 2D fundus images and 3D OCT volume scans, and grade each sample into one of three classes based on its visual features: no glaucoma, early glaucoma, and mid/advanced glaucoma.
Link to the official baseline
A baseline implemented with PaddlePaddle
Network Architecture
The backbone of the network consists of two ResNets: one extracts features from the 2D fundus images and the other from the 3D OCT volumes. After the convolutional stages, the multi-dimensional features from each branch are flattened into one-dimensional vectors, concatenated into a single vector, and finally passed through a classifier to produce the grading result.
Network Input
The input format is [batch, channel, height, width]. The 3D OCT slices are read by OpenCV as 3-channel images, so they need to be converted to single-channel grayscale.
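For instance (the file name here is only illustrative), the difference between OpenCV's default 3-channel read and a grayscale read looks like this:
slice_bgr = cv2.imread("0001_1.jpg")                         # shape (H, W, 3), BGR channels
slice_gray = cv2.imread("0001_1.jpg", cv2.IMREAD_GRAYSCALE)  # shape (H, W), single channel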
Code
Only the non-standard parts of the code are explained here; for everything else, see the official Paddle documentation.
import
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
# evaluation metric
from sklearn.metrics import cohen_kappa_score
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.vision.models import resnet34
# transforms here is a locally implemented module, not a third-party library
import transforms as trans
import warnings
DataLoader
Dataset class
class GAMMA_sub1_dataset(paddle.io.Dataset):
    """
    getitem() output:
        fundus_img: RGB uint8 image with shape (3, image_size, image_size)
        oct_img: uint8 image with shape (256, oct_img_size[0], oct_img_size[1])
    """

    def __init__(self,
                 img_transforms,
                 oct_transforms,
                 dataset_root,
                 label_file='',
                 filelists=None,
                 num_classes=3,
                 mode='train'):
        self.dataset_root = dataset_root
        self.img_transforms = img_transforms
        self.oct_transforms = oct_transforms
        self.mode = mode.lower()
        self.num_classes = num_classes

        if self.mode == 'train':
            label = {row['data']: row[1:].values
                     for _, row in pd.read_excel(label_file).iterrows()}
            self.file_list = [[f, label[int(f)]] for f in os.listdir(dataset_root)]
        elif self.mode == "test":
            self.file_list = [[f, None] for f in os.listdir(dataset_root)]

        if filelists is not None:
            self.file_list = [item for item in self.file_list if item[0] in filelists]
    def __getitem__(self, idx):
        real_index, label = self.file_list[idx]
        fundus_img_path = os.path.join(self.dataset_root, real_index, real_index + ".jpg")
        # os.listdir does not return the OCT slices in index order (0 to 255),
        # so sort them numerically by the index before the underscore in the file name
        oct_series_list = sorted(os.listdir(os.path.join(self.dataset_root, real_index, real_index)),
                                 key=lambda x: int(x.split("_")[0]))

        fundus_img = cv2.imread(fundus_img_path)[:, :, ::-1]  # BGR -> RGB
        oct_series_0 = cv2.imread(os.path.join(self.dataset_root, real_index, real_index, oct_series_list[0]),
                                  cv2.IMREAD_GRAYSCALE)
        oct_img = np.zeros((len(oct_series_list), oct_series_0.shape[0], oct_series_0.shape[1], 1), dtype="uint8")

        for k, p in enumerate(oct_series_list):
            oct_img[k] = cv2.imread(
                os.path.join(self.dataset_root, real_index, real_index, p), cv2.IMREAD_GRAYSCALE)[..., np.newaxis]

        # apply the transforms if they are provided for the corresponding modality
        if self.img_transforms is not None:
            fundus_img = self.img_transforms(fundus_img)
        if self.oct_transforms is not None:
            oct_img = self.oct_transforms(oct_img)

        # normalize on GPU to save CPU memory and IO time
        # fundus_img = (fundus_img / 255.).astype("float32")
        # oct_img = (oct_img / 255.).astype("float32")

        # change the channel order of the fundus image
        fundus_img = fundus_img.transpose(2, 0, 1)  # H, W, C -> C, H, W
        # drop the trailing singleton dimension of the OCT volume
        oct_img = oct_img.squeeze(-1)  # D, H, W, 1 -> D, H, W

        if self.mode == 'test':
            return fundus_img, oct_img, real_index
        if self.mode == "train":
            label = label.argmax()
        return fundus_img, oct_img, label

    def __len__(self):
        return len(self.file_list)
Transforms applied
# transforms applied to the training-set fundus images
img_train_transforms = trans.Compose([
    # random resized crop
    trans.RandomResizedCrop(
        image_size, scale=(0.90, 1.1), ratio=(0.90, 1.1)),
    # random horizontal flip
    trans.RandomHorizontalFlip(),
    # random vertical flip
    trans.RandomVerticalFlip(),
    # random rotation of up to 30 degrees
    trans.RandomRotation(30)
])

# transforms applied to the training-set OCT volumes
oct_train_transforms = trans.Compose([
    # center-crop the volume
    trans.CenterCrop([256] + oct_img_size),
    trans.RandomHorizontalFlip(),
    trans.RandomVerticalFlip()
])

# transforms applied to the validation-set fundus images
img_val_transforms = trans.Compose([
    # crop the central square region
    trans.CropCenterSquare(),
    trans.Resize((image_size, image_size))
])

# transforms applied to the validation-set OCT volumes
oct_val_transforms = trans.Compose([
    trans.CenterCrop([256] + oct_img_size)
])
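A minimal sketch of wiring the dataset and a DataLoader together (trainset_root, the label-file path and batch_size below are placeholders, not the official values):
trainset_root = "./gamma_train/multi-modality_images"        # placeholder path
gt_file = "./gamma_train/glaucoma_grading_training_GT.xlsx"  # placeholder path

train_dataset = GAMMA_sub1_dataset(img_transforms=img_train_transforms,
                                   oct_transforms=oct_train_transforms,
                                   dataset_root=trainset_root,
                                   label_file=gt_file,
                                   mode='train')

train_loader = paddle.io.DataLoader(train_dataset,
                                    batch_size=4,   # adjust to GPU memory
                                    shuffle=True,
                                    num_workers=0)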
Network
The model imports resnet34 from paddle.vision.models; when num_classes is set to 0 at construction time, the network is built without the final fully connected layer.
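A quick shape check of this behaviour (the input size here is arbitrary): with num_classes=0 the backbone returns the globally pooled feature map instead of class scores, which is why the decision layer below takes 512 * 2 inputs.
backbone = resnet34(pretrained=False, num_classes=0)  # no final fc layer
feat = backbone(paddle.randn([1, 3, 256, 256]))
print(feat.shape)  # [1, 512, 1, 1]; flattened to a 512-dim vector per branch in Model.forward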
class Model(nn.Layer):
    """
    simply create a 2-branch network, and concat global pooled feature vector.
    each branch = single resnet34
    """

    def __init__(self):
        super(Model, self).__init__()
        # pretrained=True loads Paddle's ImageNet weights, i.e. transfer learning
        self.fundus_branch = resnet34(pretrained=True, num_classes=0)  # remove final fc
        self.oct_branch = resnet34(pretrained=True, num_classes=0)  # remove final fc
        self.decision_branch = nn.Linear(512 * 1 * 2, 3)  # ResNet34 uses basic blocks, expansion = 1

        # replace the first conv layer in oct_branch so that its input channels match
        # the 256 slices of the OCT volume
        self.oct_branch.conv1 = nn.Conv2D(256, 64,
                                          kernel_size=7,
                                          stride=2,
                                          padding=3,
                                          bias_attr=False)

    # forward pass of the two-branch network
    def forward(self, fundus_img, oct_img):
        b1 = self.fundus_branch(fundus_img)
        b2 = self.oct_branch(oct_img)
        # flatten each branch's pooled feature map into a 1-D vector
        b1 = paddle.flatten(b1, 1)
        b2 = paddle.flatten(b2, 1)
        # concat the two vectors and pass them through a fully connected layer;
        # softmax is applied later inside the cross-entropy loss, so raw logits are returned here
        logit = self.decision_branch(paddle.concat([b1, b2], 1))
        return logit
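A minimal smoke test of the two-branch model (the spatial sizes below are arbitrary and only used to check tensor shapes; pretrained=True downloads ImageNet weights on first use):
model = Model()
fundus = paddle.randn([2, 3, 256, 256])     # [N, C, H, W] fundus batch
oct_vol = paddle.randn([2, 256, 128, 128])  # [N, D, H, W]; the 256 slices act as conv1 input channels
logits = model(fundus, oct_vol)
print(logits.shape)                         # [2, 3], one logit per grading class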
Evaluation Metric
The task is scored with the quadratically weighted Cohen's kappa, computed here with the cohen_kappa_score function from sklearn.
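For example, a toy calculation of the quadratically weighted kappa on six samples:
from sklearn.metrics import cohen_kappa_score

# 0 = non, 1 = early, 2 = mid/advanced
y_pred = [0, 1, 2, 2, 0, 1]
y_true = [0, 1, 1, 2, 0, 2]
print(cohen_kappa_score(y_pred, y_true, weights='quadratic'))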
Train & Val
def train(model, iters, train_dataloader, val_dataloader, optimizer, criterion, log_interval, eval_interval):
    iter = 0
    model.train()
    # lists that accumulate the running loss and the (pred, label) pairs for kappa
    avg_loss_list = []
    avg_kappa_list = []
    best_kappa = 0.
    while iter < iters:
        for data in train_dataloader:
            iter += 1
            if iter > iters:
                break
            # scale the uint8 images to [0, 1] and cast to float32
            fundus_imgs = (data[0] / 255.).astype("float32")
            oct_imgs = (data[1] / 255.).astype("float32")
            labels = data[2].astype('int64')

            # feed the fundus and OCT images into the model
            logits = model(fundus_imgs, oct_imgs)
            # compute the loss
            loss = criterion(logits, labels)
            # acc = paddle.metric.accuracy(input=logits, label=labels.reshape((-1, 1)), k=1)
            for p, l in zip(logits.numpy().argmax(1), labels.numpy()):
                avg_kappa_list.append([p, l])

            loss.backward()
            optimizer.step()
            model.clear_gradients()
            avg_loss_list.append(loss.numpy()[0])

            if iter % log_interval == 0:
                # average loss: convert the list to an ndarray and take the mean
                avg_loss = np.array(avg_loss_list).mean()
                # list -> ndarray
                avg_kappa_list = np.array(avg_kappa_list)
                # quadratically weighted kappa over the accumulated (pred, label) pairs
                avg_kappa = cohen_kappa_score(avg_kappa_list[:, 0], avg_kappa_list[:, 1], weights='quadratic')
                # reset both accumulators
                avg_loss_list = []
                avg_kappa_list = []
                print("[TRAIN] iter={}/{} avg_loss={:.4f} avg_kappa={:.4f}".format(iter, iters, avg_loss, avg_kappa))

            if iter % eval_interval == 0:
                # run validation to get the validation avg_loss and avg_kappa
                avg_loss, avg_kappa = val(model, val_dataloader, criterion)
                print("[EVAL] iter={}/{} avg_loss={:.4f} kappa={:.4f}".format(iter, iters, avg_loss, avg_kappa))
                # save the model whenever the validation kappa improves
                if avg_kappa >= best_kappa:
                    best_kappa = avg_kappa
                    paddle.save(model.state_dict(),
                                os.path.join("best_model_{:.4f}".format(best_kappa), 'model.pdparams'))
                model.train()
def val(model, val_dataloader, criterion):
    model.eval()
    avg_loss_list = []
    cache = []
    with paddle.no_grad():
        for data in val_dataloader:
            fundus_imgs = (data[0] / 255.).astype("float32")
            oct_imgs = (data[1] / 255.).astype("float32")
            labels = data[2].astype('int64')

            logits = model(fundus_imgs, oct_imgs)
            for p, l in zip(logits.numpy().argmax(1), labels.numpy()):
                cache.append([p, l])
            loss = criterion(logits, labels)
            # acc = paddle.metric.accuracy(input=logits, label=labels.reshape((-1, 1)), k=1)
            avg_loss_list.append(loss.numpy()[0])

    cache = np.array(cache)
    kappa = cohen_kappa_score(cache[:, 0], cache[:, 1], weights='quadratic')
    avg_loss = np.array(avg_loss_list).mean()
    return avg_loss, kappa
Training Hyperparameters
The learning rate is a fixed value in the baseline; batch_size and the number of iterations depend on your hardware, so they are not listed here.
Key | Value |
---|---|
Optimizer | Adam |
Loss function | CrossEntropyLoss |
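A hedged sketch of putting the pieces together (the learning rate, iteration count and intervals are placeholders, and train_loader / val_loader are DataLoaders built as in the earlier sketch):
model = Model()
optimizer = paddle.optimizer.Adam(learning_rate=1e-4,            # fixed learning rate, as in the baseline
                                  parameters=model.parameters())
criterion = nn.CrossEntropyLoss()

train(model, iters=1000,
      train_dataloader=train_loader, val_dataloader=val_loader,
      optimizer=optimizer, criterion=criterion,
      log_interval=10, eval_interval=100)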
Other Notes
Training/validation split
sklearn's train_test_split function is used to split the sample list into a training set and a validation set.
from sklearn.model_selection import train_test_split
val_ratio = 0.2  # 80 / 20
# generation of filelists omitted
train_filelists, val_filelists = train_test_split(filelists, test_size=val_ratio, random_state=42)
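The omitted filelists is presumably just the list of per-sample folder names under the training root; a minimal sketch under that assumption:
# Assumption: each sample sits in its own sub-folder (named by its sample id) under trainset_root.
filelists = os.listdir(trainset_root)
# train_filelists / val_filelists from the split above are then passed to
# GAMMA_sub1_dataset(..., filelists=train_filelists) and (..., filelists=val_filelists).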
Generating the Submission File
# cache here holds [sample_id, predicted_class] pairs collected by running the trained model
# over the test set (a val-style loop; see the sketch below)
submission_result = pd.DataFrame(cache, columns=['data', 'dense_pred'])
submission_result['non'] = submission_result['dense_pred'].apply(lambda x: int(x[0] == 0))
submission_result['early'] = submission_result['dense_pred'].apply(lambda x: int(x[0] == 1))
submission_result['mid_advanced'] = submission_result['dense_pred'].apply(lambda x: int(x[0] == 2))
# write the final submission file
submission_result[['data', 'non', 'early', 'mid_advanced']].to_csv("./submission_sub1.csv", index=False)
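For completeness, a minimal sketch (not the official baseline code) of the test-set inference loop that could produce the cache used above, assuming a test_loader with batch_size=1 built from GAMMA_sub1_dataset in 'test' mode and a saved checkpoint path:
model = Model()
model.set_state_dict(paddle.load("./best_model_0.0000/model.pdparams"))  # placeholder checkpoint path
model.eval()

cache = []
with paddle.no_grad():
    for fundus_img, oct_img, idx in test_loader:  # batch_size = 1
        fundus_img = (fundus_img / 255.).astype("float32")
        oct_img = (oct_img / 255.).astype("float32")
        logits = model(fundus_img, oct_img)
        # idx[0] is the sample id; argmax(1) is a length-1 array, hence the x[0] indexing above
        cache.append([idx[0], logits.numpy().argmax(1)])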
Directions for Improvement
- Replace resnet34 with a stronger pretrained backbone
- Fusing the two modalities with a plain concat is rather crude; design a self-attention style fusion module so the network can learn how much weight to give each modality (see the sketch after this list)
- Apply denoising and other preprocessing to the OCT images
- Segment the optic disc from the fundus image first, then classify
- The official training set is small; train a GAN to generate additional training samples
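To illustrate the fusion idea above, here is a hedged sketch of a simple gated (learned-weight) fusion layer, one lightweight alternative to the plain concat; a full self-attention module would go further, and none of this is part of the official baseline:
class GatedFusion(nn.Layer):
    """Learn per-modality weights instead of a fixed concat (illustrative design, not the baseline)."""

    def __init__(self, feat_dim=512, num_classes=3):
        super().__init__()
        # predict one weight per modality from the concatenated features
        self.gate = nn.Sequential(nn.Linear(feat_dim * 2, 2), nn.Softmax(axis=-1))
        self.classifier = nn.Linear(feat_dim, num_classes)

    def forward(self, fundus_feat, oct_feat):
        # fundus_feat, oct_feat: [N, feat_dim] pooled vectors from the two ResNet branches
        w = self.gate(paddle.concat([fundus_feat, oct_feat], axis=1))  # [N, 2], weights sum to 1
        fused = w[:, 0:1] * fundus_feat + w[:, 1:2] * oct_feat         # weighted sum of the modalities
        return self.classifier(fused)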