用BCCD数据集学习rcnn家族（一）——介绍BCCD数据集及预处理

最新推荐文章于 2025-04-28 14:09:26 发布

冰西瓜是生活动力

最新推荐文章于 2025-04-28 14:09:26 发布

阅读量5.3k

点赞数 2

分类专栏：基于深度学习的图像识别文章标签：深度学习可视化 python 神经网络

本文链接：https://blog.csdn.net/qq_20491295/article/details/109312771

版权

本文介绍了BCCD血细胞数据集的读取和预处理，包括XML标签数据解析、图片处理、数据增强和可视化，为深度学习模型训练做准备。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

前言

rcnn家族其实已经是“非常老”的算法了（最早提出于2014年），但它是深度学习用于图像目标检测的开山之作，所以我打算从rcnn开始学习。网上对于网络结构和论文的讲解已经很详细，我这个系列就不讲理论了，主要是记录我的学习过程和写代码时踩过的坑。

一、bccd数据介绍

bccd血细胞数据集是一个比较老的数据集，也不大，可以在这下载 https://public.roboflow.com/object-detection/bccd

该数据集共有364张图像，分为三类：WBC（白细胞）、RBC（红细胞）和Platelets（血小板）。3个类别中共有4888个标签（有0个空示例）。下图是网站得到的可视化数据（三个类别细胞标注数量计数）。这个网站的教程里还有很多可视化，比如各个细胞分布的热点图等，这里就不全放了。
在这里插入图片描述

二、读取数据

1.读xml标签数据

xml的格式如下图所示，需要重点关注的是框起来的部分，从上往下分别是，图片名称和图片位置、图片size（416，416，3）、真实框的标签、真实框的difficult、真实框的位置。
为了读该xml文件，以下是一个相对框架式的代码：

import os
import numpy as np
import xml.etree.ElementTree as ET
CELL_NAMES = ['RBC', 'WBC', 'Platelets']


def get_cell_names():
    """Build the category-name -> integer class-id lookup table.

    A category's id is its position in ``CELL_NAMES``, which gives
    ``{'RBC': 0, 'WBC': 1, 'Platelets': 2}``.
    """
    return {name: class_id for class_id, name in enumerate(CELL_NAMES)}

 # 获得数据集列表
def get_annotations(cname2cid, datadir):
    """Parse every Pascal-VOC style ``.xml`` annotation file in ``datadir``.

    Args:
        cname2cid: dict mapping a category name (the ``<name>`` text of an
            ``<object>``) to its integer class id.
        datadir: directory containing the ``.xml`` annotation files. The image
            path of each record is the annotation's file name with its
            extension replaced by ``.jpg``.

    Returns:
        A list with one dict per annotation file that holds at least one
        object. Keys: ``im_file``, ``h``, ``w``, ``is_crowd``, ``gt_class``,
        ``gt_bbox`` (``[cx, cy, w, h]`` in pixels, float32), ``gt_poly`` and
        ``difficult``.

    Raises:
        KeyError: if an object's category name is not in ``cname2cid``.
    """
    records = []
    # Sorted so that the record order does not depend on the file system.
    for fname in sorted(os.listdir(datadir)):
        # Select by real extension: splitting on '.' and indexing a fixed
        # position crashes on names such as 'README.txt' or '.DS_Store'.
        stem, ext = os.path.splitext(fname)
        if ext.lower() != '.xml':
            continue

        tree = ET.parse(os.path.join(datadir, fname))
        objs = tree.findall('object')
        if not objs:
            # Images without any labelled object are not returned.
            continue

        size = tree.find('size')
        im_w = float(size.find('width').text)
        im_h = float(size.find('height').text)

        gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
        gt_class = np.zeros((len(objs), ), dtype=np.int32)
        is_crowd = np.zeros((len(objs), ), dtype=np.int32)
        difficult = np.zeros((len(objs), ), dtype=np.int32)
        for i, obj in enumerate(objs):
            gt_class[i] = cname2cid[obj.find('name').text]

            # A missing or empty <difficult> tag is treated as "not difficult".
            diff_node = obj.find('difficult')
            if diff_node is not None and diff_node.text:
                difficult[i] = int(diff_node.text)

            box = obj.find('bndbox')
            # Clip the corners to the image area.
            x1 = max(0.0, float(box.find('xmin').text))
            y1 = max(0.0, float(box.find('ymin').text))
            x2 = min(im_w - 1, float(box.find('xmax').text))
            y2 = min(im_h - 1, float(box.find('ymax').text))
            # Ground-truth boxes are stored as (center x, center y, w, h).
            gt_bbox[i] = [(x1 + x2) / 2.0, (y1 + y2) / 2.0,
                          x2 - x1 + 1., y2 - y1 + 1.]

        records.append({
            'im_file': os.path.join(datadir, stem + '.jpg'),
            'h': im_h,
            'w': im_w,
            'is_crowd': is_crowd,
            'gt_class': gt_class,
            'gt_bbox': gt_bbox,
            'gt_poly': [],
            'difficult': difficult,
        })
    return records
 
# Directories of the three dataset splits.
# NOTE(review): the paths look like a Google Drive mounted in Colab — adjust
# them to wherever the BCCD export was unpacked.
train_path = '/content/gdrive/My Drive/bccd/train'
val_path = '/content/gdrive/My Drive/bccd/valid'
test_path = '/content/gdrive/My Drive/bccd/test'
# Build the category-name -> class-id mapping, then parse the annotations of
# the training split into a list of record dicts.
cname2cid = get_cell_names()
records = get_annotations(cname2cid,train_path)

读取后的示例

{'difficult': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32),
'gt_bbox': array([[188.5, 269.5, 68. , 80. ],
[142.5, 40.5, 68. , 80. ],
[277.5, 135.5, 68. , 96. ],
[364. , 152. , 81. , 107. ],
[164.5, 123.5, 74. , 88. ],
[ 37.5, 109. , 72. , 87. ],
[264. , 231.5, 67. , 100. ],
[ 88. , 195. , 75. , 109. ],
[341.5, 326. , 76. , 103. ],
[102.5, 375.5, 68. , 80. ],
[112.5, 300.5, 36. , 38. ],
[155. , 232.5, 29. , 38. ],
[235.5, 280. , 30. , 41. ],
[246.5, 360.5, 104. , 110. ]], dtype=float32),
'gt_class': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1], dtype=int32),
'gt_poly': [],
'h': 416.0,
'im_file': '/content/gdrive/My Drive/bccd/train/BloodImage_00145_jpg.rf.a265e7f4f0aab5586c6aa5258bb03966.jpg',
'is_crowd': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32),
'w': 416.0}

2.读图片数据

import cv2

# In a typical detection task one image contains several target objects.
# Every image is therefore given a fixed number of ground-truth slots
# (50 by default): extra boxes are dropped and unused slots stay all-zero.
def get_bbox(gt_bbox, gt_class, max_num=50):
    """Pad or truncate ground-truth boxes and labels to a fixed length.

    Args:
        gt_bbox: array-like of shape ``(n, 4)`` with one box per row.
        gt_class: array-like of length at least ``n`` with the class id of
            each box.
        max_num: number of slots in the returned arrays.

    Returns:
        ``(boxes, classes)``: float64 arrays of shape ``(max_num, 4)`` and
        ``(max_num,)``. The first ``min(n, max_num)`` entries are copied from
        the inputs and the remaining ones are zero.

    Raises:
        ValueError: if ``gt_class`` has fewer entries than ``gt_bbox``.
    """
    boxes = np.asarray(gt_bbox, dtype=np.float64).reshape(-1, 4)
    classes = np.asarray(gt_class, dtype=np.float64).reshape(-1)
    if len(classes) < len(boxes):
        raise ValueError('gt_class has fewer entries than gt_bbox')

    padded_boxes = np.zeros((max_num, 4))
    padded_classes = np.zeros((max_num,))
    # Slice before writing: checking the limit after the write (as an index
    # loop would) overruns the buffer as soon as there are more than
    # max_num boxes.
    keep = min(len(boxes), max_num)
    padded_boxes[:keep] = boxes[:keep]
    padded_classes[:keep] = classes[:keep]
    return padded_boxes, padded_classes

def get_img_data_from_file(record):
    im_file = record['im_file']
    h = record['h']
    w = record['w']
    is_crowd = record['is_crowd']
    gt_class = record['gt_class']
    gt_bbox = record['gt_bbox']
    difficult = record['difficult']

    img = cv2.imread(im_file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    gt_boxes, gt_labels = get_bbox(gt_bbox, gt_class)

    # gt_bbox 用相对值
    gt_boxes[:, 0] = gt_boxes[:, 0] / float(w)
    gt_boxes[:, 1] = gt_boxes[:,