数据集统计信息
# Clone PaddleDetection (via a GitHub mirror)
!git clone https://github.com.cnpmjs.org/PaddlePaddle/PaddleDetection
# Move it into the persistent storage directory
!mv PaddleDetection/ work/
# Install the required dependencies
!pip install -r work/PaddleDetection/requirements.txt
# Install the packages needed for annotation-format conversion
!pip install pycocotools
!pip install scikit-image
COCO标注
# Unzip the COCO annotation archive
!unzip -oq /home/aistudio/data/data97273/annotations_trainval2017.zip -d ./
总的验证集
# Unzip the full validation image set
!unzip -oq /home/aistudio/data/data97273/val2017.zip -d ./
总的训练集
# Unzip the full training image set
!unzip -oq /home/aistudio/data/data97273/train2017.zip -d ./
# Create the directories for the parsed images and XML annotation files
!mkdir -p VOCData/images/
!mkdir -p VOCData/Annotations/
!mkdir COCOData/
# Process the object-detection dataset (presumably COCO -> VOC conversion; see ProcessData.py)
!python ProcessData.py
# Create an empty label txt file in case it does not exist yet
!touch VOCData/label_list.txt
# Move the prepared dataset into PaddleDetection's dataset folder
!mv VOCData work/PaddleDetection/dataset/
%cd work/PaddleDetection/
"""
Split the dataset (VOC format) into train : val = 0.85 : 0.15.

Also writes the class-label file label_list.txt, one class name per line.
"""
# NOTE(review): the original text used Unicode "smart" quotes (’ “ ”) throughout,
# which is a SyntaxError in Python; they are restored to ASCII quotes here.
import os
import shutil
import skimage.io as io
from tqdm import tqdm
from random import shuffle

# Root of the VOC-format dataset (relative to work/PaddleDetection/ after %cd).
dataset = 'dataset/VOCData/'
train_txt = os.path.join(dataset, 'train_val.txt')
val_txt = os.path.join(dataset, 'val.txt')
lbl_txt = os.path.join(dataset, 'label_list.txt')

# Detection classes; written one per line to label_list.txt.
classes = [
    "person"
]
with open(lbl_txt, 'w') as f:
    for l in classes:
        f.write(l + '\n')

xml_base = 'Annotations'
img_base = 'images'
# Collect every VOC annotation file, then shuffle so the split is random.
xmls = [v for v in os.listdir(os.path.join(dataset, xml_base)) if v.endswith('.xml')]
shuffle(xmls)
split = int(0.85 * len(xmls))  # index separating the train part from the val part
with open(train_txt, ‘w’) as f:
for x in tqdm(xmls[:split]):
m = x[:-4]+’.jpg’
xml_path = os.path.join(xml_base, x)
img_path = os.path.join(img_base, m)
f.write(’{} {}\n’.fo