目录
COCO数据集格式简介:
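COCO的文件夹主要包含标注文件夹和图片文件夹,格式如下(以COCO 2017为例)
COCO_ROOT
├── annotations # 标注文件夹,存放json标注文件
├── train2017 # 训练集图片文件夹
└── val2017 # 验证集图片文件夹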
标注文件主要包括5个主keys,如下(官网http://cocodataset.org/#format-data)
{
"info": info, # 可以设置为"mydata"
"images": [image], # list的形式为所有图片的数据
"annotations": [annotation], # list的形式,所有图片的标注信息
"categories": [category], # 类别信息
"licenses": [license], # 可以设置为['mylicenses']
}
images键,其全部的信息如下
image{
"id": int,
"width": int,
"height": int,
"file_name": str,
"license": int,
"flickr_url": str,
"coco_url": str,
"date_captured": datetime,
}
简化后的信息可以如下
image{
"id": int,
"width": int,
"height": int,
"file_name": str,
}
annotations,全部信息如下
annotation{
"id": int,
"image_id": int,
"category_id": int, # 就是类别的编号,也就是label
"segmentation": RLE or [polygon],
"area": float,
"bbox": [x,y,width,height], # 注意一下格式
"iscrowd": 0 or 1,
}
categories,全部信息如下
categories[{
"id": int,
"name": str,
"supercategory": str,
}]
一个简单的例子
{
"info": 'coco',
"licenses": ['none'],
"images": [
{
"height": 224,
"width": 224,
"id": 0,
"file_name": 'figure1.jpg'
},
{
"height": 224,
"width": 224,
"id": 1,
"file_name": 'figure2.jpg'
}],
"annotations":[
{
"id": 0,
"image_id": 0,
"category_id": 0,
"segmentation": [[1, 1, 1, 1, 1, 1, 1, 1],],
"bbox": [0, 0, 1, 1],
"iscrowd": 0,
"area": 1
},],
"categories": [
{
"id": 0,
"name": "1",
"supercategory": "name"
},]
}
将自己的数据集转换为COCO格式:
原始数据格式如下
[
{
"name": "T2019_0.jpg", # 图片名
"category": 0, # 该bbox对应的类别
"bbox": {
"x": 20642,
"y": 20295,
"w": 163,
"h": 134
}
},]
定义一个MyData2COCO类
class MyData2COCO:
def __init__(self): # set up the empty containers and the id counters
def _categories(self, num_categories): # build the "categories" entries
def _image(self, path, h, w): # build one "images" entry
def _annotation(self, label, bbox): # build one "annotations" entry
def to_coco(self, anno_file, img_dir, num_categories): # run the conversion
def save_coco_json(self, instance, save_path): # write the result to a JSON file
def __init__(self):
    """Create the empty COCO containers and zero the running id counters."""
    # Ids handed out to the next image / annotation that gets recorded.
    self.img_id = self.ann_id = 0
    # Accumulators for the "images", "annotations" and "categories" keys;
    # three distinct lists, one per key.
    self.images, self.annotations, self.categories = [], [], []
def _categories(self, num_categories):
    """Append one category record per class id to ``self.categories``.

    Args:
        num_categories: Total number of classes; ids run from 0 up to
            ``num_categories - 1``.
    """
    self.categories.extend(
        {
            'id': class_id,
            'name': str(class_id),      # replace with real class names if needed
            'supercategory': 'name',    # replace with a real super-category if needed
        }
        for class_id in range(num_categories)
    )
def _image(self, path, h, w):
    """Return the ``images`` record for one picture.

    Args:
        path: Path of the image file; only its base name is stored.
        h: Image height.
        w: Image width.

    Returns:
        A dict with ``height``, ``width``, ``id`` (the current
        ``self.img_id``) and ``file_name``.
    """
    return {
        'height': h,
        'width': w,
        'id': self.img_id,
        'file_name': os.path.basename(path),
    }
def _annotation(self, label, bbox):
    """Build one COCO ``annotations`` record for a single bounding box.

    Args:
        label: Category id, stored as ``category_id``.
        bbox: Mapping with the keys ``x``, ``y``, ``w`` and ``h``. A mapping
            without those keys is read positionally, its first four values
            being taken as x, y, width, height.

    Returns:
        A dict with ``id`` (current ``self.ann_id``), ``image_id`` (current
        ``self.img_id``), ``category_id``, ``segmentation``, ``bbox``
        (``[x, y, width, height]``), ``iscrowd`` and ``area``.
    """
    try:
        # Read by key so the result does not depend on the key order of the
        # source JSON.
        x, y, w, h = (bbox[key] for key in ('x', 'y', 'w', 'h'))
    except KeyError:
        x, y, w, h = list(bbox.values())[:4]
    # The four corners of the box, starting top-left and going clockwise.
    # The bottom-right corner is (x + w, y + h).
    points = [[x, y], [x + w, y], [x + w, y + h], [x, y + h]]
    annotation = {}
    annotation['id'] = self.ann_id
    annotation['image_id'] = self.img_id
    annotation['category_id'] = label
    # The rectangle is stored as a single flattened polygon.
    annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
    annotation['bbox'] = [x, y, w, h]
    annotation['iscrowd'] = 0
    annotation['area'] = w * h
    return annotation
def to_coco(self, anno_file, img_dir, num_categories):
    """Convert a custom annotation file plus one image folder to a COCO dict.

    Args:
        anno_file: Path of the source annotation JSON file (records with
            ``name``, ``category`` and ``bbox`` fields).
        img_dir: Folder with the images of one split (COCO keeps train and
            valid images in separate folders).
        num_categories: Total number of bbox categories.

    Returns:
        A dict with the keys ``info``, ``licenses``, ``images``,
        ``annotations`` and ``categories``.

    Raises:
        ValueError: If a file that has annotations cannot be decoded as an
            image.
    """
    self._categories(num_categories)  # fill in the categories section
    with open(anno_file, "r", encoding="utf-8") as f_json:
        all_anno_pd = pd.read_json(f_json)
    # Sorted so that image ids are the same on every run; os.listdir gives
    # no ordering guarantee.
    img_names = sorted(os.listdir(img_dir))
    for img_name in tqdm.tqdm(img_names):
        each_img_anno = all_anno_pd[all_anno_pd["name"] == img_name]
        img_path = os.path.join(img_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            if len(each_img_anno) == 0:
                continue  # stray entry in the folder that is not part of the data
            raise ValueError("cannot read annotated image: %s" % img_path)
        h, w = img.shape[:2]
        # An image without annotations is still listed under "images"; it
        # simply contributes no annotation records.
        bboxs = each_img_anno["bbox"].tolist()
        labels = each_img_anno["category"].tolist()
        for bbox, label in zip(bboxs, labels):
            self.annotations.append(self._annotation(label, bbox))
            self.ann_id += 1
        self.images.append(self._image(img_path, h, w))
        self.img_id += 1
    instance = {}
    instance['info'] = 'mydata2coco'
    # The COCO format names this top-level key "licenses".
    instance['licenses'] = ['none']
    instance['images'] = self.images
    instance['annotations'] = self.annotations
    instance['categories'] = self.categories
    return instance
def save_coco_json(self, instance, save_path):
    """Write the COCO dict to ``save_path`` as JSON.

    Args:
        instance: The JSON-serializable dict to store.
        save_path: Path of the output file; an existing file is replaced.

    Raises:
        TypeError: If ``instance`` holds a value that JSON cannot encode.
    """
    # Serialize before opening the file: if encoding fails, an existing
    # output file is left untouched instead of being truncated.
    text = json.dumps(instance, indent=1, separators=(',', ': '))
    with open(save_path, 'w', encoding='utf-8') as fp:
        fp.write(text)
调用定义好的类实现转换
if __name__ == '__main__':
    # Fill in these settings before running.
    train_imgdir = ""    # folder with the training images
    valid_imgdir = ""    # folder with the validation images
    anno_dir = ""        # path of the source annotation JSON file
    save_path = ""       # folder that receives the generated COCO files
    num_categories = 0   # total number of categories in the dataset

    # Create the output folder if needed. An existing folder is kept: the two
    # output files are overwritten, nothing else in it is deleted.
    os.makedirs(save_path, exist_ok=True)

    # One fresh converter per split so the id counters start from 0 each time.
    for img_dir, out_name in ((train_imgdir, 'anno_train.json'),
                              (valid_imgdir, 'anno_valid.json')):
        converter = MyData2COCO()
        # to_coco requires the category count as its third argument.
        instance = converter.to_coco(anno_dir, img_dir, num_categories)
        converter.save_coco_json(instance, os.path.join(save_path, out_name))