在深度学习的物体检测任务中,我们需要一个 CSV 文件来存储训练集图片的文件路径,以及每张图片中对应物体的 bounding box 位置坐标。本文介绍生成该 CSV 文件的 Python 代码。
首先需要生成训练数据的 GT(ground truth)图:把训练原图中每个物体的 bounding box 所在位置画成白色方块,其余区域保持纯黑色。
这段代码需要填入原图的路径和 GT 图的路径,最后生成的 CSV 文件每一行是原图路径加上该图所有 bounding box 的坐标和大小(依次为 x、y、w、h,坐标按 MATLAB 习惯从 1 开始计数)。
导入所需的库,并对图片进行读取和预处理:
import cv2
import numpy as np
import pandas as pd
import glob
import csv
import os
import pdb
import natsort
# Image discovery and preprocessing setup.
# Path to the folder that holds the ground-truth (GT) mask images.
gt_image_folder = 'training_data_gt_0525_4/'
# Collect every supported image extension. Where glob matching is
# case-insensitive (e.g. Windows) the '*.jpeg' and '*.JPEG' patterns match the
# same files, which would list each of them twice and produce duplicate CSV
# rows. dict.fromkeys drops the repeats while keeping first-seen order.
gt_patterns = ('*.jpg', '*.png', '*.jpeg', '*.JPEG')
gt_images = []
for gt_pattern in gt_patterns:
    gt_images.extend(glob.glob(gt_image_folder + gt_pattern))
gt_images = list(dict.fromkeys(gt_images))
# Name of the label; it is used as the bounding-box column header of the CSV.
label_name = 'cell2'
# Natural sort so numbered files are ordered numerically rather than
# lexicographically.
gt_images = natsort.natsorted(gt_images)
生成 bounding box,并整理成用于导出 CSV 的字典格式:
# Directory that receives a copy of each GT image with its boxes drawn on it.
bbox_directory = 'bbox_matlab_cell2_train_0525_4'
os.makedirs(bbox_directory, exist_ok=True)
# Folder of the original training images; it is prefixed to every file name
# written to the CSV. (An earlier variant of this script used 'original/'.)
training_path = './training_data_original_0525_4/'
# Column-oriented record for the CSV: 'fn' holds image paths and label_name
# holds one space-separated "x y w h x y w h ..." string per image.
new_dict = {'fn': [], label_name: []}
for j, i in enumerate(gt_images):
    img = cv2.imread(i)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable files;
        # skip them instead of crashing inside cvtColor.
        print('skipping unreadable image {}'.format(i))
        continue
    # cv2.imread delivers channels in BGR order, so convert with BGR2GRAY.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); taking the last two values works on all of them.
    contours, _ = cv2.findContours(
        thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
    bbox_list = []
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        # Suppress tiny boxes (optional): both width and height must exceed 1.
        if w > 1 and h > 1:
            # MATLAB convention: coordinates start from 1 instead of 0.
            # For corner format use [x, y, x + w - 1, y + h - 1] instead.
            bbox_list.extend([x + 1, y + 1, w, h])
            cv2.rectangle(img, (x, y), (x + w - 1, y + h - 1), (0, 255, 0), 1)
    # Derive the original image path from the GT file name: take the base
    # name (independent of the path separator), drop the extension, keep the
    # part before the first underscore, and append '.png'.
    # NOTE(review): assumes GT files are named '<original stem>_<suffix>.<ext>'
    # and that the originals are .png files - confirm against the dataset.
    gt_stem = os.path.splitext(os.path.basename(i))[0]
    original_path = training_path + gt_stem.split('_')[0] + '.png'
    if bbox_list:
        new_dict['fn'].append(original_path)
        new_dict[label_name].append(' '.join(str(p) for p in bbox_list))
        # Save the GT image with the kept bounding boxes drawn on it.
        cv2.imwrite('{}/{}_BBox.png'.format(bbox_directory, j), img)
        print('writing {}/{}_BBox.png...'.format(bbox_directory, j))
    else:
        # No box survived: record the image as a negative training sample
        # with the placeholder box "1 1 1 1".
        bbox_list.extend([1, 1, 1, 1])
        new_dict['fn'].append(original_path)
        new_dict[label_name].append(' '.join(str(p) for p in bbox_list))
由字典格式转化成CSV文件
# Build the table with the image-path column first and the label column second,
# then write it without the index column.
# Change output_csv to rename the generated .csv file.
output_csv = 'train_cell2.csv'
csv_columns = ['fn', label_name]
df = pd.DataFrame(data=new_dict, columns=csv_columns)
df.to_csv(path_or_buf=output_csv, index=False)
生成的CSV文件截图