Reposted from AI Studio
Project link: https://aistudio.baidu.com/aistudio/projectdetail/3494751
1 Environment Setup
# Import the required third-party libraries
import numpy as np
import pandas as pd
import shutil
import json
import os
import cv2
import glob
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
from matplotlib.font_manager import FontProperties
from PIL import Image
import random
myfont = FontProperties(fname=r"NotoSansCJKsc-Medium.otf", size=12)
plt.rcParams['figure.figsize'] = (12, 12)
plt.rcParams['font.family']= myfont.get_family()
plt.rcParams['font.sans-serif'] = myfont.get_name()
plt.rcParams['axes.unicode_minus'] = False
# !unzip data/data54680/coco.zip
!unzip data/data42353/wheat.zip
# Setup the paths to train and test images
TRAIN_DIR = 'wheat/train/'
TRAIN_CSV_PATH = 'wheat/train.json'
# Glob the directories and get the lists of train and test images
train_fns = glob.glob(TRAIN_DIR + '*')
print('数据集图片数量: {}'.format(len(train_fns)))
数据集图片数量: 3422
2 Overall Data Distribution
def generate_anno_eda(dataset_path, anno_file):
with open(os.path.join(dataset_path, anno_file)) as f:
anno = json.load(f)
print('标签类别:', anno['categories'])
print('类别数量:', len(anno['categories']))
print('训练集图片数量:', len(anno['images']))
print('训练集标签数量:', len(anno['annotations']))
total=[]
for img in anno['images']:
hw = (img['height'],img['width'])
total.append(hw)
unique = set(total)
for k in unique:
print('长宽为(%d,%d)的图片数量为:'%k,total.count(k))
ids=[]
images_id=[]
for i in anno['annotations']:
ids.append(i['id'])
images_id.append(i['image_id'])
print('训练集图片数量:', len(anno['images']))
print('unique id 数量:', len(set(ids)))
print('unique image_id 数量', len(set(images_id)))
# Build the category id -> name dictionary and per-class counters
category_dic=dict([(i['id'],i['name']) for i in anno['categories']])
counts_label=dict([(i['name'],0) for i in anno['categories']])
for i in anno['annotations']:
counts_label[category_dic[i['category_id']]] += 1
label_list = counts_label.keys() # labels of each slice
print('标签列表:', label_list)
size = counts_label.values() # size of each slice
color = ['#FFB6C1', '#D8BFD8', '#9400D3', '#483D8B', '#4169E1', '#00FFFF','#B1FFF0','#ADFF2F','#EEE8AA','#FFA500','#FF6347'] # slice colors
# explode = [0.05, 0, 0] # offset of each slice
patches, l_text, p_text = plt.pie(size, labels=label_list, colors=color, labeldistance=1.1, autopct="%1.1f%%", shadow=False, startangle=90, pctdistance=0.6, textprops={'fontproperties':myfont})
plt.axis("equal") # equal aspect ratio so the pie is drawn as a circle
plt.legend(prop=myfont)
plt.show()
# Run the EDA on the training set
generate_anno_eda('wheat', 'train.json')
2.1 Image-Level Analysis
2.1.1 Image Resolution
# Load the training set annotation file
with open(TRAIN_CSV_PATH, 'r', encoding='utf-8') as f:
train_data = json.load(f)
train_fig = pd.DataFrame(train_data['images'])
train_fig.head()
| | file_name | height | id | width |
|---|---|---|---|---|
| 0 | b6ab77fd7.jpg | 1024 | 1 | 1024 |
| 1 | b53afdf5c.jpg | 1024 | 2 | 1024 |
| 2 | 7b72ea0fb.jpg | 1024 | 3 | 1024 |
| 3 | 91c9d9c38.jpg | 1024 | 4 | 1024 |
| 4 | 41c0123cc.jpg | 1024 | 5 | 1024 |
ps = np.zeros(len(train_fig))
for i in range(len(train_fig)):
ps[i]=train_fig['width'][i] * train_fig['height'][i]/1e6
plt.title('训练集图片大小分布', fontproperties=myfont)
sns.distplot(ps, bins=21,kde=False)
train_anno = pd.DataFrame(train_data['annotations'])
df_train = pd.merge(left=train_fig, right=train_anno, how='inner', left_on='id', right_on='image_id')
df_train['bbox_xmin'] = df_train['bbox'].apply(lambda x: x[0])
df_train['bbox_ymin'] = df_train['bbox'].apply(lambda x: x[1])
df_train['bbox_w'] = df_train['bbox'].apply(lambda x: x[2])
df_train['bbox_h'] = df_train['bbox'].apply(lambda x: x[3])
df_train['bbox_xcenter'] = df_train['bbox'].apply(lambda x: (x[0]+0.5*x[2]))
df_train['bbox_ycenter'] = df_train['bbox'].apply(lambda x: (x[1]+0.5*x[3]))
def get_all_bboxes(df, name):
image_bboxes = df[df.file_name == name]
bboxes = []
categories = []
for _,row in image_bboxes.iterrows():
bboxes.append((row.bbox_xmin, row.bbox_ymin, row.bbox_w, row.bbox_h, row.category_id))
return bboxes
def plot_image_examples(df, rows=3, cols=3, title='Image examples'):
fig, axs = plt.subplots(rows, cols, figsize=(15,15))
color = ['#FFB6C1', '#D8BFD8', '#9400D3', '#483D8B', '#4169E1', '#00FFFF','#B1FFF0','#ADFF2F','#EEE8AA','#FFA500','#FF6347'] # bbox edge colors, indexed by category_id
for row in range(rows):
for col in range(cols):
idx = np.random.randint(len(df), size=1)[0]
name = df.iloc[idx]["file_name"]
img = Image.open(TRAIN_DIR + str(name))
axs[row, col].imshow(img)
bboxes = get_all_bboxes(df, name)
for bbox in bboxes:
rect = patches.Rectangle((bbox[0],bbox[1]),bbox[2],bbox[3],linewidth=1,edgecolor=color[bbox[4]],facecolor='none')
axs[row, col].add_patch(rect)
axs[row, col].axis('off')
plt.suptitle(title,fontproperties=myfont)
def plot_gray_examples(df, rows=3, cols=3, title='Image examples'):
fig, axs = plt.subplots(rows, cols, figsize=(15,15))
color = ['#FFB6C1', '#D8BFD8', '#9400D3', '#483D8B', '#4169E1', '#00FFFF','#B1FFF0','#ADFF2F','#EEE8AA','#FFA500','#FF6347'] # bbox edge colors, indexed by category_id
for row in range(rows):
for col in range(cols):
idx = np.random.randint(len(df), size=1)[0]
name = df.iloc[idx]["file_name"]
img = Image.open(TRAIN_DIR + str(name)).convert('L')
axs[row, col].imshow(img)
bboxes = get_all_bboxes(df, name)
for bbox in bboxes:
rect = patches.Rectangle((bbox[0],bbox[1]),bbox[2],bbox[3],linewidth=1,edgecolor=color[bbox[4]],facecolor='none')
axs[row, col].add_patch(rect)
axs[row, col].axis('off')
plt.suptitle(title,fontproperties=myfont)
2.1.2 Image Brightness Analysis
def get_image_brightness(image):
# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# get average brightness
return np.array(gray).mean()
def add_brightness(df):
brightness = []
for _, row in df.iterrows():
name = row["file_name"]
image = cv2.imread(TRAIN_DIR + name)
brightness.append(get_image_brightness(image))
brightness_df = pd.DataFrame(brightness)
brightness_df.columns = ['brightness']
df = pd.concat([df, brightness_df], ignore_index=True, axis=1)
df.columns = ['file_name', 'brightness']
return df
images_df = pd.DataFrame(df_train.file_name.unique())
images_df.columns = ['file_name']
brightness_df = add_brightness(images_df)
brightness_df.head()
dark_names = brightness_df[brightness_df['brightness'] < 50].file_name
plot_image_examples(df_train[df_train.file_name.isin(dark_names)], title='暗图片')
bright_names = brightness_df[brightness_df['brightness'] > 130].file_name
plot_image_examples(df_train[df_train.file_name.isin(bright_names)], title='亮图片')
sns.set(rc={'figure.figsize':(12,6)})
ps = np.zeros(len(brightness_df))
for i in range(len(brightness_df)):
ps[i]=brightness_df['brightness'][i]
plt.title('图片亮度分布', fontproperties=myfont)
sns.distplot(ps, bins=21,kde=False)
2.2 Object Distribution Analysis
ps = np.zeros(len(df_train))
for i in range(len(df_train)):
ps[i]=df_train['area'][i]/1e6
plt.title('训练集目标大小分布', fontproperties=myfont)
sns.distplot(ps, bins=21,kde=False)
# Bounding box width/height distribution per category
sns.set(rc={'figure.figsize':(12,6)})
sns.relplot(x="bbox_w", y="bbox_h", hue="category_id", col="category_id", data=df_train[0:1000])
(figure: output_23_1.png)
# Bounding box center-point distribution per category
sns.set(rc={'figure.figsize':(12,6)})
sns.relplot(x="bbox_xcenter", y="bbox_ycenter", hue="category_id", col="category_id", data=df_train[0:1000]);
(figure: output_24_0.png)
sns.set(rc={'figure.figsize':(12,6)})
plt.title('训练集目标大小分布', fontproperties=myfont)
sns.violinplot(x=df_train['category_id'],y=df_train['area'])
df_train.area.describe()
count 147793.000000
mean 6843.356576
std 5876.326590
min 2.000000
25% 3658.000000
50% 5488.000000
75% 8272.000000
max 529788.000000
Name: area, dtype: float64
sns.set(rc={'figure.figsize':(12,6)})
plt.title('训练集小目标分布', fontproperties=myfont)
plt.ylim(0, 4000)
sns.violinplot(x=df_train['category_id'],y=df_train['area'])
sns.set(rc={'figure.figsize':(12,6)})
plt.title('训练集大目标分布', fontproperties=myfont)
plt.ylim(10000, max(df_train.area))
sns.violinplot(x=df_train['category_id'],y=df_train['area'])
graph=sns.countplot(data=df_train, x='category_id')
graph.set_xticklabels(graph.get_xticklabels(), rotation=90)
plt.title('各类别目标数量分布', fontproperties=myfont)
for p in graph.patches:
height = p.get_height()
graph.text(p.get_x()+p.get_width()/2., height + 0.1,height ,ha="center")
2.3 Analysis of Notable Images
2.3.1 Distribution of Object Counts per Image
df_train['bbox_count'] = df_train.apply(lambda row: 1 if any(row.bbox) else 0, axis=1)
train_images_count = df_train.groupby('file_name').sum().reset_index()
train_images_count['bbox_count'].describe()
count 3373.000000
mean 43.816484
std 20.374820
min 1.000000
25% 28.000000
50% 43.000000
75% 59.000000
max 116.000000
Name: bbox_count, dtype: float64
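Since the bbox_count flag above is effectively 1 for every annotation row, the same per-image counts can also be read off with a plain groupby size; a small equivalent sketch (assuming df_train as built above):

# Equivalent to the bbox_count / groupby-sum construction above
bbox_counts = df_train.groupby('file_name').size()
print(bbox_counts.describe())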
# Images with more than 50 objects
train_images_count['file_name'][train_images_count['bbox_count']>50]
0 00333207f.jpg
7 00ea5e5ee.jpg
17 015939012.jpg
23 02640d9da.jpg
24 026b6f389.jpg
...
3356 feac3a701.jpg
3360 feda9265c.jpg
3366 ffaa964a2.jpg
3368 ffb445410.jpg
3369 ffbf75e5b.jpg
Name: file_name, Length: 1272, dtype: object
# Images with more than 100 objects
train_images_count['file_name'][train_images_count['bbox_count']>100]
less_spikes_ids = train_images_count[train_images_count['bbox_count'] > 50].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='单图目标超过50个(示例)')
less_spikes_ids = train_images_count[train_images_count['bbox_count'] > 100].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='单图目标超过100个(示例)')
less_spikes_ids = train_images_count[train_images_count['bbox_count'] < 5].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='单图目标少于5个(示例)')
2.3.2 Per-Image Object Coverage Analysis
less_spikes_ids = train_images_count[train_images_count['area'] > max(train_images_count['area'])*0.9].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='目标总面积最大(示例)')
less_spikes_ids = train_images_count[train_images_count['area'] < min(train_images_count['area'])*1.1].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='目标总面积最小(示例)')
2.3.3 Extremely Large / Extremely Small Objects
df_train['bbox_count'] = df_train.apply(lambda row: 1 if any(row.bbox) else 0, axis=1)
train_images_count = df_train.groupby('file_name').max().reset_index()
less_spikes_ids = train_images_count[train_images_count['area'] > max(train_images_count['area'])*0.8].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='单目标面积最大(示例)')
df_train['bbox_count'] = df_train.apply(lambda row: 1 if any(row.bbox) else 0, axis=1)
train_images_count = df_train.groupby('file_name').min().reset_index()
less_spikes_ids = train_images_count[train_images_count['area'] < min(train_images_count['area'])*1.2].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='单目标面积最小(示例)')
(figure: output_43_0.png)
2.4 Object Occlusion Analysis
# Compute the IoU of two boxes given as (x, y, w, h)
def bb_intersection_over_union(boxA, boxB):
boxA = [int(x) for x in boxA]
boxB = [int(x) for x in boxB]
boxA = [boxA[0], boxA[1], boxA[0]+boxA[2], boxA[1]+boxA[3]]
boxB = [boxB[0], boxB[1], boxB[0]+boxB[2], boxB[1]+boxB[3]]
xA = max(boxA[0], boxB[0])
yA = max(boxA[1], boxB[1])
xB = min(boxA[2], boxB[2])
yB = min(boxA[3], boxB[3])
interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
iou = interArea / float(boxAArea + boxBArea - interArea)
return iou
# tmp is a pandas Series of bboxes whose index starts at 0
def bbox_iou(tmp):
iou_agg = 0
iou_cnt = 0
for i in range(len(tmp)):
for j in range(len(tmp)):
if i != j:
iou_agg += bb_intersection_over_union(tmp[i], tmp[j])
if bb_intersection_over_union(tmp[i], tmp[j]) > 0:
iou_cnt += 1
iou_agg = iou_agg/2
iou_cnt = iou_cnt/2
return iou_agg, iou_cnt
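The double loop above visits every ordered pair and calls the IoU routine twice for each, then halves the totals afterwards. A minimal equivalent sketch that visits each unordered pair once and reuses the computed IoU (bbox_iou_pairs is a hypothetical name, relying on the bb_intersection_over_union defined above):

def bbox_iou_pairs(boxes):
    # Aggregate IoU and count overlapping pairs, visiting each unordered pair once.
    iou_agg = 0.0
    iou_cnt = 0
    for i in range(len(boxes)):
        for j in range(i + 1, len(boxes)):
            iou = bb_intersection_over_union(boxes[i], boxes[j])
            iou_agg += iou
            if iou > 0:
                iou_cnt += 1
    return iou_agg, iou_cnt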
file_list = df_train['file_name'].unique()
train_iou_cal = pd.DataFrame(columns=('file_name', 'iou_agg', 'iou_cnt'))
for i in range(len(file_list)):
tmp = df_train['bbox'][df_train.file_name==file_list[i]].reset_index(drop=True)
iou_agg, iou_cnt = bbox_iou(tmp)
train_iou_cal.loc[len(train_iou_cal)] = [file_list[i], iou_agg, iou_cnt]
train_iou_cal.iou_agg.describe()
ps = np.zeros(len(train_iou_cal))
for i in range(len(train_iou_cal)):
ps[i]=train_iou_cal['iou_agg'][i]
plt.title('训练集目标遮挡程度分布', fontproperties=myfont)
sns.distplot(ps, bins=21,kde=False)
train_iou_cal.iou_cnt.describe()
ps = np.zeros(len(train_iou_cal))
for i in range(len(train_iou_cal)):
ps[i]=train_iou_cal['iou_cnt'][i]
plt.title('训练集目标遮挡数量分布', fontproperties=myfont)
sns.distplot(ps, bins=21,kde=False)
less_spikes_ids = train_iou_cal[train_iou_cal['iou_agg'] > max(train_iou_cal['iou_agg'])*0.9].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='目标遮挡程度最高(示例)')
less_spikes_ids = train_iou_cal[train_iou_cal['iou_agg'] <= min(train_iou_cal['iou_agg'])*1.1].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='目标遮挡程度最低(示例)')
less_spikes_ids = train_iou_cal[train_iou_cal['iou_cnt'] > max(train_iou_cal['iou_cnt'])*0.9].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='目标遮挡数量最高(示例)')
less_spikes_ids = train_iou_cal[train_iou_cal['iou_cnt'] <= min(train_iou_cal['iou_cnt'])*1.1].file_name
plot_image_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='目标遮挡数量最低(示例)')
2.5 Color Analysis
2.5.1 Image RGB Statistics
files = os.listdir(TRAIN_DIR)
R = 0.
G = 0.
B = 0.
R_2 = 0.
G_2 = 0.
B_2 = 0.
N = 0
for f in files:
img = cv2.imread(TRAIN_DIR+f)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.array(img)
h, w, c = img.shape
N += h*w
R_t = img[:, :, 0]
R += np.sum(R_t)
R_2 += np.sum(np.power(R_t, 2.0))
G_t = img[:, :, 1]
G += np.sum(G_t)
G_2 += np.sum(np.power(G_t, 2.0))
B_t = img[:, :, 2]
B += np.sum(B_t)
B_2 += np.sum(np.power(B_t, 2.0))
R_mean = R/N
G_mean = G/N
B_mean = B/N
R_std = np.sqrt(R_2/N - R_mean*R_mean)
G_std = np.sqrt(G_2/N - G_mean*G_mean)
B_std = np.sqrt(B_2/N - B_mean*B_mean)
print("R_mean: %f, G_mean: %f, B_mean: %f" % (R_mean, G_mean, B_mean))
print("R_std: %f, G_std: %f, B_std: %f" % (R_std, G_std, B_std))
R_mean: 80.398947, G_mean: 80.899598, B_mean: 54.711709
R_std: 62.528853, G_std: 60.699236, B_std: 49.439114
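These per-channel statistics are the kind of values usually fed into a normalization transform at training time. A small illustrative sketch (not part of the original notebook), rescaling the 0-255 results above to the [0, 1] range most frameworks expect:

# Illustrative only: convert the 0-255 statistics computed above to the 0-1 scale
rgb_mean = [R_mean / 255.0, G_mean / 255.0, B_mean / 255.0]
rgb_std = [R_std / 255.0, G_std / 255.0, B_std / 255.0]
print('normalize mean:', [round(v, 4) for v in rgb_mean])
print('normalize std :', [round(v, 4) for v in rgb_std])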
2.5.2 Object RGB Analysis
# Compute the per-channel mean RGB inside a bbox given as (x, y, w, h)
def bb_rgb_cal(img, boxA):
boxA = [int(x) for x in boxA]
boxA = [boxA[0], boxA[1], boxA[0]+boxA[2], boxA[1]+boxA[3]]
img = img.crop(boxA)
width = img.size[0]
height = img.size[1]
img = img.convert('RGB')
array = []
for x in range(width):
for y in range(height):
r, g, b = img.getpixel((x,y))
array.append((r, g, b))
# average each channel over all pixels of the crop
array = np.array(array)
return round(np.mean(array[:, 0]),2), round(np.mean(array[:, 1]),2), round(np.mean(array[:, 2]),2)
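Iterating pixel by pixel in Python is slow for the roughly 148k annotations in this dataset; a rough vectorized alternative sketch (bb_rgb_cal_fast is a hypothetical helper, not part of the original project) computes the same per-channel means with NumPy:

def bb_rgb_cal_fast(img, boxA):
    # Crop the (x, y, w, h) box and average each RGB channel with NumPy.
    x, y, w, h = [int(v) for v in boxA]
    crop = np.array(img.convert('RGB').crop((x, y, x + w, y + h)), dtype=np.float32)
    r_mean, g_mean, b_mean = crop.reshape(-1, 3).mean(axis=0)
    return round(float(r_mean), 2), round(float(g_mean), 2), round(float(b_mean), 2)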
# Note: this loop over all annotations may hit Jupyter output/memory errors
from tqdm import tqdm
df_train['r_channel'] = 0
df_train['g_channel'] = 0
df_train['b_channel'] = 0
for i in tqdm(df_train.index):
array = bb_rgb_cal(Image.open(TRAIN_DIR + str(df_train.file_name[i])), df_train.bbox[i])
df_train['r_channel'].at[i] = array[0]
df_train['g_channel'].at[i] = array[1]
df_train['b_channel'].at[i] = array[2]
ps = np.zeros(len(df_train[:10000]))
for i in range(len(df_train[:10000])):
ps[i]=df_train['r_channel'][df_train.category_id==1][i]
plt.title('类别1目标r_channel分布', fontproperties=myfont)
sns.distplot(ps, bins=21,kde=False)
(figure: output_62_1.png)
ps = np.zeros(len(df_train[:10000]))
for i in range(len(df_train[:10000])):
ps[i]=df_train['g_channel'][df_train.g_channel>0][df_train.category_id==1][i]
plt.title('类别1目标g_channel分布', fontproperties=myfont)
sns.distplot(ps, bins=21,kde=False)
(figure: output_63_1.png)
ps = np.zeros(len(df_train[:10000]))
for i in range(len(df_train[:10000])):
ps[i]=df_train['b_channel'][df_train.b_channel>0][df_train.category_id==1][i]
plt.title('类别1目标b_channel分布', fontproperties=myfont)
sns.distplot(ps, bins=21,kde=False)
(figure: output_64_1.png)
2.5.3 Grayscale Rendering
less_spikes_ids = train_iou_cal[train_iou_cal['iou_cnt'] > max(train_iou_cal['iou_cnt'])*0.8].file_name
plot_gray_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='目标遮挡数量最高(灰度)')
(figure: output_66_0.png)
less_spikes_ids = train_iou_cal[train_iou_cal['iou_cnt'] <= min(train_iou_cal['iou_cnt'])*1.1].file_name
plot_gray_examples(df_train[df_train.file_name.isin(less_spikes_ids)], title='目标遮挡数量最低(灰度)')
(figure: output_67_0.png)
3 Converting a VOC-Format Dataset
Since the EDA above was developed for COCO-format datasets, we avoid reinventing the wheel when analyzing a VOC dataset by first converting it to COCO format with a script.
# Download a sample dataset
!wget https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz
# Extract the dataset
!tar -zxvf insect_det.tar.gz
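The conversion script below assumes the standard PASCAL VOC layout that the insect_det sample follows; a quick, purely illustrative check of the expected folders (not part of the original notebook):

# Expected PASCAL VOC layout, as used by the script below:
#   insect_det/Annotations/      one VOC XML file per image
#   insect_det/JPEGImages/       the image files
#   insect_det/ImageSets/Main/   train.txt etc. (only needed by parseXmlFiles_by_txt)
for sub in ['Annotations', 'JPEGImages', 'ImageSets/Main']:
    print(sub, 'exists:', os.path.isdir(os.path.join('insect_det', sub)))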
import xml.etree.ElementTree as ET
import os
import json
coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []
category_set = dict()
image_set = set()
category_item_id = -1
image_id = 20180000000
annotation_id = 0
def addCatItem(name):
global category_item_id
category_item = dict()
category_item['supercategory'] = 'none'
category_item_id += 1
category_item['id'] = category_item_id
category_item['name'] = name
coco['categories'].append(category_item)
category_set[name] = category_item_id
return category_item_id
def addImgItem(file_name, size):
global image_id
if file_name is None:
raise Exception('Could not find filename tag in xml file.')
if size['width'] is None:
raise Exception('Could not find width tag in xml file.')
if size['height'] is None:
raise Exception('Could not find height tag in xml file.')
image_id += 1
image_item = dict()
image_item['id'] = image_id
image_item['file_name'] = file_name
image_item['width'] = size['width']
image_item['height'] = size['height']
coco['images'].append(image_item)
image_set.add(file_name)
return image_id
def addAnnoItem(object_name, image_id, category_id, bbox):
global annotation_id
annotation_item = dict()
annotation_item['segmentation'] = []
seg = []
# bbox[] is x,y,w,h
# left_top
seg.append(bbox[0])
seg.append(bbox[1])
# left_bottom
seg.append(bbox[0])
seg.append(bbox[1] + bbox[3])
# right_bottom
seg.append(bbox[0] + bbox[2])
seg.append(bbox[1] + bbox[3])
# right_top
seg.append(bbox[0] + bbox[2])
seg.append(bbox[1])
annotation_item['segmentation'].append(seg)
annotation_item['area'] = bbox[2] * bbox[3]
annotation_item['iscrowd'] = 0
annotation_item['ignore'] = 0
annotation_item['image_id'] = image_id
annotation_item['bbox'] = bbox
annotation_item['category_id'] = category_id
annotation_id += 1
annotation_item['id'] = annotation_id
coco['annotations'].append(annotation_item)
def _read_image_ids(image_sets_file):
ids = []
with open(image_sets_file) as f:
for line in f:
ids.append(line.rstrip())
return ids
"""通过txt文件生成"""
#split ='train' 'va' 'trainval' 'test'
def parseXmlFiles_by_txt(data_dir,json_save_path,split='train'):
print("hello")
labelfile=split+".txt"
image_sets_file = data_dir + "/ImageSets/Main/"+labelfile
ids=_read_image_ids(image_sets_file)
for _id in ids:
xml_file=data_dir + f"/Annotations/{_id}.xml"
bndbox = dict()
size = dict()
current_image_id = None
current_category_id = None
file_name = None
size['width'] = None
size['height'] = None
size['depth'] = None
tree = ET.parse(xml_file)
root = tree.getroot()
if root.tag != 'annotation':
raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
# elem is <folder>, <filename>, <size>, <object>
for elem in root:
current_parent = elem.tag
current_sub = None
object_name = None
if elem.tag == 'folder':
continue
if elem.tag == 'filename':
file_name = elem.text
if file_name in category_set:
raise Exception('file_name duplicated')
# add img item only after parse <size> tag
elif current_image_id is None and file_name is not None and size['width'] is not None:
if file_name not in image_set:
current_image_id = addImgItem(file_name, size)
print('add image with {} and {}'.format(file_name, size))
else:
raise Exception('duplicated image: {}'.format(file_name))
# subelem is <width>, <height>, <depth>, <name>, <bndbox>
for subelem in elem:
bndbox['xmin'] = None
bndbox['xmax'] = None
bndbox['ymin'] = None
bndbox['ymax'] = None
current_sub = subelem.tag
if current_parent == 'object' and subelem.tag == 'name':
object_name = subelem.text
if object_name not in category_set:
current_category_id = addCatItem(object_name)
else:
current_category_id = category_set[object_name]
elif current_parent == 'size':
if size[subelem.tag] is not None:
raise Exception('xml structure broken at size tag.')
size[subelem.tag] = int(subelem.text)
# option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
for option in subelem:
if current_sub == 'bndbox':
if bndbox[option.tag] is not None:
raise Exception('xml structure corrupted at bndbox tag.')
bndbox[option.tag] = int(option.text)
# only after parse the <object> tag
if bndbox['xmin'] is not None:
if object_name is None:
raise Exception('xml structure broken at bndbox tag')
if current_image_id is None:
raise Exception('xml structure broken at bndbox tag')
if current_category_id is None:
raise Exception('xml structure broken at bndbox tag')
bbox = []
# x
bbox.append(bndbox['xmin'])
# y
bbox.append(bndbox['ymin'])
# w
bbox.append(bndbox['xmax'] - bndbox['xmin'])
# h
bbox.append(bndbox['ymax'] - bndbox['ymin'])
print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id,
bbox))
addAnnoItem(object_name, current_image_id, current_category_id, bbox)
json.dump(coco, open(json_save_path, 'w'))
"""直接从xml文件夹中生成"""
def parseXmlFiles(xml_path,json_save_path):
for f in os.listdir(xml_path):
if not f.endswith('.xml'):
continue
bndbox = dict()
size = dict()
current_image_id = None
current_category_id = None
file_name = None
size['width'] = None
size['height'] = None
size['depth'] = None
xml_file = os.path.join(xml_path, f)
print(xml_file)
tree = ET.parse(xml_file)
root = tree.getroot()
if root.tag != 'annotation':
raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
# elem is <folder>, <filename>, <size>, <object>
for elem in root:
current_parent = elem.tag
current_sub = None
object_name = None
if elem.tag == 'folder':
continue
if elem.tag == 'filename':
file_name = elem.text
if file_name in category_set:
raise Exception('file_name duplicated')
# add img item only after parse <size> tag
elif current_image_id is None and file_name is not None and size['width'] is not None:
if file_name not in image_set:
current_image_id = addImgItem(file_name, size)
print('add image with {} and {}'.format(file_name, size))
else:
raise Exception('duplicated image: {}'.format(file_name))
# subelem is <width>, <height>, <depth>, <name>, <bndbox>
for subelem in elem:
bndbox['xmin'] = None
bndbox['xmax'] = None
bndbox['ymin'] = None
bndbox['ymax'] = None
current_sub = subelem.tag
if current_parent == 'object' and subelem.tag == 'name':
object_name = subelem.text
if object_name not in category_set:
current_category_id = addCatItem(object_name)
else:
current_category_id = category_set[object_name]
elif current_parent == 'size':
if size[subelem.tag] is not None:
raise Exception('xml structure broken at size tag.')
size[subelem.tag] = int(subelem.text)
# option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
for option in subelem:
if current_sub == 'bndbox':
if bndbox[option.tag] is not None:
raise Exception('xml structure corrupted at bndbox tag.')
bndbox[option.tag] = int(option.text)
# only after parse the <object> tag
if bndbox['xmin'] is not None:
if object_name is None:
raise Exception('xml structure broken at bndbox tag')
if current_image_id is None:
raise Exception('xml structure broken at bndbox tag')
if current_category_id is None:
raise Exception('xml structure broken at bndbox tag')
bbox = []
# x
bbox.append(bndbox['xmin'])
# y
bbox.append(bndbox['ymin'])
# w
bbox.append(bndbox['xmax'] - bndbox['xmin'])
# h
bbox.append(bndbox['ymax'] - bndbox['ymin'])
print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id,
bbox))
addAnnoItem(object_name, current_image_id, current_category_id, bbox)
json.dump(coco, open(json_save_path, 'w'))
# Generate from the annotations folder
ann_path="insect_det/Annotations"
json_save_path="insect_det/train.json"
parseXmlFiles(ann_path,json_save_path)
# Setup the paths to train and test images
TRAIN_DIR = 'insect_det/JPEGImages/'
TRAIN_CSV_PATH = 'insect_det/train.json'
# Glob the directories and get the lists of train and test images
train_fns = glob.glob(TRAIN_DIR + '*')
print('数据集图片数量: {}'.format(len(train_fns)))
数据集图片数量: 217
# Test the EDA on the converted dataset
generate_anno_eda('insect_det', 'train.json')
标签类别: [{'supercategory': 'none', 'id': 0, 'name': 'leconte'}, {'supercategory': 'none', 'id': 1, 'name': 'boerner'}, {'supercategory': 'none', 'id': 2, 'name': 'armandi'}, {'supercategory': 'none', 'id': 3, 'name': 'linnaeus'}, {'supercategory': 'none', 'id': 4, 'name': 'coleoptera'}, {'supercategory': 'none', 'id': 5, 'name': 'acuminatus'}]
类别数量: 6
训练集图片数量: 217
训练集标签数量: 1407
长宽为(749,749)的图片数量为: 1
长宽为(565,565)的图片数量为: 1
长宽为(570,570)的图片数量为: 1
长宽为(557,557)的图片数量为: 1
长宽为(523,523)的图片数量为: 1
长宽为(635,635)的图片数量为: 2
长宽为(645,645)的图片数量为: 1
长宽为(718,718)的图片数量为: 1
长宽为(702,702)的图片数量为: 2
长宽为(641,641)的图片数量为: 5
长宽为(639,639)的图片数量为: 2
长宽为(513,513)的图片数量为: 1
长宽为(602,602)的图片数量为: 1
长宽为(601,601)的图片数量为: 1
长宽为(729,729)的图片数量为: 2
长宽为(536,536)的图片数量为: 1
长宽为(657,657)的图片数量为: 3
长宽为(587,587)的图片数量为: 1
长宽为(605,605)的图片数量为: 1
长宽为(613,613)的图片数量为: 1
长宽为(554,554)的图片数量为: 1
长宽为(733,733)的图片数量为: 1
长宽为(740,740)的图片数量为: 1
长宽为(631,631)的图片数量为: 3
长宽为(649,649)的图片数量为: 1
长宽为(623,623)的图片数量为: 6
长宽为(670,670)的图片数量为: 1
长宽为(558,558)的图片数量为: 1
长宽为(610,610)的图片数量为: 3
长宽为(671,671)的图片数量为: 2
长宽为(609,609)的图片数量为: 1
长宽为(661,661)的图片数量为: 2
长宽为(653,653)的图片数量为: 4
长宽为(627,627)的图片数量为: 5
长宽为(619,619)的图片数量为: 4
长宽为(499,499)的图片数量为: 1
长宽为(647,647)的图片数量为: 2
长宽为(583,583)的图片数量为: 1
长宽为(633,633)的图片数量为: 1
长宽为(697,697)的图片数量为: 1
长宽为(632,632)的图片数量为: 4
长宽为(637,637)的图片数量为: 2
长宽为(643,643)的图片数量为: 3
长宽为(636,636)的图片数量为: 3
长宽为(644,644)的图片数量为: 1
长宽为(638,638)的图片数量为: 8
长宽为(514,514)的图片数量为: 1
长宽为(655,655)的图片数量为: 3
长宽为(625,625)的图片数量为: 1
长宽为(621,621)的图片数量为: 1
长宽为(640,640)的图片数量为: 2
长宽为(624,624)的图片数量为: 1
长宽为(541,541)的图片数量为: 1
长宽为(549,549)的图片数量为: 1
长宽为(630,630)的图片数量为: 5
长宽为(650,650)的图片数量为: 3
长宽为(681,681)的图片数量为: 1
长宽为(617,617)的图片数量为: 4
长宽为(663,663)的图片数量为: 1
长宽为(599,599)的图片数量为: 1
长宽为(616,616)的图片数量为: 3
长宽为(495,495)的图片数量为: 1
长宽为(659,659)的图片数量为: 2
长宽为(629,629)的图片数量为: 3
长宽为(595,595)的图片数量为: 1
长宽为(651,651)的图片数量为: 2
长宽为(582,582)的图片数量为: 1
长宽为(693,693)的图片数量为: 1
长宽为(660,660)的图片数量为: 3
长宽为(628,628)的图片数量为: 2
长宽为(652,652)的图片数量为: 5
长宽为(620,620)的图片数量为: 8
长宽为(581,581)的图片数量为: 1
长宽为(580,580)的图片数量为: 1
长宽为(572,572)的图片数量为: 1
长宽为(590,590)的图片数量为: 1
长宽为(577,577)的图片数量为: 1
长宽为(576,576)的图片数量为: 1
长宽为(704,704)的图片数量为: 1
长宽为(560,560)的图片数量为: 1
长宽为(614,614)的图片数量为: 3
长宽为(600,600)的图片数量为: 2
长宽为(676,676)的图片数量为: 2
长宽为(612,612)的图片数量为: 4
长宽为(552,552)的图片数量为: 1
长宽为(622,622)的图片数量为: 3
长宽为(674,674)的图片数量为: 1
长宽为(656,656)的图片数量为: 3
长宽为(608,608)的图片数量为: 1
长宽为(691,691)的图片数量为: 1
长宽为(592,592)的图片数量为: 1
长宽为(634,634)的图片数量为: 4
长宽为(518,518)的图片数量为: 1
长宽为(589,589)的图片数量为: 1
长宽为(596,596)的图片数量为: 1
长宽为(588,588)的图片数量为: 1
长宽为(692,692)的图片数量为: 1
长宽为(564,564)的图片数量为: 3
长宽为(684,684)的图片数量为: 1
长宽为(569,569)的图片数量为: 1
长宽为(765,765)的图片数量为: 1
长宽为(707,707)的图片数量为: 1
长宽为(498,498)的图片数量为: 1
长宽为(754,754)的图片数量为: 1
长宽为(626,626)的图片数量为: 1
长宽为(512,512)的图片数量为: 1
长宽为(615,615)的图片数量为: 2
长宽为(665,665)的图片数量为: 1
长宽为(611,611)的图片数量为: 5
长宽为(603,603)的图片数量为: 1
长宽为(618,618)的图片数量为: 2
长宽为(662,662)的图片数量为: 3
长宽为(607,607)的图片数量为: 2
训练集图片数量: 217
unique id 数量: 1407
unique image_id 数量 217
标签列表: dict_keys(['leconte', 'boerner', 'armandi', 'linnaeus', 'coleoptera', 'acuminatus'])
(figure: output_73_1.png)
4 Summary
Building on the object detection EDA template from the tile surface defect dataset analysis project, this article refines the EDA workflow specifically for dense object detection and further enriches the original template. Combined with the VOC-to-COCO conversion script, it forms a complete EDA template for COCO-format datasets with rectangular box annotations.
Next, the EDA templates will be extended to instance segmentation and image segmentation tasks.