训练数据集的图片尺寸修改及标注标签修改
在使用深度学习的方式对同一角度拍摄的多张已完成标注的图片进行训练时,时常会出现感兴趣区域过小的情况。固定角度拍摄所得到的图片数据集中,往往存在部分区域可以直接去除,不用参与到模型的训练中,这样同时可以提高感兴趣区域在训练图片中的占比。如下图,该图为SODIC2021道路路面病害智能分析算法中的一张图:在同一角度拍摄的图片有很多,但是标注基本只出现在图片的下半部分,因此可以通过裁剪掉图片上半部分的方式,提高训练过程中感兴趣区域的占比。此处只是以该数据集作为一个合适的示例。
首先是对数据集中的图片进行查找。如果图片都在同一个文件夹下,可以直接用
os.listdir(path)
如果在一个文件夹内存在多个数据集,可以通过以下递归代码进行查找:
def imgs_seek(start_path):
    """Recursively collect the image files found under ``start_path``.

    Args:
        start_path: Directory to search; its sub-directories are searched too.

    Returns:
        A list with the absolute path of every file whose extension is
        ``.jpg``, ``.png`` or ``.jpeg`` (compared case-insensitively).
    """
    # Every entry needs its leading dot, because os.path.splitext keeps it.
    photo_type = ('.jpg', '.png', '.jpeg')
    photo_list = []  # collected image paths
    # os.walk is used instead of a chdir-based recursion so the working
    # directory of the process is never modified.
    for root, dirs, files in os.walk(os.path.abspath(start_path)):
        dirs.sort()  # visit sub-directories in a deterministic order
        for each in sorted(files):
            if os.path.splitext(each)[1].lower() in photo_type:
                photo_list.append(os.path.join(root, each))
    return photo_list
然后是对图片的区域提取,可以通过opencv直接对读取的图片进行矩阵分割
dst = img[heigh_strat:heigh_end, width_start:width_end]
通过函数对分割后的图片进行图片保存和新图片的尺寸输出,代码如下
def img_cut(img, img_save_path, heigh_strat, heigh_end, width_start, width_end):
    """Crop ``img`` to a rectangular window, save the crop and return its size.

    Args:
        img: Image data as read by OpenCV (an array indexed ``[row, column]``).
        img_save_path: File path the cropped image is written to.
        heigh_strat, heigh_end: First row kept and first row excluded.
        width_start, width_end: First column kept and first column excluded.

    Returns:
        ``(now_h, now_w)``, the height and width of the cropped image.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    dst = img[heigh_strat:heigh_end, width_start:width_end]
    # Read only the first two axes so single-channel (2-D) images work too.
    now_h, now_w = dst.shape[:2]
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(img_save_path, dst):
        raise OSError("cv2.imwrite failed to save %s" % img_save_path)
    return now_h, now_w
对通过labelimg以YOLO方式标注生成的标签文件,可以通过以下代码进行标签位置的改变:
def txt_check(txt_path, save_txt_path, heigh_strat, heigh_end, width_start, width_end, heigh, width):
    """Rewrite a YOLO label file so that it matches a cropped image.

    Every input line is ``class x_center y_center box_width box_height``,
    where the four numbers are normalised by the original image size. Each
    box is converted to pixel edges, clipped to the crop window, shifted to
    the crop origin and normalised by the crop size. Boxes whose clipped
    width or height is not larger than 0.001 of the crop are dropped.

    Args:
        txt_path: Path of the original label file.
        save_txt_path: Path the converted labels are appended to.
        heigh_strat, heigh_end, width_start, width_end: Crop window in pixels
            of the original image (``img[heigh_strat:heigh_end,
            width_start:width_end]``).
        heigh, width: Height and width of the original image in pixels.

    Raises:
        ValueError: If the crop window has no positive height or width.
    """
    crop_h = heigh_end - heigh_strat
    crop_w = width_end - width_start
    if crop_h <= 0 or crop_w <= 0:
        raise ValueError("crop window must have a positive height and width")

    kept = []
    with open(txt_path, 'r') as src:
        for line in src:
            fields = line.split()
            if len(fields) < 5:
                continue  # blank or malformed line
            x_c, y_c, box_w, box_h = (float(v) for v in fields[1:5])

            # YOLO stores the box centre; recover the pixel edges first.
            left = (x_c - box_w / 2) * width
            right = (x_c + box_w / 2) * width
            top = (y_c - box_h / 2) * heigh
            bottom = (y_c + box_h / 2) * heigh

            # Clip to the crop window, then move the origin to its corner.
            left = min(max(left, width_start), width_end) - width_start
            right = min(max(right, width_start), width_end) - width_start
            top = min(max(top, heigh_strat), heigh_end) - heigh_strat
            bottom = min(max(bottom, heigh_strat), heigh_end) - heigh_strat

            new_w = (right - left) / crop_w
            new_h = (bottom - top) / crop_h
            if new_w > 0.001 and new_h > 0.001:
                new_x = (left + right) / 2 / crop_w
                new_y = (top + bottom) / 2 / crop_h
                kept.append("%s %.6f %.6f %.6f %.6f\n"
                            % (fields[0], new_x, new_y, new_w, new_h))

    # Append mode is kept so existing content of the target is preserved.
    with open(save_txt_path, 'a') as dst:
        dst.writelines(kept)
对通过labelimg以pascalVOC方式标注生成的xml文件,可通过以下代码对裁剪后的图片进行标注更新:
import xml.etree.ElementTree as ET
def xml_check(now_h, now_w, xml_path, save_xml_path, heigh_start, heigh_end, width_start, width_end):
    """Rewrite a Pascal VOC annotation so that it matches a cropped image.

    The ``size`` element is updated to the cropped size. Every ``bndbox`` is
    clipped to the crop window and shifted to the crop origin. Objects whose
    remaining box is narrower or lower than 5 pixels are removed.

    Args:
        now_h, now_w: Height and width of the cropped image.
        xml_path: Path of the original annotation file.
        save_xml_path: Path the updated annotation is written to.
        heigh_start, heigh_end, width_start, width_end: Crop window in pixels
            of the original image.
    """
    doc = ET.parse(xml_path)
    root = doc.getroot()

    # Look the size fields up by tag so their order in the file is irrelevant.
    size_root = root.find("size")
    if size_root is not None:
        for tag, value in (("width", now_w), ("height", now_h)):
            node = size_root.find(tag)
            if node is not None:
                node.text = str(value)

    bounds = (
        ("xmin", width_start, width_end),
        ("ymin", heigh_start, heigh_end),
        ("xmax", width_start, width_end),
        ("ymax", heigh_start, heigh_end),
    )
    # findall returns a list, so removing objects while looping is safe.
    for child in root.findall('object'):
        sub = child.find('bndbox')
        if sub is None:
            continue
        coords = {}
        for tag, start, end in bounds:
            node = sub.find(tag)
            # Clamp into the crop window first, then express the value in the
            # coordinate system of the cropped image.
            value = min(max(int(float(node.text)), start), end) - start
            node.text = str(value)
            coords[tag] = value
        if coords["xmax"] - coords["xmin"] < 5 or coords["ymax"] - coords["ymin"] < 5:
            root.remove(child)
    doc.write(save_xml_path)
整体代码如下:
import cv2
import os
import random
import shutil
from shutil import copyfile
import xml.etree.ElementTree as ET
from decimal import Decimal
def xml_check(now_h, now_w, xml_path, save_xml_path, heigh_start, heigh_end, width_start, width_end):
    """Rewrite a Pascal VOC annotation so that it matches a cropped image.

    The ``size`` element is updated to the cropped size. Every ``bndbox`` is
    clipped to the crop window and shifted to the crop origin. Objects whose
    remaining box is narrower or lower than 5 pixels are removed.

    Args:
        now_h, now_w: Height and width of the cropped image.
        xml_path: Path of the original annotation file.
        save_xml_path: Path the updated annotation is written to.
        heigh_start, heigh_end, width_start, width_end: Crop window in pixels
            of the original image.
    """
    doc = ET.parse(xml_path)
    root = doc.getroot()

    # Look the size fields up by tag so their order in the file is irrelevant.
    size_root = root.find("size")
    if size_root is not None:
        for tag, value in (("width", now_w), ("height", now_h)):
            node = size_root.find(tag)
            if node is not None:
                node.text = str(value)

    bounds = (
        ("xmin", width_start, width_end),
        ("ymin", heigh_start, heigh_end),
        ("xmax", width_start, width_end),
        ("ymax", heigh_start, heigh_end),
    )
    # findall returns a list, so removing objects while looping is safe.
    for child in root.findall('object'):
        sub = child.find('bndbox')
        if sub is None:
            continue
        coords = {}
        for tag, start, end in bounds:
            node = sub.find(tag)
            # Clamp into the crop window first, then express the value in the
            # coordinate system of the cropped image.
            value = min(max(int(float(node.text)), start), end) - start
            node.text = str(value)
            coords[tag] = value
        if coords["xmax"] - coords["xmin"] < 5 or coords["ymax"] - coords["ymin"] < 5:
            root.remove(child)
    doc.write(save_xml_path)
def rm_mkdir(path):
    """Make ``path`` an empty directory, deleting whatever was there before.

    Args:
        path: Directory to (re)create. Missing parent directories are
            created as well.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    # makedirs rather than mkdir, so a missing parent directory is not an error.
    os.makedirs(path)
def txt_check(txt_path, save_txt_path, heigh_strat, heigh_end, width_start, width_end, heigh, width):
    """Rewrite a YOLO label file so that it matches a cropped image.

    Every input line is ``class x_center y_center box_width box_height``,
    where the four numbers are normalised by the original image size. Each
    box is converted to pixel edges, clipped to the crop window, shifted to
    the crop origin and normalised by the crop size. Boxes whose clipped
    width or height is not larger than 0.001 of the crop are dropped.

    Args:
        txt_path: Path of the original label file.
        save_txt_path: Path the converted labels are appended to.
        heigh_strat, heigh_end, width_start, width_end: Crop window in pixels
            of the original image (``img[heigh_strat:heigh_end,
            width_start:width_end]``).
        heigh, width: Height and width of the original image in pixels.

    Raises:
        ValueError: If the crop window has no positive height or width.
    """
    crop_h = heigh_end - heigh_strat
    crop_w = width_end - width_start
    if crop_h <= 0 or crop_w <= 0:
        raise ValueError("crop window must have a positive height and width")

    kept = []
    with open(txt_path, 'r') as src:
        for line in src:
            fields = line.split()
            if len(fields) < 5:
                continue  # blank or malformed line
            x_c, y_c, box_w, box_h = (float(v) for v in fields[1:5])

            # YOLO stores the box centre; recover the pixel edges first.
            left = (x_c - box_w / 2) * width
            right = (x_c + box_w / 2) * width
            top = (y_c - box_h / 2) * heigh
            bottom = (y_c + box_h / 2) * heigh

            # Clip to the crop window, then move the origin to its corner.
            left = min(max(left, width_start), width_end) - width_start
            right = min(max(right, width_start), width_end) - width_start
            top = min(max(top, heigh_strat), heigh_end) - heigh_strat
            bottom = min(max(bottom, heigh_strat), heigh_end) - heigh_strat

            new_w = (right - left) / crop_w
            new_h = (bottom - top) / crop_h
            if new_w > 0.001 and new_h > 0.001:
                new_x = (left + right) / 2 / crop_w
                new_y = (top + bottom) / 2 / crop_h
                kept.append("%s %.6f %.6f %.6f %.6f\n"
                            % (fields[0], new_x, new_y, new_w, new_h))

    # Append mode is kept so existing content of the target is preserved.
    with open(save_txt_path, 'a') as dst:
        dst.writelines(kept)
def img_cut(img, img_save_path, heigh_strat, heigh_end, width_start, width_end):
    """Crop ``img`` to a rectangular window, save the crop and return its size.

    Args:
        img: Image data as read by OpenCV (an array indexed ``[row, column]``).
        img_save_path: File path the cropped image is written to.
        heigh_strat, heigh_end: First row kept and first row excluded.
        width_start, width_end: First column kept and first column excluded.

    Returns:
        ``(now_h, now_w)``, the height and width of the cropped image.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    dst = img[heigh_strat:heigh_end, width_start:width_end]
    # Read only the first two axes so single-channel (2-D) images work too.
    now_h, now_w = dst.shape[:2]
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(img_save_path, dst):
        raise OSError("cv2.imwrite failed to save %s" % img_save_path)
    return now_h, now_w
def imgs_seek(start_path):
    """Recursively collect the JPEG files found under ``start_path``.

    Args:
        start_path: Directory to search; its sub-directories are searched too.

    Returns:
        A list with the absolute path of every file whose extension is
        ``.jpg`` or ``.jpeg`` (compared case-insensitively).
    """
    # Every entry needs its leading dot, because os.path.splitext keeps it.
    photo_type = ('.jpg', '.jpeg')
    photo_list = []  # collected image paths
    # os.walk is used instead of a chdir-based recursion so the working
    # directory of the process is never modified.
    for root, dirs, files in os.walk(os.path.abspath(start_path)):
        dirs.sort()  # visit sub-directories in a deterministic order
        for each in sorted(files):
            if os.path.splitext(each)[1].lower() in photo_type:
                photo_list.append(os.path.join(root, each))
    return photo_list
if __name__ == "__main__":
    # Crop a share of the images together with their YOLO / VOC labels and
    # copy the remaining images and labels unchanged into the same output tree.
    imgs_path = r'D:\data\road_datas\train\images'  # source images
    old_txt_path = r'D:\data\road_datas\train\all_txt'  # source txt labels (YOLO)
    old_xml_path = r'D:\data\road_datas\train\all_xml'  # source xml labels (VOC)
    save_path = r'D:\Desktop\test_cut\save2'  # output root, wiped on every run
    rm_mkdir(save_path)
    cut_imgs_save = os.path.join(save_path, 'images')  # output images
    labels_path = os.path.join(save_path, 'labels')  # output txt labels
    annotations_path = os.path.join(save_path, 'annotations')  # output xml labels
    # Crop window, following the OpenCV convention img[y0:y1, x0:x1].
    heigh_strat, heigh_end, width_start, width_end = 400, 1000, 200, 1200
    rm_mkdir(cut_imgs_save)
    rm_mkdir(labels_path)
    rm_mkdir(annotations_path)

    imgs_list = imgs_seek(imgs_path)
    img_num = len(imgs_list)
    set_percent = 1  # fraction of the images that is cropped
    random.seed(8)  # fixed seed so the shuffle is repeatable
    random.shuffle(imgs_list)
    cut_imgs_list = imgs_list[:int(img_num * set_percent)]
    origin_img_list = imgs_list[int(img_num * set_percent):]
    finish_num = 0
    origin_num = 0
    total_cut_num = len(cut_imgs_list)
    origin_total_num = img_num - total_cut_num
    print("在%s文件夹下共找到%d个图片文件,共裁剪%d个文件"%(imgs_path, img_num, total_cut_num))

    # Crop the selected images and convert their labels.
    for cut_img_path in cut_imgs_list:
        # basename/splitext copes with dots in directory or file names and
        # with either path separator.
        img_id = os.path.splitext(os.path.basename(cut_img_path))[0]
        cut_img_save_path = os.path.join(cut_imgs_save, img_id + '.jpg')
        img = cv2.imread(cut_img_path)
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            print("无法读取图片,已跳过:%s" % cut_img_path)
            continue
        h, w = img.shape[:2]
        # NOTE(review): only the top heigh_strat rows are removed here;
        # heigh_end, width_start and width_end from the configuration above
        # are not applied. Kept as in the original; confirm this is intended.
        h_s, h_e, w_s, w_e = heigh_strat, h, 0, w
        n_h, n_w = img_cut(img, cut_img_save_path, h_s, h_e, w_s, w_e)
        old_laebl_path = os.path.join(old_txt_path, img_id + '.txt')
        save_label_path = os.path.join(labels_path, img_id + '.txt')
        old_xml_path1 = os.path.join(old_xml_path, img_id + '.xml')
        save_xml_path = os.path.join(annotations_path, img_id + '.xml')
        # A dataset may provide only one annotation format; convert what exists.
        if os.path.isfile(old_xml_path1):
            xml_check(n_h, n_w, old_xml_path1, save_xml_path, h_s, h_e, w_s, w_e)
        if os.path.isfile(old_laebl_path):
            txt_check(old_laebl_path, save_label_path, h_s, h_e, w_s, w_e, h, w)
        finish_num += 1
        print("裁剪图片和标签已完成:%s/%s,总共为%s/%s" % (finish_num, total_cut_num, finish_num, img_num))

    # Copy the remaining images and their labels unchanged.
    for origin_img_path in origin_img_list:
        img_id = os.path.splitext(os.path.basename(origin_img_path))[0]
        origin_img_save_path = os.path.join(cut_imgs_save, img_id + '.jpg')
        old_laebl_path = os.path.join(old_txt_path, img_id + '.txt')
        old_xml_path2 = os.path.join(old_xml_path, img_id + '.xml')
        origin_label_save_path = os.path.join(labels_path, img_id + '.txt')
        origin_xml_save_path = os.path.join(annotations_path, img_id + '.xml')
        copyfile(origin_img_path, origin_img_save_path)
        if os.path.isfile(old_laebl_path):
            copyfile(old_laebl_path, origin_label_save_path)
        if os.path.isfile(old_xml_path2):
            copyfile(old_xml_path2, origin_xml_save_path)
        finish_num += 1
        origin_num += 1
        print("保存原图片和标签已完成:%s/%s,总共为%s/%s" % (origin_num, origin_total_num, finish_num, img_num))
裁剪后的标签及图片结果如下: