1.txt转换为xml格式
这位真的是神啊,救了我的狗命了,试了好多代码,只有他的成功。
2.扩充数据集
新建 data-augmention.py
修改代码497行左右,这部分文件路径
需要四个文件夹的路径:
source_img_path :图片原始位置
source_xml_path:xml的原始位置
save_img_path :图片增强结果保存文件
save_xml_path: xml增强结果保存文件
修改完运行如下
3.xml转换txt
将生成的xml标签再次转换为txt标签,
深度学习——xml格式转txt格式数据集_将xml文件转换为txt文件-CSDN博客
总结:
使用增强后的数据集放入yolov8中训练,精度没有任何提升。
原因分析:
询问过其他大佬后得知,yolov8自身的数据增强技术会替换掉原有的图片,只使用增强后的图片来训练。因此,本文所做的数据增强,让图像训练达到了过拟合。
关闭了yolov8所有的数据增强技术后,使用原有数据集训练,精度降低了不少。
建议:
1.yolov8本身就有很多种基础的数据增强技术,且对项目是有益的,可以在超参数中设置,平时使用足够了。
2.数据增强对本文的项目作用不大,说明只能添加新的数据集来提升精度。
报错:
1.txt转换xml失败代码之一:AttributeError: 'NoneType' object has no attribute 'shape'
我一开始的转换代码是参考这位 txt2xml -- .txt转.xml标注格式_联沐的博客-CSDN博客
好的,报错 。
(yolov8) root@wsjdy-08:/home/wsjdy/yyt/nfshare/yolov8# python txt-xml.py
[ WARN:0@0.062] global loadsave.cpp:244 findDecoder imread_('/home/wsjdy/yyt/nfshare/datasets/VOC/data-augmention/img/000005547.jpg'): can't open/read file: check file path/integrity
Traceback (most recent call last):
File "/home/wsjdy/yyt/nfshare/yolov8/txt-xml.py", line 120, in <module>
makexml(picPath, txtPath, xmlPath)
File "/home/wsjdy/yyt/nfshare/yolov8/txt-xml.py", line 29, in makexml
Pheight, Pwidth, Pdepth = img.shape
AttributeError: 'NoneType' object has no attribute 'shape'
打开文件发现28-29行有问题
更换代码
# img = cv2.imread(picPath + name[0:-4] + ".jpg")
img = cv2_readimg(picPath + name[0:-4] + ".jpg")
没用。
分析原因:
参考https://wenku.csdn.net/answer/27f393aa00164e0faabde0b1ba3c1e6e
该代码是先根据txt文件名来检索img文件的,有些图片我删掉了,但是txt的标注还在,所以代码找不到对应的图像
解决:
深度学习图片数量较少扩充数据集的方法_深度学习图片数据不够-CSDN博客
添加代码,如果图像不存在,就删掉该txt文件
if img is None:
os.remove(txtpath+name)
continue
报错是解决了,但是生成不了xml文件。
但是为什么我不用这代码呢,因为运行之后我的文件夹里还是一个xml文件都没有生成,再见。
2.txt转换xml失败代码之二——标签类别混乱
这是我使用的第二种txt转换xml代码
参考yolov5的txt文件转xml文件格式(详细解释与完整代码供应)_yolo转xml_tangjunjun-owen的博客-CSDN博客
import os
import cv2
from tqdm import tqdm
from lxml.etree import Element, SubElement, tostring, ElementTree
from xml.dom.minidom import parseString
import numpy as np
def build_dir(out_dir):
    """Make sure the directory ``out_dir`` exists and return its path.

    Missing parent directories are created as well, and a directory that
    already exists is left untouched.

    :param out_dir: path of the directory to create
    :return: ``out_dir``, unchanged
    """
    # makedirs (unlike mkdir) copes with nested paths, and exist_ok avoids
    # the check-then-create race of an explicit os.path.exists() test.
    os.makedirs(out_dir, exist_ok=True)
    return out_dir
def get_root_lst(root, suffix='jpg', suffix_n=3):
    """Walk ``root`` recursively and collect the files whose name tail matches.

    A file is kept when its last ``suffix_n`` characters are equal to
    ``suffix``.

    :param root: directory to walk
    :param suffix: text the file name has to end with
    :param suffix_n: number of trailing characters compared against ``suffix``
    :return: ``(paths, names)``, two parallel lists holding the full path and
        the bare file name of every kept file
    """
    found_paths, found_names = [], []
    for folder, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            if filename[-suffix_n:] != suffix:
                continue
            found_paths.append(os.path.join(folder, filename))
            found_names.append(filename)
    return found_paths, found_names
def read_txt(path):
    """Read a whitespace-separated text file into a numpy array of strings.

    Each non-blank line becomes one row made of its whitespace-separated
    fields. Blank lines are skipped: an empty row would make the rows ragged
    and would break callers that index the first field of every row.

    :param path: path of the UTF-8 encoded text file
    :return: ``numpy.ndarray`` built from the list of rows
    """
    rows = []
    with open(path, "r", encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if fields:
                rows.append(fields)
    return np.array(rows)
def product_xml(name_img, boxes, codes, img=None, wh=None):
    """Build a VOC-style ``annotation`` tree for one image.

    :param name_img: image file name; written to ``<filename>`` and used to
        derive the name of the xml file
    :param boxes: list of ``[xmin, ymin, xmax, ymax]`` boxes, one per entry
        of ``codes``
    :param codes: list of class names, one per box
    :param img: optional already-loaded image; its ``shape`` is read as
        ``(height, width, ...)``, the same order the rest of this file uses
    :param wh: ``[width, height]``, required when ``img`` is None
    :return: ``(tree, name)`` where ``tree`` is the ElementTree to write and
        ``name`` is the xml file name derived from ``name_img``
    """
    if img is not None:
        # shape is (rows, cols, ...), i.e. height comes first
        height, width = img.shape[0], img.shape[1]
    else:
        assert wh is not None, 'either img or wh must be given'
        width, height = wh[0], wh[1]

    node_root = Element('annotation')
    SubElement(node_root, 'folder').text = 'VOC2007'
    SubElement(node_root, 'filename').text = name_img

    node_size = SubElement(node_root, 'size')
    SubElement(node_size, 'width').text = str(width)
    SubElement(node_size, 'height').text = str(height)
    SubElement(node_size, 'depth').text = '3'

    for i, code in enumerate(codes):
        box = boxes[i]
        node_object = SubElement(node_root, 'object')
        SubElement(node_object, 'name').text = str(code)
        SubElement(node_object, 'difficult').text = '0'
        node_bndbox = SubElement(node_object, 'bndbox')
        for pos, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
            SubElement(node_bndbox, tag).text = str(int(box[pos]))

    # splitext copes with extensions that are not exactly three characters
    name = os.path.splitext(name_img)[0] + '.xml'
    tree = ElementTree(node_root)
    return tree, name
def yolov5txt2xml(root_data, txt_root, gt_labels=None, out_dir=None):
    """Convert YOLO txt labels into VOC xml files, one per labelled image.

    Every ``.jpg`` found under ``root_data`` is matched by file name with a
    ``.txt`` found under ``txt_root``. Each txt row is read as
    ``class x_center y_center width height`` with the four numbers given
    relative to the image size, and is converted to pixel corner
    coordinates.

    :param root_data: directory that holds the images
    :param txt_root: directory that holds the txt labels
    :param gt_labels: optional list of class names indexed by the integer
        class id of the txt files; when omitted the raw id string is written
    :param out_dir: directory for the xml files; defaults to
        ``<txt_root>/out_dir_xml``
    """
    # Collect the paths and names of the images and of the txt labels
    img_roots_lst, img_names_lst = get_root_lst(root_data, suffix='jpg', suffix_n=3)
    txt_roots_lst, txt_names_lst = get_root_lst(txt_root, suffix='txt', suffix_n=3)
    # Create the directory that receives the xml files
    out_dir = build_dir(out_dir) if out_dir is not None else build_dir(os.path.join(txt_root, 'out_dir_xml'))

    # name -> path lookup; setdefault keeps the first match, like list.index did
    txt_path_by_name = {}
    for txt_name, txt_path in zip(txt_names_lst, txt_roots_lst):
        txt_path_by_name.setdefault(txt_name, txt_path)

    label_str_lst = []
    for img_root, img_name in tqdm(zip(img_roots_lst, img_names_lst), total=len(img_roots_lst)):
        txt_path = txt_path_by_name.get(img_name[:-3] + 'txt')
        if txt_path is None:
            # image without a label file: nothing to convert
            continue

        img = cv2.imread(img_root)
        if img is None:
            # imread returns None instead of raising when the file cannot be read
            print('skip unreadable image:', img_root)
            continue
        height, width = img.shape[:2]

        # labels_lst and boxes_lst are filled in lockstep, one entry per txt row
        labels_lst, boxes_lst = [], []
        for info in read_txt(txt_path):
            label_str = str(info[0])
            if label_str not in label_str_lst:
                label_str_lst.append(label_str)
            x, y = float(info[1]) * width, float(info[2]) * height
            w, h = float(info[3]) * width, float(info[4]) * height
            xmin, ymin, xmax, ymax = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2)
            labels_lst.append(label_str)
            boxes_lst.append([xmin, ymin, xmax, ymax])

        if gt_labels:  # gt_labels has to follow the class order of the txt files
            labels_lst = [gt_labels[int(lb)] for lb in labels_lst]

        if labels_lst:
            # Pass the IMAGE size; w and h above are the size of the last box.
            tree, xml_name = product_xml(img_name, boxes_lst, labels_lst, wh=[width, height])
            tree.write(os.path.join(out_dir, xml_name))

    print('gt label:', gt_labels)
    print('txt label:', label_str_lst)
    print('save root:', out_dir)
if __name__ == '__main__':
    # Directory with the images and directory with their YOLO txt labels
    root_path = r'/home/wsjdy/yyt/nfshare/datasets/VOC/data-augmention/img'
    txt_root = r'/home/wsjdy/yyt/nfshare/datasets/VOC/data-augmention/txt'
    # Class names, looked up by the integer class id found in the txt files
    gt_labels = [
        'fire',
        'smoke',
        'hat',
        'person',
        'truck',
        'dozer',
        'excavator',
        'Hoarding',
        'water_horse',
    ]
    yolov5txt2xml(root_data=root_path, txt_root=txt_root, gt_labels=gt_labels)
跑出来的类别乱了,我不知道怎么改
3. txt转换xml代码失败之三:xA = max(boxA[0], boxB[0]) IndentationError: unindent does not match any outer indentation level
又一份txt转xml的代码,参考python 批量txt转xml_python txt 转xml_咩咩咩33的博客-CSDN博客
原因参考:
IndentationError: unindent does not match any outer indentation level笔记-CSDN博客
解决:
def下面一行的“ ' ' ' ” ,缩进成如下位置
4. ValueError: invalid literal for int() with base 10: '0.496827'
原因参考
ValueError: invalid literal for int() with base 10问题处理-CSDN博客
尝试:
423行附近的int改为float,但是这样0.多的数值,被近似为0,标注只剩个点
5. xml转txt,只输出类别,没有新建文件
参考如何实现数据增强——扩充数据集-CSDN博客写的转换文件,我只输出了类别,没有生成txt文件
6.txt转xml失败代码之四——生成xml文件中标注只有一个点
参考python 批量txt转xml_python txt 转xml_咩咩咩33的博客-CSDN博客
需要三个文件夹:img(原图片文件夹),txt(txt标注存放的文件),xml(xml标注生成的文件)
新建vim txt-xml.py
只需要修改最后几行" if __name__ == '__main__':"之后的文件夹路径即可,
import os
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement
from PIL import Image
class Xml_make(object):
    """Turn flat annotation lists into a tab-indented xml annotation file."""

    def __init__(self):
        super().__init__()

    def __indent(self, elem, level=0):
        """Add newline/tab whitespace to ``elem`` and its descendants in place.

        Existing non-blank ``text``/``tail`` values are left untouched.
        """
        pad = "\n" + "\t" * level
        children = list(elem)
        if not children:
            # Leaf: only a nested leaf gets a tail, the root is left alone.
            if level and not (elem.tail and elem.tail.strip()):
                elem.tail = pad
            return
        if not (elem.text and elem.text.strip()):
            elem.text = pad + "\t"
        if not (elem.tail and elem.tail.strip()):
            elem.tail = pad
        for child in children:
            self.__indent(child, level + 1)
        # The last child closes the parent, so it steps back one level.
        last_child = children[-1]
        if not (last_child.tail and last_child.tail.strip()):
            last_child.tail = pad

    def _imageinfo(self, list_top):
        """Create the ``annotation`` root holding the image-level fields.

        ``list_top`` is read by position: 1 folder, 2 filename, 3 width,
        4 height, 5 depth (position 0, the save path, is not used here).

        :return: ``(tree, annotation_root)``
        """
        annotation_root = ET.Element('annotation')
        tree = ET.ElementTree(annotation_root)

        SubElement(annotation_root, 'folder').text = list_top[1]
        SubElement(annotation_root, 'filename').text = list_top[2]

        source_element = SubElement(annotation_root, 'source')
        SubElement(source_element, 'database').text = 'KAIST'

        size_element = SubElement(annotation_root, 'size')
        for position, tag in enumerate(('width', 'height', 'depth'), start=3):
            SubElement(size_element, tag).text = str(list_top[position])

        SubElement(annotation_root, 'segmented').text = '0'
        return tree, annotation_root

    def _bndbox(self, annotation_root, list_bndbox):
        """Append one ``object`` element per group of nine list entries.

        Each group is read as: name, xmin, ymin, xmax, ymax, pose, truncated,
        difficult, occlusion.

        :return: ``annotation_root`` with the objects appended
        """
        corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
        flag_tags = ('pose', 'truncated', 'difficult', 'occlusion')
        for base in range(0, len(list_bndbox), 9):
            object_element = ET.Element('object')
            SubElement(object_element, 'name').text = list_bndbox[base]
            bndbox_element = SubElement(object_element, 'bndbox')
            for offset, tag in enumerate(corner_tags, start=1):
                SubElement(bndbox_element, tag).text = str(list_bndbox[base + offset])
            for offset, tag in enumerate(flag_tags, start=5):
                SubElement(object_element, tag).text = list_bndbox[base + offset]
            annotation_root.append(object_element)
        return annotation_root

    def txt_to_xml(self, list_top, list_bndbox):
        """Build the annotation tree and write it to ``list_top[0]``."""
        tree, annotation_root = self._imageinfo(list_top)
        annotation_root = self._bndbox(annotation_root, list_bndbox)
        self.__indent(annotation_root)
        tree.write(list_top[0], encoding='utf-8', xml_declaration=True)
def txt_2_xml(source_path, xml_save_dir, txt_dir):
    """Write one xml annotation per ``.jpg`` image, using its txt label file.

    Each label line is read as ``name v1 v2 v3 v4``:

    * when all four values lie in ``[0, 1]`` the line is taken to be a
      normalised YOLO box (``x_center y_center width height``, relative to
      the image size) and is converted to pixel corner coordinates;
    * otherwise the values are written unchanged as
      ``xmin ymin xmax ymax``.

    Writing normalised values unconverted is what collapsed every box into a
    single point. Images without a label file are reported and skipped.

    :param source_path: directory (walked recursively) holding the images
    :param xml_save_dir: directory that receives the xml files
    :param txt_dir: directory holding the txt label files
    """
    os.makedirs(xml_save_dir, exist_ok=True)
    depth = '3'
    occlusion = '0'
    pose = 'unknown'
    truncated = '0'
    difficult = '0'
    COUNT = 0
    for folder_path, _folder_names, file_name_list in os.walk(source_path):
        for file_name in file_name_list:
            filename, file_suffix = os.path.splitext(file_name)
            if file_suffix != '.jpg':
                continue
            path = os.path.join(folder_path, file_name)
            xml_save_path = os.path.join(xml_save_dir, filename + '.xml')
            txt_path = os.path.join(txt_dir, filename + '.txt')
            if not os.path.isfile(txt_path):
                print('skip (no txt label):', path)
                continue

            # Only the size is needed, so release the image file right away.
            with Image.open(path) as im:
                im_w, im_h = im.size

            list_top = [xml_save_path, folder_path, filename,
                        str(im_w), str(im_h), depth]
            list_bndbox = []
            with open(txt_path, 'r') as label_file:
                for line in label_file:
                    line = line.strip()
                    if not line or line == "% bbGt version=3":
                        continue
                    info = line.split()
                    name = info[0]
                    v1, v2, v3, v4 = (float(info[1]), float(info[2]),
                                      float(info[3]), float(info[4]))
                    if all(0.0 <= v <= 1.0 for v in (v1, v2, v3, v4)):
                        # Normalised YOLO box -> pixel corners, kept inside the image
                        x_cen, y_cen = v1 * im_w, v2 * im_h
                        w, h = v3 * im_w, v4 * im_h
                        xmin = max(0, int(x_cen - w / 2))
                        ymin = max(0, int(y_cen - h / 2))
                        xmax = min(im_w, int(x_cen + w / 2))
                        ymax = min(im_h, int(y_cen + h / 2))
                    else:
                        xmin, ymin, xmax, ymax = v1, v2, v3, v4
                    list_bndbox.extend([name, str(xmin), str(ymin), str(xmax), str(ymax),
                                        pose, truncated, difficult, occlusion])

            Xml_make().txt_to_xml(list_top, list_bndbox)
            COUNT += 1
            print(COUNT, xml_save_path)
if __name__ == '__main__':
    # Images that the txt label files belong to
    source_path = r'/home/wsjdy/yyt/nfshare/datasets/VOC/data-augmention/img'
    # Directory where the converted xml label files are saved
    xml_save_dir = r'/home/wsjdy/yyt/nfshare/datasets/VOC/data-augmention/xml'
    # txt label files to convert
    txt_dir = r'/home/wsjdy/yyt/nfshare/datasets/VOC/data-augmention/txt'
    txt_2_xml(
        source_path=source_path,
        xml_save_dir=xml_save_dir,
        txt_dir=txt_dir,
    )
结果,生成3098个xml文件,与图片数量吻合。
一打开,好家伙!标注只剩一个点了。这是因为我将代码中的int()改为float()后,读入的仍然是0~1之间的归一化坐标(没有乘以图片的宽高换算成像素坐标),所以标注框缩成了一个点;但是不改的话又读入不了(int()无法解析'0.496827'这样的字符串),陷入死循环。