正常的数据集标注文件每行应有 5 列数据:第一列为类别,后四列为坐标值。如果训练时报数据集错误,可以检查是否每个标注文件的每一行都是 5 列。
代码通过判断一行中的空格(' ')是否多于 4 个来进行检查,并输出有问题的 txt 文件名称及对应行。
import os
# Root directory containing the label .txt files to scan.
# Raw string: the backslashes of the Windows path must not be read as escape
# sequences (\C, \d and \l are invalid escapes and trigger a warning).
folder_path = r'G:\CF-data\data_cfphone\labels-0'
def print_lines_with_spaces(file_path, threshold=4):
    """Print every line of a text file that contains more than ``threshold`` spaces.

    A well-formed label line has five space-separated columns (class id plus
    four coordinates), i.e. exactly four separating spaces, so a line with
    more spaces than ``threshold`` has too many columns.

    :param file_path: path of the text file to scan (read as UTF-8).
    :param threshold: largest number of spaces a line may contain without
        being reported; defaults to 4.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, start=1):
            # Ignore the newline and any leading/trailing padding so that only
            # the separators between columns are counted.
            content = line.strip()
            if content.count(' ') > threshold:
                print(f'{file_path}, Line {line_number}: {content}')
def process_folder(folder):
    """Walk ``folder`` recursively and check every ``.txt`` file found.

    Each file whose name ends with ``.txt`` is handed to
    ``print_lines_with_spaces`` using its full path.

    :param folder: root directory to walk.
    """
    for current_dir, _subdirs, names in os.walk(folder):
        # Keep only the text files of this directory, in the order os.walk lists them.
        txt_names = (name for name in names if name.endswith('.txt'))
        for name in txt_names:
            print_lines_with_spaces(os.path.join(current_dir, name))
# Script entry point: scan the configured label folder only when this file is
# run directly, not when it is imported as a module.
if __name__ == "__main__":
    process_folder(folder_path)
import os
# Root directory containing the label .txt files to scan.
# Raw string: the backslashes of the Windows path must not be read as escape
# sequences (\C, \d and \l are invalid escapes and trigger a warning).
folder_path = r'G:\CF-data\data_cfphone\labels-0'
# NOTE(review): this repeats a definition that appears earlier in the file;
# being the later one, it is the definition in effect at runtime.
def print_lines_with_spaces(file_path, threshold=4):
    """Print every line of a text file that contains more than ``threshold`` spaces.

    A well-formed label line has five space-separated columns (class id plus
    four coordinates), i.e. exactly four separating spaces, so a line with
    more spaces than ``threshold`` has too many columns.

    :param file_path: path of the text file to scan (read as UTF-8).
    :param threshold: largest number of spaces a line may contain without
        being reported; defaults to 4.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, start=1):
            # Ignore the newline and any leading/trailing padding so that only
            # the separators between columns are counted.
            content = line.strip()
            if content.count(' ') > threshold:
                print(f'{file_path}, Line {line_number}: {content}')
def process_folder(folder):
    """Process all txt files found under a folder.

    The folder is walked recursively; each file whose name ends with ``.txt``
    is passed, by full path, to ``print_lines_with_spaces``.

    :param folder: path of the folder to walk.
    """
    for current_dir, _subdirs, names in os.walk(folder):
        # Keep only the text files of this directory, in the order os.walk lists them.
        txt_names = (name for name in names if name.endswith('.txt'))
        for name in txt_names:
            print_lines_with_spaces(os.path.join(current_dir, name))
# Script entry point: scan the configured label folder only when this file is
# run directly, not when it is imported as a module.
if __name__ == "__main__":
    process_folder(folder_path)