背景:使用 CRNN 做文字识别时,训练样本要求图片与其对应的标签文件放在同一目录下。为便于管理尽可能多的样本,遂对样本进行重新编号。
编号规则:
图片:10位数字 + 扩展名('.jpg'、'.png' 或 '.jpeg'),例如 0000000001.jpg
标签:10位数字 + '.txt',例如 0000000001.txt
1vs1crnn样本重新编号.py
import os
import shutil
def renumber_samples(src_dir, dst_dir, start=1,
                     img_format=('jpg', 'png', 'jpeg'), width=10):
    """Move image/label pairs from *src_dir* to *dst_dir* under sequential names.

    Each image whose extension is listed in *img_format* and that has a
    sibling ``<stem>.txt`` label is moved to ``<number>.<ext>`` and its label
    to ``<number>.txt``, where ``<number>`` is the running counter zero-padded
    to *width* digits.

    Args:
        src_dir: Directory holding the original images and label files.
        dst_dir: Directory receiving the renumbered pairs; created if missing.
        start: First number to assign.
        img_format: Image extensions (without the dot) to process; matching
            is case-sensitive.
        width: Number of digits in the zero-padded file name.

    Returns:
        The number of image/label pairs that were moved.

    Raises:
        OSError: If *src_dir* cannot be listed or *dst_dir* cannot be created.
    """
    # The destination must exist, otherwise every move would fail.
    os.makedirs(dst_dir, exist_ok=True)

    count = start
    moved = 0
    # Sort the listing so that the numbering is reproducible between runs;
    # os.listdir() returns entries in arbitrary order.
    for file_name in sorted(os.listdir(src_dir)):
        # splitext copes with names that have no dot or several dots.
        stem, ext = os.path.splitext(file_name)
        ext = ext[1:]
        img_path = os.path.join(src_dir, file_name)
        if ext not in img_format or not os.path.isfile(img_path):
            continue
        print(file_name)

        # Check for the label first, so an image without a label stays where
        # it is instead of being moved alone and overwritten later.
        label_path = os.path.join(src_dir, stem + '.txt')
        if not os.path.isfile(label_path):
            print('skip (no label file):', file_name)
            continue

        # Advance past numbers already used in the destination so that a
        # second run never overwrites earlier samples.
        while True:
            number = str(count).zfill(width)
            new_img_path = os.path.join(dst_dir, number + '.' + ext)
            new_label_path = os.path.join(dst_dir, number + '.txt')
            if not (os.path.exists(new_img_path)
                    or os.path.exists(new_label_path)):
                break
            count += 1

        try:
            shutil.move(img_path, new_img_path)
            try:
                shutil.move(label_path, new_label_path)
            except OSError:
                # Put the image back so the pair stays together.
                shutil.move(new_img_path, img_path)
                raise
        except OSError as err:
            # Best effort: report the failure and carry on with the rest.
            print('skip (move failed):', file_name, err)
            continue

        count += 1
        moved += 1
    return moved


if __name__ == '__main__':
    # Work relative to the current working directory.
    base_dir = os.getcwd()
    # path is the folder holding the files to renumber.
    path = os.path.join(base_dir, 'train1')
    new_path = os.path.join(base_dir, 'n_train1')
    # start sets the first number to assign.
    total = renumber_samples(path, new_path, start=1)
    print('总计:', total)