最近在跑 YOLO。对于高分辨率的数据,目前的目标识别模型精度都不高,因为图片在传入网络时会被 resize,小目标的细节会丢失。我想训练一个模型,用于识别 4k*8k 分辨率的全景影像。这个脚本的作用就是把已标注数据集中的每张图片沿宽度方向平均切成三份(左、中、右),并同步切分对应的标签。
import os
import cv2
def slip(file_path, image_name,
         segment_savefile_path=r'D:\22054322g\project2\segment_data'):
    """Split one annotated image into three equal-width vertical strips.

    The image ``file_path/image_name`` is cut along its width into three
    strips of ``width // 3`` columns each (any ``width % 3`` remainder
    columns on the right are not part of any strip). Each strip is saved as
    ``segment{i}_<image_name>`` and gets a label file
    ``segment{i}_<stem>.txt`` in ``segment_savefile_path``.

    The label file is expected next to the image, with the same stem and a
    ``.txt`` extension. Every label is assigned to the strip that contains
    its ``x`` value and is re-normalised against that strip's size.

    NOTE: boxes are not clipped at the cut lines, so a box that straddles a
    cut keeps its full width in the strip it is assigned to.

    Args:
        file_path: Directory containing the image and its label file.
        image_name: File name of the image inside ``file_path``.
        segment_savefile_path: Output directory for strips and label files;
            created if it does not exist.

    Raises:
        OSError: If the image cannot be read or a strip cannot be written.
        ValueError: If the image is narrower than three pixels.
    """
    image_path = os.path.join(file_path, image_name)
    # splitext instead of slicing off four characters, so the stem is right
    # for extensions of any length.
    stem = os.path.splitext(image_name)[0]
    label_path = os.path.join(file_path, stem + '.txt')

    # cv2.imread signals failure by returning None rather than raising.
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f'could not read image: {image_path}')

    height, width = image.shape[:2]
    segment_width = width // 3
    if segment_width == 0:
        raise ValueError(f'image too narrow to split into three: {image_path}')

    # Read the labels before writing anything, so a missing or malformed
    # label file does not leave image strips without label files behind.
    pixel_labels = convert_yolo2pix(label_path, height, width)

    os.makedirs(segment_savefile_path, exist_ok=True)

    for i in range(3):
        strip = image[:, i * segment_width:(i + 1) * segment_width]
        strip_path = os.path.join(segment_savefile_path,
                                  f'segment{i}_' + image_name)
        if not cv2.imwrite(strip_path, strip):
            raise OSError(f'could not write image: {strip_path}')

    labels_per_strip = [[], [], []]
    for label in pixel_labels:
        # Bucket by integer division so values exactly on 0 or on a cut line
        # are kept; clamp so values in the remainder columns (or outside the
        # image) still land in the first/last strip instead of being lost.
        index = min(max(int(label['x'] // segment_width), 0), 2)
        local_x = (label['x'] - index * segment_width) / segment_width
        labels_per_strip[index].append({
            'class_id': label['class_id'],
            # Keep the normalised value inside [0, 1] after the clamp above.
            'x': min(max(local_x, 0.0), 1.0),
            'y': label['y'] / height,
            'w': label['w'] / segment_width,
            'h': label['h'] / height,
        })

    for i, strip_labels in enumerate(labels_per_strip):
        strip_label_path = os.path.join(segment_savefile_path,
                                        f'segment{i}_' + stem + '.txt')
        write_labels(strip_label_path, strip_labels)
def write_labels(label_path, labels):
    """Write label dicts to a text file, one label per line.

    Each label's values are converted with ``str`` and joined by single
    spaces, in the dict's own key order. An existing file is overwritten;
    an empty ``labels`` produces an empty file.

    Args:
        label_path: Path of the text file to write.
        labels: Iterable of dicts whose values make up one line each.
    """
    # Format every line before opening the file, so a label that cannot be
    # formatted does not leave a truncated file behind.
    lines = [
        ' '.join(str(value) for value in label.values()) + '\n'
        for label in labels
    ]
    with open(label_path, 'w', encoding='utf-8') as file:
        file.writelines(lines)
def convert_yolo2pix(label_path, height, width):
    """Read a label file and scale its normalised values to pixels.

    Each non-blank line is split on whitespace. The first field is parsed
    as an integer class id; the next four are parsed as floats and
    multiplied by ``width``, ``height``, ``width`` and ``height``
    respectively. Any further fields on the line are ignored.

    Args:
        label_path: Path of the label text file.
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        A list of dicts with keys ``class_id``, ``x``, ``y``, ``w``, ``h``
        (in that order), one per non-blank line.

    Raises:
        FileNotFoundError: If ``label_path`` does not exist.
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than five fields.
    """
    labels = []
    with open(label_path, 'r', encoding='utf-8') as f:
        for line in f:
            # split() with no argument tolerates tabs and repeated spaces,
            # and returns an empty list for blank lines, which are skipped.
            fields = line.split()
            if not fields:
                continue
            labels.append({
                "class_id": int(fields[0]),
                "x": float(fields[1]) * width,
                "y": float(fields[2]) * height,
                "w": float(fields[3]) * width,
                "h": float(fields[4]) * height,
            })
    return labels
if __name__ == '__main__':
    # Directory holding the source images together with their label files.
    file_path = r"D:\22054322g\project2\all data\big_img"
    # sorted() gives a deterministic processing order.
    for file in sorted(os.listdir(file_path)):
        # Compare the lower-cased name so .JPG / .PNG files are not skipped.
        if file.lower().endswith(('.jpg', '.png')):
            slip(file_path, file)
具体做法:先把图片切成 3 份,再切分 label。把 YOLO 标签还原成像素坐标,然后根据目标中心点的横坐标判断它落在三块区域中的哪一块,再相对该区域重新归一化,写入对应的 label 文件即可。
使用时只需要修改 main 部分的 file_path 和 slip 函数中的 segment_savefile_path 即可。这里默认图片和标签放在同一个路径下,输出的图片和标签也会写到同一个文件夹中。
切完之后可以用 labelImg 打开输出文件夹验证一下。记得在输出的文件夹里放一份 classes.txt 文件,不然 labelImg 不能读取 YOLO 格式的标签。