如果直接把大图片扔给深度学习模型,由于 resize 的缘故,很多目标会变得很小,不利于识别和训练;所以把包含目标的区域抠出来作为新图片,这样加载速度也会快很多。
注意,不要每个目标单独抠一张图:抠出来的每张图片很可能同时包含多个目标,这样同一个目标就会重复出现在多张图片之中,造成冗余。
# -*- coding: utf-8 -*-
"""
Created on Wed May 29 16:08:42 2019
这是把 trainLight 里的灯 给抠出来,生成到新的目录中
每张新图片为 500*500,尽可能多地包含尚未被截取的信号灯;一张图包含不下时,就再另起一张 500*500 的图片
@author: haithink
"""
import cv2
import os
# Source data: the full-size images and the folder holding their label
# files (one .txt per image, looked up by the image's base name).
ttkJpgsPath = "E:\\Dataset\\imgs\\"
ttkTxtPath = "E:\\Dataset\\yolo\\"
# Output folders for the cropped data.
# NOTE(review): the script writes the label .txt files to NewTxtPah
# ("...\imgs500\") and the .jpg crops to NewJpgsPah ("...\imgs500Txt\"), so
# the folder names look swapped relative to their contents -- confirm the
# intended layout before relying on it.
NewTxtPah = "E:\\Dataset\\imgs500\\"
NewJpgsPah = "E:\\Dataset\\imgs500Txt\\"
# Directory listing of the source image folder, taken once at start-up.
files = os.listdir(ttkJpgsPath)
# Debug switch: the main loop stops early once this exceeds 1; it is 0 here.
iCtrl = 0
# Starting index for naming the outputs ("<index>_.jpg" / "<index>_.txt").
counter = 0
# Side length, in pixels, of the square crop window.
NEWSIZE = 500
def parse_yolo_labels(lines, width, height, max_size=300):
    """Turn YOLO-style label lines into pixel-space boxes.

    Args:
        lines: iterable of text lines, each ``class x_center y_center w h``
            with the four numbers given as fractions of the image size.
        width: width in pixels of the image the labels belong to.
        height: height in pixels of that image.
        max_size: boxes wider or taller than this many pixels are dropped,
            because they cannot be guaranteed to fit into one crop window.

    Returns:
        A list of dicts with the keys ``cls`` (class field exactly as written
        in the label file), ``xc``/``yc`` (centre), ``w``/``h`` (size) and
        ``left``/``top``/``right``/``bottom`` (corners clamped to the image).

    Raises:
        ValueError: if a field of a five-column line is not numeric.
    """
    boxes = []
    for line in lines:
        fields = line.split()
        if len(fields) < 5:
            continue  # blank or truncated line
        int(fields[0])  # validation only: fail loudly on a bad class id
        xc = int(float(fields[1]) * width)
        yc = int(float(fields[2]) * height)
        box_w = int(float(fields[3]) * width)
        box_h = int(float(fields[4]) * height)
        if box_w > max_size or box_h > max_size:
            continue
        left = max(0, int(xc - box_w / 2))
        top = max(0, int(yc - box_h / 2))
        right = min(width - 1, int(xc + box_w / 2))
        bottom = min(height - 1, int(yc + box_h / 2))
        if left > right or top > bottom:
            continue  # the label lies completely outside the image
        boxes.append({
            "cls": fields[0],
            "xc": xc, "yc": yc, "w": box_w, "h": box_h,
            "left": left, "top": top, "right": right, "bottom": bottom,
        })
    return boxes


def crop_window(anchor_x, anchor_y, width, height, size, margin=100):
    """Return ``(left, top, right, bottom)`` of the crop for an anchor point.

    The window starts ``margin`` pixels up and left of the anchor and spans
    ``size`` pixels, clamped to the image; it is therefore smaller than
    ``size`` when the anchor lies close to the right or bottom border.
    """
    left = max(0, anchor_x - margin)
    top = max(0, anchor_y - margin)
    return left, top, min(width - 1, left + size), min(height - 1, top + size)


def window_holds(window, box):
    """Tell whether ``box`` lies completely inside a non-empty ``window``."""
    left, top, right, bottom = window
    if right <= left or bottom <= top:
        return False
    return (box["left"] >= left and box["top"] >= top
            and box["right"] <= right and box["bottom"] <= bottom)


def plan_crops(boxes, width, height, size=500, margin=100):
    """Group boxes into crop windows so that every box lands in one crop.

    Returns:
        A list of ``(window, boxes_inside)`` pairs, where ``window`` is
        ``(left, top, right, bottom)`` in source-image pixels. Windows that
        would contain no box are never returned, and the function always
        terminates.
    """
    remaining = list(boxes)
    plan = []
    while remaining:
        window = crop_window(min(b["left"] for b in remaining),
                             min(b["top"] for b in remaining),
                             width, height, size, margin)
        # Reverse order keeps the label lines in the order the script has
        # always written them.
        inside = [b for b in reversed(remaining) if window_holds(window, b)]
        if not inside:
            # The smallest x and the smallest y came from different boxes
            # that are far apart, so the combined window holds nothing.
            # Anchor on one single box instead, otherwise the loop would
            # never make progress.
            anchor = min(remaining, key=lambda b: b["left"])
            window = crop_window(anchor["left"], anchor["top"],
                                 width, height, size, margin)
            inside = [b for b in reversed(remaining)
                      if window_holds(window, b)]
            if not inside:
                # Cannot be framed at all (degenerate image or box): drop it.
                remaining.remove(anchor)
                continue
        remaining = [b for b in remaining if not window_holds(window, b)]
        plan.append((window, inside))
    return plan


def label_line(box, window):
    """Format one label line for ``box`` relative to the crop ``window``.

    Coordinates are normalised by the real crop size, which is smaller than
    the nominal window size when the crop was clamped at the image border.
    """
    left, top, right, bottom = window
    crop_w = right - left
    crop_h = bottom - top
    return "{} {} {} {} {}\n".format(
        box["cls"],
        (box["xc"] - left) / crop_w,
        (box["yc"] - top) / crop_h,
        box["w"] / crop_w,
        box["h"] / crop_h,
    )


def process_image(file_name, next_index):
    """Crop one source image around its labelled boxes.

    Writes ``<index>_.jpg`` and ``<index>_.txt`` for every crop and returns
    the first unused index. Files that are not readable images, or that have
    no label file, are reported and skipped.
    """
    # The configured folders already end with a path separator, so plain
    # concatenation is kept to leave the resulting paths unchanged.
    image = cv2.imread(ttkJpgsPath + file_name)
    if image is None:
        print("skip (not a readable image)", file_name)
        return next_index
    height, width = image.shape[:2]

    # splitext keeps base names that contain extra dots intact.
    label_path = ttkTxtPath + os.path.splitext(file_name)[0] + ".txt"
    try:
        label_file = open(label_path)
    except OSError:
        print("skip (no label file)", file_name)
        return next_index
    with label_file:
        boxes = parse_yolo_labels(label_file, width, height)

    for window, inside in plan_crops(boxes, width, height, NEWSIZE):
        left, top, right, bottom = window
        base_name = str(next_index) + "_"
        next_index += 1
        cv2.imwrite(NewJpgsPah + base_name + ".jpg",
                    image[top:bottom, left:right])
        with open(NewTxtPah + base_name + ".txt", "w") as out:
            out.writelines(label_line(box, window) for box in inside)
    return next_index


def main():
    """Process every file of the source folder listing."""
    next_index = counter
    for file_name in files:
        # Test the entry inside the source folder, not in the current
        # working directory.
        if os.path.isdir(ttkJpgsPath + file_name):
            continue
        print("handle ", file_name)
        next_index = process_image(file_name, next_index)
        if iCtrl > 1:  # debug stop, see the configuration section
            break
        print("end", file_name)


if __name__ == "__main__":
    main()