目标检测图像增强

https://blog.csdn.net/wei_guo_xd/article/details/74199729

常用的图像扩充方式有:

水平翻转,裁剪,视角变换,jpeg压缩,尺度变换,颜色变换,旋转

当用于分类数据集时,这些变换方法可以全部被使用,然而考虑到目标检测标注框的变换,我们选择如下几种方式用于目标检测数据集扩充:

jpeg压缩,尺度变换,颜色变换

这里,我们介绍一个图像变换包:

http://lear.inrialpes.fr/people/paulin/projects/ITP/

这是项目主页,里面介绍了用于图像变换的基本方法,以及如何组合这些方法以得到最好的效果;项目主页同时提供了 Python 程序。

其中的图像变换程序如下(在 Windows 下运行;为了用于目标检测,做了一些修改):

 

 
  1. import os, sys, pdb, numpy

  2. from PIL import Image,ImageChops,ImageOps,ImageDraw

  3.  
  # Parameters used for the CVPR paper.
  NCROPS = 10  # number of "crop%d" variants
  NHOMO = 8  # number of "homo%d" variants handled by gen_homo
  JPG = [70, 50, 30]  # JPEG quality settings for the "jpg%d" transformations
  ROTS = [3, 6, 9, 12, 15]  # rotation amounts for "rot%d" / "rot-%d"
  # Down-scaling factors for "scale%d": the image is resized to (w/scale, h/scale).
  SCALES = [1.5 ** 0.5, 1.5, 1.5 ** 1.5, 1.5 ** 2, 1.5 ** 2.5]

  # Parameters computed on the ILSVRC10 dataset.
  # NOTE(review): gen_colorimetry takes sqrt(lcolor[k]) and indexes
  # pcolor[channel][component]; presumably these are the eigenvalues and
  # eigenvectors of an RGB colour PCA -- confirm against the ITP project page.
  lcolor = [381688.61379382, 4881.28307136, 2316.10313483]
  pcolor = [
      [-0.57848371, -0.7915924, 0.19681989],
      [-0.5795621, 0.22908373, -0.78206676],
      [-0.57398987, 0.56648223, 0.59129816],
  ]

  # Pre-generated gaussian values; row i is used by the "color<i>" transformation.
  alphas = [
      [0.004894, 0.153527, -0.012182],
      [-0.058978, 0.114067, -0.061488],
      [0.002428, -0.003576, -0.125031],
  ]

  20.  
  def gen_colorimetry(i):
      """Return the additive (dr, dg, db) offsets for colour transformation ``i``.

      Each channel offset is ``sum_k alpha[k] * sqrt(lcolor[k]) * pcolor[c][k]``
      where ``c`` is the channel (0=r, 1=g, 2=b) and ``k`` the component.

      For ``i < len(alphas)`` the pre-generated row ``alphas[i]`` is used;
      otherwise three gaussian values are drawn from a generator seeded with
      ``i * 3`` so the result is reproducible for a given ``i``.

      The original code tested ``i <= 3`` although ``alphas`` has three rows
      (IndexError for ``i == 3``) and called ``numpy.random.randn(3, 0, 0.01)``,
      which is not a valid call because ``randn`` takes array dimensions.
      """
      # Per-component scale: square root of the corresponding lcolor entry.
      scales = [numpy.sqrt(value) for value in lcolor]

      if i < len(alphas):
          alpha = alphas[i]
      else:
          # A private generator keeps the caller's global numpy RNG untouched.
          # NOTE(review): mean 0 / std 0.01 are read off the arguments of the
          # original (broken) randn call -- confirm the intended std.
          alpha = numpy.random.RandomState(i * 3).normal(0.0, 0.01, 3)

      return tuple(
          alpha[0] * scales[0] * pcolor[channel][0]
          + alpha[1] * scales[1] * pcolor[channel][1]
          + alpha[2] * scales[2] * pcolor[channel][2]
          for channel in range(3)
      )

  48.  
  def gen_crop(i,w,h):
      """Return the pseudo-random crop box ``(x0, y0, x1, y1)`` for crop index ``i``.

      Each side is moved inwards by a random amount of at most a quarter of
      the image width ``w`` / height ``h``. The box is deterministic for a
      given ``i`` because the generator is seeded with ``4 * i``.

      A private ``RandomState`` is used instead of reseeding the global numpy
      generator; it produces the same stream for the same seed, so the result
      is unchanged, but callers' own random state is no longer reset.
      """
      rng = numpy.random.RandomState(4 * i)
      # Draw order (x0, y0, x1, y1) matters: it fixes which value each side gets.
      x0 = rng.random_sample() * (w / 4)
      y0 = rng.random_sample() * (h / 4)
      x1 = w - rng.random_sample() * (w / 4)
      y1 = h - rng.random_sample() * (h / 4)

      return (int(x0), int(y0), int(x1), int(y1))

  57.  
  def gen_homo(i,w,h):
      """Return the 8-tuple quadrilateral for homography variant ``i`` (0..7).

      The tuple holds four (x, y) corner points for an image of size
      ``w`` x ``h``; gen_trans passes it as the data argument of
      ``Image.transform(..., Image.QUAD, ...)``. Variants come in pairs that
      pull two corners inwards by 12.5% or 25% of the width/height.

      Raises:
          AssertionError: if ``i`` is not one of the eight known variants.
              Raised explicitly so the check survives ``python -O``; a bare
              ``assert False`` would be stripped and return None.
      """
      if i==0:
          return (0,0,int(0.125*w),h,int(0.875*w),h,w,0)
      elif i==1:
          return (0,0,int(0.25*w),h,int(0.75*w),h,w,0)
      elif i==2:
          return (0,int(0.125*h),0,int(0.875*h),w,h,w,0)
      elif i==3:
          return (0,int(0.25*h),0,int(0.75*h),w,h,w,0)
      elif i==4:
          return (int(0.125*w),0,0,h,w,h,int(0.875*w),0)
      elif i==5:
          return (int(0.25*w),0,0,h,w,h,int(0.75*w),0)
      elif i==6:
          return (0,0,0,h,w,int(0.875*h),w,int(0.125*h))
      elif i==7:
          return (0,0,0,h,w,int(0.75*h),w,int(0.25*h))
      raise AssertionError("Unrecognized homography index: %r" % (i,))

  77.  
  78.  
  def rot(image,angle,fname):
      """Rotate ``image`` by ``angle`` degrees in place and return it.

      A rotated copy is pasted back onto ``image`` through a mask obtained by
      rotating an all-white 'L' image by the same angle, so only the area
      covered by the rotated frame is overwritten.

      Args:
          image: PIL image; it is modified in place.
          angle: rotation angle handed to ``Image.rotate``.
          fname: name used only in the error message.

      Returns:
          The same ``image`` object (unchanged if the paste failed).
      """
      mask = Image.new('L',image.size,"white").rotate(angle,Image.NEAREST,expand=0)
      rotated = image.rotate(angle,Image.BILINEAR,expand=0)
      try:
          image.paste(rotated,mask)
      except ValueError:
          # Written via sys.stderr.write so it works on Python 2 and 3 alike
          # (the `print >>sys.stderr` form is Python-2-only).
          sys.stderr.write('error: image do not match '+fname+'\n')
      return image

  88.  
  def gen_corner(n, w, h):
      """Return the crop box ``(x0, y0, x1, y1)`` for corner/centre crop ``n``.

      The crop keeps 227/256 of each image dimension. ``n == 0`` trims an
      equal margin on every side (centre crop); ``n`` in 1..4 trims the full
      margin from two adjacent sides only:

          1: left and top      2: right and top
          3: right and bottom  4: left and bottom

      Raises:
          AssertionError: if ``n`` is not in 0..4. Raised explicitly so the
              check is not stripped under ``python -O``.
      """
      rat = 256 - 227
      margin_x = (rat*w)/256.0
      margin_y = (rat*h)/256.0

      x0, y0, x1, y1 = 0, 0, w, h

      if n == 0: #center
          x0 = (rat*w)/(2*256.0)
          y0 = (rat*h)/(2*256.0)
          x1 = w - (rat*w)/(2*256.0)
          y1 = h - (rat*h)/(2*256.0)
      elif n == 1:
          x0 = margin_x
          y0 = margin_y
      elif n == 2:
          x1 = w - margin_x
          y0 = margin_y
      elif n == 3:
          x1 = w - margin_x
          y1 = h - margin_y
      elif n == 4:
          x0 = margin_x
          y1 = h - margin_y
      else:
          raise AssertionError("Unrecognized corner index: %r" % (n,))

      return (int(x0),int(y0),int(x1),int(y1))

  117.  
  #the main function to call
  #takes an image input path, a transformation and an output path and does the transformation
  def gen_trans(imgfile,trans,outfile):
      """Apply transformation ``trans`` to ``imgfile`` and save it as ``outfile``.

      ``trans`` is one step or several steps joined by ``*`` (for example
      ``"corner0*flip"``). Steps run left to right; each step after the first
      reads the file written by the previous one. Recognised steps:

          plain, flip, crop<N>, homo<N>, jpg<quality>, scale<N>, color<N>,
          rot<N>, rot-<N>, corner<N>

      All outputs are written as JPEG.

      Raises:
          AssertionError: for an unrecognised step name.
      """
      for step in trans.split('*'):
          image = Image.open(imgfile)
          w,h = image.size
          if step=="plain":
              image.save(outfile,"JPEG",quality=100)
          elif step=="flip":
              ImageOps.mirror(image).save(outfile,"JPEG",quality=100)
          elif step.startswith("crop"):
              c = int(step[4:])
              image.crop(gen_crop(c,w,h)).save(outfile,"JPEG",quality=100)
          elif step.startswith("homo"):
              c = int(step[4:])
              image.transform((w,h),Image.QUAD,
                              gen_homo(c,w,h),
                              Image.BILINEAR).save(outfile,"JPEG",quality=100)
          elif step.startswith("jpg"):
              # Format given explicitly, like every other branch, so the
              # quality setting applies whatever the extension of outfile.
              image.save(outfile,"JPEG",quality=int(step[3:]))
          elif step.startswith("scale"):
              scale = SCALES[int(step.replace("scale",""))]
              image.resize((int(w/scale),int(h/scale)),Image.BILINEAR).save(outfile,"JPEG",quality=100)
          elif step.startswith('color'):
              # step[5:] (not step[5]) so multi-digit indices such as
              # "color10" are read in full.
              (dr,dg,db) = gen_colorimetry(int(step[5:]))
              # One 256-entry lookup table per channel, each shifted by that
              # channel's offset.
              table = numpy.tile(numpy.arange(256),(3))
              table[ :256]+= int(dr)
              table[256:512]+= int(dg)
              table[512: ]+= int(db)
              image.convert("RGB").point(table).save(outfile,"JPEG",quality=100)
          elif step.startswith('rot-'):
              # Must be tested before 'rot': "rot-3" also starts with "rot".
              angle = int(step[4:])
              # The rotation is applied one degree at a time.
              for _ in range(angle):
                  image = rot(image,-1,outfile)
              image.save(outfile,"JPEG",quality=100)
          elif step.startswith('rot'):
              angle = int(step[3:])
              for _ in range(angle):
                  image = rot(image,1,outfile)
              image.save(outfile,"JPEG",quality=100)
          elif step.startswith('corner'):
              i = int(step[6:])
              image.crop(gen_corner(i,w,h)).save(outfile,"JPEG",quality=100)
          else:
              # Explicit raise: a bare assert would vanish under `python -O`.
              raise AssertionError("Unrecognized transformation: "+step)
          imgfile = outfile # in case we iterate

  164.  
  165.  
  #Transformations used for object-detection augmentation.
  #The full set of 41 transformations from the CVPR paper additionally contains
  #"flip", "crop%d" (range(NCROPS)), "homo%d" (range(NHOMO)) and
  #"rot-%d" / "rot%d" (ROTS). Those are left out here because they would also
  #require transforming the annotated bounding boxes.
  def get_all_trans():
      """Return the default list of transformation names.

      The list is "plain", one "jpg<quality>" per entry of JPG, "scale0" to
      "scale4", and "color0" to "color2".
      """
      transformations = (["plain"]
                         + ["jpg%d" % quality for quality in JPG]
                         + ["scale%d" % index for index in range(5)]
                         + ["color%d" % index for index in range(3)])
      return transformations

  182.  
  #transformations used at test time in deep architectures
  def get_deep_trans():
      """Return the ten test-time transformations.

      These are the five "corner<N>" crops followed by the same five crops
      each chained with a horizontal flip ("corner<N>*flip").
      """
      corners = ['corner%d' % n for n in range(5)]
      return corners + [corner + '*flip' for corner in corners]

  186.  
  # Usage: <script> INPUT_DIR OUTPUT_DIR [TRANS]
  # TRANS is either a single transformation name or a Python list literal of
  # names; when omitted, get_all_trans() is used.
  if __name__=="__main__":
      inputpath = sys.argv[1]
      outpath = sys.argv[2]
      # Only regular files directly inside the input directory are processed.
      name = [name for name in os.listdir(inputpath) if os.path.isfile(os.path.join(inputpath,name))]
      if len(sys.argv)>= 4:
          trans = sys.argv[3]
          if not trans.startswith("["):
              trans = [trans]
          else:
              # SECURITY NOTE(review): eval() runs arbitrary code taken from
              # the command line. Acceptable only for trusted local use;
              # consider ast.literal_eval for a list of strings.
              trans = eval(trans)
      else:
          trans = get_all_trans()
      # Single-argument print(...) behaves the same on Python 2 and 3.
      print("Generating transformations and storing in %s"%(outpath))
      for k in name:
          # Output files are named <input stem>_<transformation>.jpg.
          stem = os.path.splitext(k)[0]
          for t in trans:
              # os.path.join yields '\\' on Windows (as before) and '/' elsewhere.
              img_input = os.path.join(inputpath,k)
              gen_trans(img_input,t,os.path.join(outpath,'%s_%s.jpg'%(stem,t)))
      print("Finished. Transformations generated: %s"%(" ".join(trans)))

修改xml文件的程序如下:

 

 
  1. # -*- coding=utf-8 -*-

  2. import os

  3. import sys

  4. import shutil

  5. from xml.dom.minidom import Document

  6. from xml.etree.ElementTree import ElementTree,Element

  7. import xml.dom.minidom

  # These must mirror the constants of the image transformation script so the
  # generated xml files line up with the generated images.
  JPG = [70, 50, 30]  # qualities used to build the "jpg%d" transformation names
  # Down-scaling factors for "scale%d": sizes and box coordinates are divided by them.
  SCALES = [1.5 ** 0.5, 1.5, 1.5 ** 1.5, 1.5 ** 2, 1.5 ** 2.5]

  10.  
  #Generate the annotation xml file that matches a transformed image
  def gen_xml(xml_input,trans,outfile):
      """Write to ``outfile`` the annotation of ``xml_input`` adapted to ``trans``.

      ``trans`` is one step or several steps joined by ``*``. For every step
      the ``<filename>`` element is set to the base name of ``outfile`` with
      a ``.jpg`` extension. For "plain", "jpg..." and "color..." nothing else
      changes. For "scale<N>" the first ``<height>`` and ``<width>`` elements
      and every ``<xmin>``/``<ymin>``/``<xmax>``/``<ymax>`` element are divided
      by ``SCALES[N]`` and truncated to int.

      Fixes over the original listing: the output file is closed after
      writing, chained steps read the previous step's output instead of
      always re-reading ``xml_input``, and the document is written as UTF-8
      bytes so the content matches the encoding declared in the xml header.

      Raises:
          AssertionError: for a step other than plain/jpg/color/scale.
          IndexError: if a required element is missing.
      """
      # Accept both '\\' and '/' separators when extracting the base name.
      base = os.path.basename(outfile.replace("\\", "/"))
      image_name = os.path.splitext(base)[0] + '.jpg'

      for step in trans.split('*'):
          if step == "plain" or step.startswith(("jpg", "color")):
              scale = None  # only the file name changes
          elif step.startswith("scale"):
              scale = SCALES[int(step.replace("scale", ""))]
          else:
              # Explicit raise: a bare assert would vanish under `python -O`.
              raise AssertionError("Unrecognized transformation: "+step)

          dom = xml.dom.minidom.parse(xml_input)
          root = dom.documentElement
          root.getElementsByTagName('filename')[0].firstChild.data = image_name

          if scale is not None:
              # Image size: first <height>/<width> only; boxes: every element.
              nodes = [root.getElementsByTagName('height')[0],
                       root.getElementsByTagName('width')[0]]
              for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
                  nodes.extend(root.getElementsByTagName(tag))
              for node in nodes:
                  value = int(node.firstChild.data)
                  node.firstChild.data = str(int(value / scale))

          with open(outfile, 'wb') as f:
              f.write(dom.toxml(encoding='utf-8'))
          xml_input = outfile # in case we iterate

  67.  
  #Build the names of all transformations
  def get_all_trans():
      """Return the default list of transformation names.

      The list is "plain", one "jpg<quality>" per entry of JPG, "scale0" to
      "scale4", and "color0" to "color2".
      """
      transformations = (["plain"]
                         + ["jpg%d" % quality for quality in JPG]
                         + ["scale%d" % index for index in range(5)]
                         + ["color%d" % index for index in range(3)])
      return transformations

  75.  
  # Usage: <script> INPUT_DIR OUTPUT_DIR [TRANS]
  # TRANS is either a single transformation name or a Python list literal of
  # names; when omitted, get_all_trans() is used.
  if __name__=="__main__":
      inputpath = sys.argv[1]
      outpath = sys.argv[2]
      # Only regular files directly inside the input directory are processed.
      name = [name for name in os.listdir(inputpath) if os.path.isfile(os.path.join(inputpath,name))]
      if len(sys.argv)>= 4:
          trans = sys.argv[3]
          if not trans.startswith("["):
              trans = [trans]
          else:
              # SECURITY NOTE(review): eval() runs arbitrary code taken from
              # the command line. Acceptable only for trusted local use;
              # consider ast.literal_eval for a list of strings.
              trans = eval(trans)
      else:
          trans = get_all_trans()
      # Single-argument print(...) behaves the same on Python 2 and 3.
      print("Generating transformations and storing in %s"%(outpath))
      for k in name:
          # Output files are named <input stem>_<transformation>.xml.
          stem = os.path.splitext(k)[0]
          for t in trans:
              # os.path.join yields '\\' on Windows (as before) and '/' elsewhere.
              xml_input = os.path.join(inputpath,k)
              gen_xml(xml_input,t,os.path.join(outpath,'%s_%s.xml'%(stem,t)))

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
YOLOv5是一种先进的目标检测算法,可以用于检测图像中的不同对象。在进行目标检测之前,可以使用图像增强技术来提高模型的性能和鲁棒性。

在Python中,我们可以使用OpenCV库来实现图像增强。下面是一个简单的代码示例,用于对图像进行亮度和对比度的调整:

```python
import cv2
import numpy as np

def adjust_brightness_contrast(image, brightness, contrast):
    # 将图像转换为浮点数
    image = image.astype(np.float32)
    # 调整亮度
    image = image * (1 + brightness)
    # 调整对比度
    image = image * contrast
    # 将图像截取到0-255的范围内
    image = np.clip(image, 0, 255)
    # 将图像转换为8位整数
    image = image.astype(np.uint8)
    return image

# 读取图像
image = cv2.imread("image.jpg")
# 调整亮度和对比度
brightness = 0.2
contrast = 1.5
enhanced_image = adjust_brightness_contrast(image, brightness, contrast)
# 显示原始图像和增强后的图像
cv2.imshow("Original Image", image)
cv2.imshow("Enhanced Image", enhanced_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
```

该代码首先定义了一个名为`adjust_brightness_contrast`的函数,以图像、亮度和对比度作为参数。在函数内部,图像被转换为浮点数并根据给定的亮度和对比度进行调整。然后,图像被截取到0-255的范围内,并转换为8位整数。最后,通过调用`imshow`函数来显示原始图像和增强后的图像。

这只是图像增强的一个简单示例,还可以使用其他技术,如直方图均衡化、高斯模糊、尺度变换等来增强图像。根据具体的需求和图像特征,可以选择适合的增强方法来优化YOLOv5目标检测的性能。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值