文章目录
一、之前工作回顾
- 在【python-opencv】 sobel算子 图像边缘检测 图像二值化处理 并进行框标定 这一篇文章中,讨论了如何使用边缘检测算法将图片二值化处理。
- 其中
th, im_th = cv2.threshold(im_in, 40, 255, cv2.THRESH_BINARY)
这一行代码中,40 这个阈值可以根据不同的图片调整,以达到二值化的最佳效果。
- 这一步为之后实现图像的框标定、提取出图片中物体的 x, y, w, h 值奠定了基础。
- 在【python-opencv】对图片二值化处理后,对图像进行框标定 这篇文章中,讨论了如何对二值化的图片进行框标定,并返回图片中物体的x, y, w, h的值。
- 其中
MIN_BOX = 5000
这个参数应当随着物体大小的改变而改变,对于较小的蛾子,参数应当调小。
- 这一步为之后将返回的物体 x, y, w, h 值以 voc 格式写入 xml 文件提供了基础。
二、图片处理过程
(1)图片的裁剪
- 裁剪掉周围没有物体的地方,裁剪后的图片大小固定
- 图片裁剪.py
# -*- coding: utf-8 -*-
import cv2
import numpy as np
def cut_pic(img):
    """Shrink *img* to 20% of its size and cut a fixed window out of it.

    The image is resized by a factor of 0.2 on both axes using
    nearest-neighbour interpolation, then rows 25..724 and columns 210..969
    of the resized image are returned (a window of at most 700 x 760 pixels;
    NumPy slicing silently yields less if the resized image is smaller).
    """
    top, left = 25, 210
    window_h, window_w = 700, 760
    shrunk = cv2.resize(img, (0, 0), fx=0.2, fy=0.2, interpolation=cv2.INTER_NEAREST)
    return shrunk[top:top + window_h, left:left + window_w]
(2)生成xml文件的相关代码
- 在将图片裁剪完成之后,运行 框标定.py 即可生成 voc 数据集格式的 xml 标注文件。
- 生成的 xml 文件格式详见「五、数据标注范例」。
- 生成图片的xml文件.py
# -*- coding:utf-8 -*-
# https://zhuanlan.zhihu.com/p/54269963
# from xml.etree import ElementTree as etree
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import SubElement
from xml.etree.ElementTree import ElementTree
from xml.dom import minidom
import os
def generate_head(pic_filename, pic_path, width=760, height=700, depth=3):
    """Build the header part of a VOC-style ``<annotation>`` element.

    The returned element contains, in order, the children ``folder``,
    ``filename``, ``path``, ``source/database``, ``size/{width,height,depth}``
    and ``segmented``. ``<object>`` entries are not added here.

    Args:
        pic_filename: Text stored in ``<filename>``.
        pic_path: Text stored in ``<path>``.
        width: Image width written to ``<size>``; defaults to 760.
        height: Image height written to ``<size>``; defaults to 700.
        depth: Channel count written to ``<size>``; defaults to 3.

    Returns:
        The new root ``annotation`` element.
    """
    annotation = Element('annotation')

    SubElement(annotation, 'folder').text = "VOC2007"
    SubElement(annotation, 'filename').text = pic_filename
    SubElement(annotation, 'path').text = pic_path

    source = SubElement(annotation, 'source')
    SubElement(source, 'database').text = "ezi"

    # Element text must be a string, so the numeric size values are converted.
    size = SubElement(annotation, 'size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        SubElement(size, tag).text = str(value)

    SubElement(annotation, 'segmented').text = "0"
    return annotation
def generate_object(annotation, pic_label, x, y, w, h):
    """Append one VOC ``<object>`` entry to *annotation*.

    Args:
        annotation: Parent element the ``<object>`` node is appended to.
        pic_label: Class label stored in ``<name>``.
        x: Left edge of the box, written as ``xmin``.
        y: Top edge of the box, written as ``ymin``.
        w: Box width; ``xmax`` is written as ``x + w``.
        h: Box height; ``ymax`` is written as ``y + h``.

    Returns:
        The same *annotation* element, now holding the extra object.
    """
    # Named obj rather than object so the builtin is not shadowed.
    obj = SubElement(annotation, 'object')
    fixed_fields = (
        ('name', pic_label),
        ('pose', "Unspecified"),
        ('truncated', "0"),
        ('difficult', "0"),
    )
    for tag, text in fixed_fields:
        SubElement(obj, tag).text = text

    bndbox = SubElement(obj, 'bndbox')
    corners = (('xmin', x), ('ymin', y), ('xmax', x + w), ('ymax', y + h))
    for tag, value in corners:
        SubElement(bndbox, tag).text = str(value)
    return annotation
def write_xml(annotation, xml_path):
    """Serialise *annotation* to *xml_path* as UTF-8 encoded XML.

    Missing parent directories of *xml_path* are created first.

    Args:
        annotation: Root element of the document to write.
        xml_path: Destination file path; may be a bare file name.
    """
    dir_name = os.path.dirname(xml_path)
    # dirname is "" for a bare file name, and os.makedirs("") raises, so only
    # create directories when there is one. exist_ok avoids the race between
    # checking for the directory and creating it.
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)
    # Wrap the element in a tree so it can be written out.
    ElementTree(annotation).write(xml_path, encoding='utf-8')
- sobel算子边缘检测.py
# -*- coding: utf-8 -*-
import cv2
import numpy as np
from 图片裁剪 import cut_pic
def fillHole(im_in, thresh=55):
    """Binarise *im_in* and fill the holes enclosed by its white regions.

    Args:
        im_in: Single-channel image to threshold.
        thresh: Binarisation threshold. With ``cv2.THRESH_BINARY`` pixels
            strictly greater than this become 255 and all others 0.
            Defaults to 55, the value previously hard-coded here.

    Returns:
        The thresholded image with enclosed holes set to 255.
    """
    # See https://blog.csdn.net/a19990412/article/details/81172426 for
    # cv2.threshold usage.
    _, im_th = cv2.threshold(im_in, thresh, 255, cv2.THRESH_BINARY)

    # Flood-fill a copy so the thresholded image itself stays intact.
    im_floodfill = im_th.copy()

    # floodFill requires a mask 2 pixels larger than the image on each axis.
    h, w = im_th.shape[:2]
    mask = np.zeros((h + 2, w + 2), np.uint8)

    # Fill from the top-left corner. NOTE(review): this relies on pixel (0, 0)
    # being background; an object touching that corner would be flooded.
    cv2.floodFill(im_floodfill, mask, (0, 0), 255)

    # After inversion only the pixels the flood could not reach remain white.
    im_floodfill_inv = cv2.bitwise_not(im_floodfill)

    # Union of the thresholded foreground and the filled holes.
    return im_th | im_floodfill_inv
def baweraopen(image, size):
    """Remove small white regions from a binary image.

    Args:
        image: Single-channel binary image of dtype uint8 (white regions on a
            black background).
        size: Connected components with an area below this many pixels are
            set to 0.

    Returns:
        A copy of *image* with the small components blacked out.
    """
    output = image.copy()
    nlabels, labels, stats, _ = cv2.connectedComponentsWithStats(image)

    # Labels run from 0 to nlabels - 1 with 0 being the background. The
    # earlier loop stopped at nlabels - 2 and never examined the last
    # component.
    too_small = stats[:, cv2.CC_STAT_AREA] < size
    too_small[0] = False  # never erase the background label

    # Look up the flag of each pixel's label to get a per-pixel erase mask.
    output[too_small[labels]] = 0
    return output
def sobel_cal(imgs):
    """Turn a BGR photo into a binary object mask using Sobel edges.

    Steps: crop if needed, median blur, Sobel gradients on the HSV saturation
    channel, dilate, threshold and fill holes, erode, dilate again, then drop
    small blobs.

    Args:
        imgs: BGR image. Anything whose element count differs from
            700 * 760 * 3 is first passed through ``cut_pic``.

    Returns:
        A tuple ``(imgs, out)``: the (possibly cropped) colour image and the
        binary mask computed from it.
    """
    # 700 rows x 760 columns x 3 channels, i.e. the element count of an image
    # that has already been cropped.
    cropped_size = 700 * 760 * 3
    if imgs.size != cropped_size:
        imgs = cut_pic(imgs)

    # Median blur (7x7 aperture) for denoising.
    img = cv2.medianBlur(imgs, 7)

    # Edges are detected on the saturation channel only.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    color_space = cv2.split(hsv)[1]

    # First-order Sobel derivatives; CV_16S keeps negative gradients until
    # convertScaleAbs maps them back to uint8.
    grad_x = cv2.Sobel(color_space, cv2.CV_16S, 1, 0)
    grad_y = cv2.Sobel(color_space, cv2.CV_16S, 0, 1)
    abs_x = cv2.convertScaleAbs(grad_x)
    abs_y = cv2.convertScaleAbs(grad_y)
    sobel = cv2.addWeighted(abs_x, 0.5, abs_y, 0.5, 0)

    # 2x2 structuring element. Dilation grows bright regions, erosion grows
    # dark ones.
    kernel = np.ones((2, 2), np.uint8)

    dst = cv2.dilate(sobel, kernel)
    out = fillHole(dst)
    erosion = cv2.erode(out, kernel)
    dst = cv2.dilate(erosion, kernel)

    # Drop blobs smaller than 300 pixels.
    out = baweraopen(dst, 300)
    return imgs, out
if __name__ == '__main__':
    # Demo: compute and display the binary mask for one sample image.
    img_path = 'img/13.jpg'
    # cv2.imread yields BGR channel order; convert to RGB before handing the
    # image to RGB-based tools.
    img = cv2.imread(img_path)
    # imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise SystemExit('cannot read image: ' + img_path)
    img, out = sobel_cal(img)
    cv2.imshow('out', out)
    cv2.waitKey()
- 框标定.py
import cv2
import numpy as np
from sobel算子边缘检测 import sobel_cal
from 生成图片的xml文件 import generate_object, generate_head, write_xml
import os
# Minimum bounding-box area (w * h); boxes smaller than this are skipped.
MIN_BOX = 2500
def draw_min_rect_circle(img, cnts, pic_filename, pic_label, pic_path, xml_path):
    """Box every sufficiently large contour and write the VOC annotation.

    For each contour in *cnts* whose bounding rectangle has an area of at
    least ``MIN_BOX``, an ``<object>`` entry labelled *pic_label* is added to
    the annotation and a blue rectangle is drawn on a copy of *img*. The
    annotation is written to *xml_path* even when no box qualifies.

    Returns:
        The copy of *img* with the rectangles drawn on it.
    """
    canvas = np.copy(img)
    annotation = generate_head(pic_filename, pic_path)
    for contour in cnts:
        x, y, w, h = cv2.boundingRect(contour)
        if w * h >= MIN_BOX:
            annotation = generate_object(annotation, pic_label, x, y, w, h)
            # Colour is given in BGR order, so (255, 0, 0) is blue.
            cv2.rectangle(canvas, (x, y), (x + w, y + h), (255, 0, 0), 2)
    write_xml(annotation, xml_path)
    return canvas
def run(pic_filename, pic_label, pic_path, xml_path):
    """Detect objects in one picture and write its VOC xml annotation.

    Args:
        pic_filename: File name recorded in the annotation.
        pic_label: Class label assigned to every detected box.
        pic_path: Path of the image to read.
        xml_path: Path of the xml file to write.

    Raises:
        ValueError: If ``cv2.imread`` cannot read *pic_path*.
    """
    imgs = cv2.imread(pic_path)  # a black object on a white image works best
    # imread signals failure by returning None; fail here with a clear message
    # rather than with an AttributeError further down.
    if imgs is None:
        raise ValueError("cv2.imread could not read image: %s" % pic_path)

    # imgs is the cropped picture, image the black-and-white binary mask.
    imgs, image = sobel_cal(imgs)

    thresh = cv2.GaussianBlur(image, (5, 5), 0)
    thresh = cv2.Canny(thresh, 0, 104)

    # OpenCV 3.x returns (image, contours, hierarchy) and 4.x returns
    # (contours, hierarchy); index -2 is the contour list in both.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # The picture passed on is the cropped one, without the Gaussian blur.
    imgs = draw_min_rect_circle(imgs, contours, pic_filename, pic_label, pic_path, xml_path)
if __name__ == '__main__':
    # Layout: ./dataset/<label>/<picture>. Every picture gets one xml file in
    # ./Annotations, named after the picture.
    pic_root = "./dataset"
    xml_root = "./Annotations"
    for pic_label in os.listdir(pic_root):
        label_dir = os.path.join(pic_root, pic_label)
        # Skip stray files in the dataset root; only folders are labels.
        if not os.path.isdir(label_dir):
            continue
        for pic_filename in os.listdir(label_dir):
            pic_path = os.path.join(label_dir, pic_filename)
            # splitext handles extensions of any length; slicing off the last
            # three characters only worked for three-letter ones like ".jpg".
            stem = os.path.splitext(pic_filename)[0]
            # All xml files share one flat folder, so pictures with the same
            # name under different labels overwrite each other.
            xml_path = os.path.join(xml_root, stem + ".xml")
            print(pic_filename, pic_label, pic_path, xml_path)
            run(pic_filename, pic_label, pic_path, xml_path)
三、使用yolov4模型,训练预测蛾子数据集
- 使用生成的 xml 文件构建蛾子的 voc 数据集,具体步骤参考「四、项目步骤记录」。
- 项目刚刚开始,数据集还不充分,这里先把过程记录下来。
- 结果展示
四、项目步骤记录
- voc2007数据集格式
├── Annotations 进行 detection 任务时的标签文件,xml 形式,文件名与图片名一一对应
├── ImageSets 包含三个子文件夹 Layout、Main、Segmentation,其中 Main 存放的是分类和检测的数据集分割文件
├── JPEGImages 存放 .jpg 格式的图片文件
├── SegmentationClass 存放按照 class 分割的图片
└── SegmentationObject 存放按照 object 分割的图片
├── Main
│ ├── train.txt 写着用于训练的图片名称, 共 2501 个
│ ├── val.txt 写着用于验证的图片名称,共 2510 个
│ ├── trainval.txt train与val的合集。共 5011 个
│ ├── test.txt 写着用于测试的图片名称,共 4952 个
- 项目步骤
1、使用 image_preprocessing/generate_xml_dir/图片裁剪.py 文件将原来的图片裁剪成统一格式的文件。
2、image_preprocessing/generate_xml_dir/生成图片的xml文件.py 提供生成 voc 格式 xml 文件的函数,供 框标定.py 调用。
3、使用 generate_xml_dir 文件夹下的 框标定.py 来生成 xml 文件
4、将对应目录下的xml和jpg文件剪切到yolov4 voc数据集 相关目录
5、使用 yolov4-pytorch-master/VOCdevkit 文件夹下的 voc2yolo4.py 来生成
yolov4-pytorch-master/VOCdevkit/VOC2007/ImageSets/Main 文件夹下的 train.txt等文件。
6、运行 yolov4-pytorch-master/voc_annotation.py 文件,记得更改classes为自己的classes。
7、在使用 yolov4-pytorch-master/predict.py 预测的时候,记得更改 yolov4-pytorch-master/yolo.py 下
的"anchors_path"和"classes_path"两个文件路径为自己的路径,及log下的pth和model_data下的classes.txt
8、使用predict.py进行预测,发现结果棉铃虫效果较好,其他的稍微差一些。
注意事项:
1、opencv不能读取含中文路径的图片,涉及图片的文件夹路径一律使用英文
2、image_preprocessing/generate_xml_dir/sobel算子边缘检测.py 里面对于不同的蛾子,cv2.threshold的参数应该不一样,这一次
没有管它们的差别,如果想要更加精细化训练,可以考虑加上
五、数据标注范例
<annotation>
<folder>VOC2007</folder>
<filename>DSC00142.JPG</filename>
<source>
<database>ezi</database>
</source>
<size>
<width>760</width>
<height>700</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>mlc</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>483</xmin>
<ymin>446</ymin>
<xmax>582</xmax>
<ymax>549</ymax>
</bndbox>
</object>
<object>
<name>mlc</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>131</xmin>
<ymin>442</ymin>
<xmax>221</xmax>
<ymax>532</ymax>
</bndbox>
</object>
</annotation>
六、关于xml的创建、读写问题
(1)xml的创建
# -*- coding:utf-8 -*-
# https://zhuanlan.zhihu.com/p/54269963
# from xml.etree import ElementTree as etree
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import SubElement
from xml.etree.ElementTree import ElementTree
from xml.dom import minidom
# Build <root><head><title/></head><body/></root> first, then fill in the text.
root = Element('root')
head = SubElement(root, 'head')
title = SubElement(head, 'title')
body = SubElement(root, 'body')

title.text = "Well Dola!"
body.text = "I Love Dola!"

# Wrap the root in a tree and write it out as UTF-8.
tree = ElementTree(root)
tree.write('result.xml', encoding='utf-8')
- 结果 :result.xml(为便于阅读,这里手动加了换行和缩进;tree.write 实际输出的内容在同一行,不带缩进)
<root>
<head>
<title>Well Dola!</title>
</head>
<body>I Love Dola!</body>
</root>
(2)在xml有根元素时写入
- customer.xml
<customer ID="C003">
<name>kavin</name>
<phone>32467</phone>
<comments>
<![CDATA[A small but healthy company.]]>
</comments>
</customer>
- 在 customer.xml 中写入的范例代码
from xml.dom.minidom import parse
import xml.dom.minidom
import os
def writeXML():
    """Append a ``<customer>`` node to customer.xml and save the result.

    Reads ``./customer.xml``, appends a new ``customer`` element (ID "C003",
    with ``name``, ``phone`` and a CDATA ``comments`` child) to the document
    root, and writes the document to ``added_customer.xml``.
    """
    domTree = parse("./customer.xml")
    # Root element of the document.
    rootNode = domTree.documentElement

    # New customer node.
    customer_node = domTree.createElement("customer")
    customer_node.setAttribute("ID", "C003")

    # name node with its text value.
    name_node = domTree.createElement("name")
    name_node.appendChild(domTree.createTextNode("kavin"))
    customer_node.appendChild(name_node)

    # phone node with its text value.
    phone_node = domTree.createElement("phone")
    phone_node.appendChild(domTree.createTextNode("32467"))
    customer_node.appendChild(phone_node)

    # comments node; its content is a CDATA section.
    comments_node = domTree.createElement("comments")
    comments_node.appendChild(domTree.createCDATASection("A small but healthy company."))
    customer_node.appendChild(comments_node)

    rootNode.appendChild(customer_node)

    # writexml's encoding argument only sets the encoding named in the XML
    # header, so the file must be opened with the same encoding or the header
    # may not match the bytes written.
    with open('added_customer.xml', 'w', encoding='utf-8') as f:
        domTree.writexml(f, addindent=' ', encoding='utf-8')


if __name__ == '__main__':
    writeXML()
- 结果
<?xml version="1.0" encoding="utf-8"?>
<customer ID="C003">
<name>kavin</name>
<phone>32467</phone>
<comments>
<![CDATA[A small but healthy company.]]>
</comments>
<customer ID="C003">
<name>kavin</name>
<phone>32467</phone>
<comments><![CDATA[A small but healthy company.]]> </comments>
</customer>
</customer>
(3)xml的读取
- movie.xml
<collection shelf="New Arrivals">
<movie title="Enemy Behind">
<type>War, Thriller</type>
<format>DVD</format>
<year>2003</year>
<rating>PG</rating>
<stars>10</stars>
<description>Talk about a US-Japan war</description>
</movie>
<movie title="Transformers">
<type>Anime, Science Fiction</type>
<format>DVD</format>
<year>1989</year>
<rating>R</rating>
<stars>8</stars>
<description>A schientific fiction</description>
</movie>
<movie title="Trigun">
<type>Anime, Action</type>
<format>DVD</format>
<episodes>4</episodes>
<rating>PG</rating>
<stars>10</stars>
<description>Vash the Stampede!</description>
</movie>
<movie title="Ishtar">
<type>Comedy</type>
<format>VHS</format>
<rating>PG</rating>
<stars>2</stars>
<description>Viewable boredom</description>
</movie>
</collection>
- 读取代码
# https://www.cnblogs.com/smart-zihan/p/12015192.html
from xml.dom.minidom import parse
import xml.dom.minidom
import os
def is_xml_exist(xml_path: str) -> bool:
    """Return True if *xml_path* exists on disk, False otherwise."""
    # os.path.exists already returns a bool; no branching is needed.
    return os.path.exists(xml_path)
def read_movie_xml():
    """Print the contents of ``movie.xml`` from the current directory.

    Prints the root element's ``shelf`` attribute when present, then the
    title, type, format, rating and description of every ``<movie>``. If the
    file does not exist, a message is printed and nothing is parsed.
    """
    path = "movie.xml"
    if not is_xml_exist(path):
        print("%s is not exist" % path)
        return

    # Parse the document with minidom.
    root_node = parse(path).documentElement
    shelf_attrib = "shelf"
    if root_node.hasAttribute(shelf_attrib):
        print("Lable: %s\tAttrib: %s\t\tValue: %s" % (
            root_node.nodeName, shelf_attrib, root_node.getAttribute(shelf_attrib)))
    print("")

    # (printed label, tag name) pairs. The description line used to be printed
    # under the label "Rating".
    fields = (
        ("Type", "type"),
        ("Format", "format"),
        ("Rating", "rating"),
        ("Description", "description"),
    )
    # Print the details of every movie in the collection.
    for movie in root_node.getElementsByTagName("movie"):
        print("**** Movie ****")
        if movie.hasAttribute("title"):
            print("Title: %s" % movie.getAttribute("title"))
        for label, tag in fields:
            node = movie.getElementsByTagName(tag)[0]
            print("%s: %s" % (label, node.childNodes[0].data))
        print("")


if __name__ == "__main__":
    read_movie_xml()