VOC2007数据集下载
下载地址 (pjreddie 镜像站, 非 PASCAL VOC 官方站点): https://pjreddie.com/projects/pascal-voc-dataset-mirror/
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
beta v1.0
仅支持显示单目标的 XML 标注
图片 (*.jpg) 和标注 (*.xml) 的文件名必须是数字 (如 1.jpg、1.xml)
读取的XML如:
<Annotation>
<filename>1</filename>
<size>
<width>416</width>
<height>416</height>
<depth>3</depth>
</size>
<object>
<name>OKKK</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>83</xmin>
<ymin>71</ymin>
<xmax>95</xmax>
<ymax>396</ymax>
</bndbox>
</object>
</Annotation>
CODE
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import cv2
def xyxyFromXML(path):
    """Read one bounding box and its class label from a VOC-style XML file.

    Intended for annotation files that contain a single ``<object>``.

    Args:
        path: File name or file object accepted by ``ET.parse``,
            e.g. "./voc/1.xml".

    Returns:
        A tuple ``([xmin, ymin, xmax, ymax], label)``. Every value is the raw
        element text (a string), exactly as stored in the XML.

    Raises:
        ValueError: If no ``<object>`` with a ``<bndbox>`` is present.
    """
    # Root element of the document; its tag name is not checked.
    root = ET.parse(path).getroot()

    box = None
    label = None
    for element in root.findall('object'):
        label = element.find('name').text
        for bndbox in element.findall('bndbox'):
            box = [bndbox.find(tag).text
                   for tag in ('xmin', 'ymin', 'xmax', 'ymax')]

    # NOTE(review): the original nesting was lost; this keeps the reading in
    # which the values are returned after the loops, so with several boxes
    # the last one parsed wins. Confirm if multi-object files matter here.
    if box is None:
        raise ValueError(f'no <object>/<bndbox> found in {path!r}')
    return box, label
def drawImg(img, xyxy):
    """Draw one rectangle for ``xyxy`` on a copy of ``img``.

    Args:
        img: Image array, e.g. the result of ``cv2.imread``.
        xyxy: Sequence whose first four items are xmin, ymin, xmax, ymax.
            Items may be numbers or numeric strings ("83" or "83.0").

    Returns:
        A new image with the rectangle drawn in colour (0, 255, 0) and
        thickness 2. The input ``img`` is left unmodified.
    """
    # int(float(...)) also accepts coordinate text such as "83.0".
    xmin, ymin, xmax, ymax = (int(float(v)) for v in xyxy[:4])
    # Draw on a copy so the caller's image is not altered as a side effect.
    return cv2.rectangle(img.copy(), (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
def main():
    """Show images 1.jpg .. 4.jpg, each with its annotated box drawn.

    Image ``{num}.jpg`` and annotation ``{num}.xml`` are read from the
    VOCdevkit/VOC2007 folders; adjust the range and the paths to your data.
    After each image is shown the function waits for a key press.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read an image.
    """
    # Number of images to display; file names are 1-based.
    for num in range(1, 5):
        imgPath = f'VOCdevkit/VOC2007/JPEGImages/{num}.jpg'
        xmlPath = f'VOCdevkit/VOC2007/Annotations/{num}.xml'
        # The label is parsed but not drawn in this version.
        xyxy, _label = xyxyFromXML(xmlPath)
        img = cv2.imread(imgPath)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f'cannot read image: {imgPath}')
        imgRect = drawImg(img, xyxy)
        cv2.imshow('imgRect', imgRect)
        cv2.waitKey()
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
beta v2.0
支持绘制多目标的 XML 标注
图片 (*.jpg) 和标注 (*.xml) 的文件名不限, 但两者的主文件名必须一致
voc XML
<annotation>
<folder>VOC2007</folder>
<filename>000003.jpg</filename>
<source>
<database>The VOC2007 Database</database>
<annotation>PASCAL VOC2007</annotation>
<image>flickr</image>
<flickrid>138563409</flickrid>
</source>
<owner>
<flickrid>RandomEvent101</flickrid>
<name>?</name>
</owner>
<size>
<width>500</width>
<height>375</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>sofa</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>123</xmin>
<ymin>155</ymin>
<xmax>215</xmax>
<ymax>195</ymax>
</bndbox>
</object>
<object>
<name>chair</name>
<pose>Left</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>239</xmin>
<ymin>156</ymin>
<xmax>307</xmax>
<ymax>205</ymax>
</bndbox>
</object>
</annotation>
my XML
<Annotation>
<filename>aa1</filename>
<size>
<width>1920</width>
<height>1080</height>
<depth>3</depth>
</size>
<object>
<name>sn</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1071</xmin>
<ymin>679</ymin>
<xmax>1240</xmax>
<ymax>797</ymax>
</bndbox>
</object>
<object>
<name>sn</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>718</xmin>
<ymin>685</ymin>
<xmax>887</xmax>
<ymax>803</ymax>
</bndbox>
</object>
<object>
<name>so</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1301</xmin>
<ymin>672</ymin>
<xmax>1642</xmax>
<ymax>810</ymax>
</bndbox>
</object>
</Annotation>
CODE
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import os.path as osp
import os
import cv2
import random
def xyxyFromXML(path):
    """Collect every bounding box and its class label from a VOC-style XML.

    Args:
        path: File name or file object accepted by ``ET.parse``,
            e.g. "./voc/1.xml".

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax, name]`` entry per
        ``<bndbox>`` found under a top-level ``<object>``. All five items are
        the raw element text (strings). The list is empty when the file has
        no objects.
    """
    # Root element of the document; its tag name is not checked, so both
    # <annotation> and <Annotation> roots are read the same way.
    root = ET.parse(path).getroot()

    ret = []  # shape: [num, 5], 5 = [xmin, ymin, xmax, ymax, name]
    for element in root.findall('object'):
        labelname = element.find('name').text
        for bndbox in element.findall('bndbox'):
            coords = [bndbox.find(tag).text
                      for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            ret.append(coords + [labelname])
    return ret
def drawImg(img, xyxys):
    """Draw every box and its label on a copy of ``img``.

    Args:
        img: Image array, e.g. the result of ``cv2.imread``.
        xyxys: Sequence of ``[xmin, ymin, xmax, ymax, name]`` entries.
            Coordinates may be numbers or numeric strings.

    Returns:
        A new image with one rectangle and one text label per entry. Colours
        cycle through a fixed palette by entry index. The input ``img`` is
        left unmodified.
    """
    imgRect = img.copy()
    _colors = ((0, 255, 0),
               (255, 0, 255),
               (255, 69, 0),
               (0, 250, 154),
               (255, 165, 0),
               (205, 133, 63))
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1.4
    thickness = 2

    for i, xyxy in enumerate(xyxys):
        # int(float(...)) also accepts coordinate text such as "83.0".
        xmin, ymin, xmax, ymax = (int(float(v)) for v in xyxy[:4])
        labelname = str(xyxy[4])
        # Cycle by the palette's real length instead of a hard-coded 6.
        color = _colors[i % len(_colors)]
        imgRect = cv2.rectangle(imgRect, (xmin, ymin), (xmax, ymax), color, thickness)

        # The label normally sits just above the box. When the box touches
        # the top of the image that position would be clipped, so place the
        # text just inside the box instead.
        (_text_w, text_h), _baseline = cv2.getTextSize(
            labelname, font, font_scale, thickness)
        text_y = ymin - 5
        if text_y < text_h:
            text_y = ymin + text_h + 5
        imgRect = cv2.putText(imgRect, labelname, (xmin, text_y),
                              font, font_scale, color, thickness)
    return imgRect
def showImg(imgPath, xyxys, showSrc=True, showDst=True, resize=None):
    """Display an image and/or its annotated version, then wait for a key.

    Args:
        imgPath: Path of the image file to load with ``cv2.imread``.
        xyxys: Sequence of ``[xmin, ymin, xmax, ymax, name]`` entries that is
            passed on to ``drawImg``.
        showSrc: Show the unannotated image in a window named 'src'.
        showDst: Show the annotated image in a window named 'imgRect'.
        resize: Optional scale factor applied to both images before display
            (e.g. 0.5 halves width and height). ``None`` keeps the size.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``imgPath``.
    """
    img = cv2.imread(imgPath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read image: {imgPath}')

    # Boxes are drawn at full resolution, then both images are scaled, so
    # the annotation coordinates never need rescaling.
    imgRect = drawImg(img, xyxys)
    if resize is not None:
        # cv2.resize expects (width, height); shape is (height, width, ...).
        _shape = (int(img.shape[1] * resize), int(img.shape[0] * resize))
        img = cv2.resize(img, _shape)
        imgRect = cv2.resize(imgRect, _shape)

    if showSrc:
        cv2.imshow('src', img)
    if showDst:
        cv2.imshow('imgRect', imgRect)
    cv2.waitKey()
def main():
    """Show the first ``showNum`` annotated images of the VOC2007 folders.

    Every ``*.jpg`` in the image folder is paired with the ``.xml`` of the
    same base name in the annotation folder. Adjust the two paths and
    ``showNum`` to your data.
    """
    save_imgs_path = 'VOCdevkit/VOC2007/JPEGImages/'
    save_xmls_path = 'VOCdevkit/VOC2007/Annotations/'
    # Number of images to display.
    showNum = 2

    # os.listdir returns entries in arbitrary order and may include non-image
    # files; keep only jpg files and sort them so runs are repeatable.
    img_name_list = sorted(
        fname for fname in os.listdir(save_imgs_path)
        if fname.lower().endswith('.jpg')
    )

    # Slicing avoids an IndexError when fewer than showNum images exist.
    for img_name in img_name_list[:showNum]:
        # splitext keeps dots inside the base name ("a.b.jpg" -> "a.b").
        name = osp.splitext(img_name)[0]
        imgPath = osp.join(save_imgs_path, img_name)
        xmlPath = osp.join(save_xmls_path, name + '.xml')
        # [num, 5]
        xyxys = xyxyFromXML(xmlPath)
        showImg(imgPath, xyxys, showSrc=True, showDst=True, resize=0.5)
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()