python实现xml标注文件生成mask
数据集:Medical Images for Nucleus Segmentation(来自TCGA的细胞核分割公共数据集;原图为.tif格式,每幅原图对应一个同名的.xml标注文件)
xml文件结构:
若干个(X,Y)坐标点所围成的区域(region)表示一个细胞核的mask;若干个region组成的regions即这幅图所有细胞核的mask。
<Annotations MicronsPerPixel="0.252000">
<Annotation ...>
<Attributes>
...
</Attributes>
<Regions>
<RegionAttributeHeaders>
...
</RegionAttributeHeaders>
<Region Id="2" Type="0" Zoom="5" Selected="0" ImageLocation="" ImageFocus="0" Length="126.0" Area="1049.3" LengthMicrons="31.8" AreaMicrons="66.6" Text="" NegativeROA="0" InputRegionId="0" Analyze="1" DisplayId="2">
<Attributes/>
<Vertices>
<Vertex X="96.400000" Y="70.800000"/>
<Vertex X="96.600000" Y="71"/>
...
</Vertices>
</Region>
<Region Id="3" ..>
...
</Region>
...
</Regions>
<Plots/>
</Annotation>
</Annotations>
代码:
# -*- coding: utf-8 -*-
import numpy as np
import cv2
import os
import xml.etree.ElementTree as ET
def xml2mask(filename, dir, save_dir="the\\dir\\you\\want\\to\\save\\to", mask_shape=(1000, 1000)):
    """Rasterize the regions of one XML annotation file into a binary PNG mask.

    Every ``Annotation/Regions/Region`` element is treated as a closed
    polygon whose corners are its ``Vertices/Vertex`` children (``X`` / ``Y``
    attributes). Each polygon is filled with 255 on a zero background and the
    result is saved as ``<save_dir>/<filename without extension>.png``.

    Args:
        filename: Name of the XML annotation file (relative to ``dir``).
        dir: Directory that contains ``filename``.
        save_dir: Directory the PNG mask is written to. The default is the
            placeholder path used by the original script.
        mask_shape: ``(height, width)`` of the generated mask; defaults to
            the original hard-coded 1000 x 1000.

    Raises:
        FileNotFoundError: If the annotation file does not exist.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If OpenCV reports that the mask could not be written.
    """
    mask = np.zeros(mask_shape, dtype=np.uint8)

    # Build the path from the *parameter*; the previous code read a global
    # named `file`, which only worked by accident inside the driver loop.
    xml_path = os.path.join(dir, filename)
    root = ET.parse(xml_path).getroot()

    for region in root.findall('Annotation/Regions/Region'):
        points = [
            [float(vertex.attrib['X']), float(vertex.attrib['Y'])]
            for vertex in region.findall('Vertices/Vertex')
        ]
        if not points:
            # A region without vertices has nothing to fill; skip it instead
            # of handing an empty polygon to OpenCV.
            continue
        # fillPoly needs integer coordinates; the int32 cast truncates the
        # fractional part of the annotated positions.
        pts = np.asarray([points], dtype=np.int32)
        cv2.fillPoly(img=mask, pts=pts, color=255)

    out_path = os.path.join(save_dir, os.path.splitext(filename)[0] + ".png")
    # imwrite signals failure through its return value rather than raising.
    if not cv2.imwrite(out_path, mask):
        raise OSError("could not write mask image to {0}".format(out_path))
# Batch driver: convert every XML annotation file in `dir` into a PNG mask.
dir = "the\\path\\to\\your\\xml\\files"
# Keep only .xml entries so that images, sub-directories or other stray
# files in the folder are not handed to the XML parser.
files = [name for name in os.listdir(dir) if name.lower().endswith(".xml")]
for i, file in enumerate(files, start=1):
    xml2mask(file, dir)
    # Progress message: number of images finished so far.
    print('已完成{0}幅图像!'.format(i))
print("全部完成!")
转换结果: