# Note: a "small nodule" is a nodule annotated with only a single coordinate;
# unlike other nodules, it has no `characteristics` element.
from xml.dom.minidom import parse
# Parse the XML annotation file
def _first_text(node, tag):
    """Return the data of the first child of the first *tag* descendant of *node*."""
    return node.getElementsByTagName(tag)[0].childNodes[0].data


def _to_coor(point_node, z_text):
    """Build an ``(int x, int y, float z)`` tuple from *point_node* and *z_text*.

    *point_node* is the element holding the ``xCoord``/``yCoord`` children.
    """
    x_text = _first_text(point_node, "xCoord")
    y_text = _first_text(point_node, "yCoord")
    return int(x_text), int(y_text), float(z_text)


def _add_unique(coor, coords, seen):
    """Append *coor* to *coords* unless it was already recorded in *seen*."""
    if coor not in seen:
        seen.add(coor)
        coords.append(coor)


def parseXML(xml_path):
    """Collect small-nodule and non-nodule coordinates from an annotation XML.

    Every ``readingSession`` element under the document root is scanned.
    Within each session:

    * an ``unblindedReadNodule`` is used only when it has no
      ``characteristics`` descendant (nodules that do have one are skipped);
      its coordinate is taken from its first ``roi``: the ``imageZposition``
      plus the ``xCoord``/``yCoord`` of the first ``edgeMap``;
    * a ``nonNodule`` contributes its ``imageZposition`` plus the
      ``xCoord``/``yCoord`` of its first ``locus``.

    Identical coordinates (e.g. the same point marked in several sessions)
    are reported once, in first-seen order.

    Args:
        xml_path: File name or file object, passed to
            ``xml.dom.minidom.parse``.

    Returns:
        A tuple ``(nodules_coor, non_nodules_coor)``; each is a list of
        ``(x, y, z)`` tuples with ``x``/``y`` as ``int`` and ``z`` as ``float``.

    Raises:
        IndexError: A required element is missing or has no child node.
        ValueError: A coordinate's text is not a valid number.
    """
    nodules_coor = []
    non_nodules_coor = []
    # Sets mirror the lists so de-duplication is O(1) per coordinate while
    # the lists keep insertion order for the caller.
    seen_nodules = set()
    seen_non_nodules = set()

    # NOTE: a leftover debug probe (print of the root node name followed by
    # exit(0)) used to sit here and terminated the process before any
    # parsing happened; it has been removed.
    root_node = parse(xml_path).documentElement

    # One readingSession per annotating reader.
    for session in root_node.getElementsByTagName("readingSession"):
        # Nodules: keep only the ones without a `characteristics` element.
        for nodule in session.getElementsByTagName("unblindedReadNodule"):
            if nodule.getElementsByTagName("characteristics"):
                continue
            roi = nodule.getElementsByTagName("roi")[0]
            z_text = _first_text(roi, "imageZposition")
            edge_map = roi.getElementsByTagName("edgeMap")[0]
            _add_unique(_to_coor(edge_map, z_text), nodules_coor, seen_nodules)

        # Non-nodules: one locus per entry.
        for non_nodule in session.getElementsByTagName("nonNodule"):
            z_text = _first_text(non_nodule, "imageZposition")
            locus = non_nodule.getElementsByTagName("locus")[0]
            _add_unique(
                _to_coor(locus, z_text), non_nodules_coor, seen_non_nodules
            )

    print("Finish ", xml_path)
    return nodules_coor, non_nodules_coor
if __name__ == '__main__':
    # Script entry point: parse one annotation file and print both
    # coordinate lists together with their sizes.
    xml_path = '115.xml'
    nodules_coor, non_nodules_coor = parseXML(xml_path)

    # (label, value) pairs, printed in order.
    report = (
        ("nodules_coor:", nodules_coor),
        ("len(nodules_coor):", len(nodules_coor)),
        ("non_nodules_coor:", non_nodules_coor),
        ("len(non_nodules_coor):", len(non_nodules_coor)),
    )
    for label, value in report:
        print(label, value)