import json
from xml.etree import ElementTree
# Child-layout markers returned by checkxmlchildrentype().
# LISTTYPE: at least two children share a tag, so children are kept as a list.
# DICTTYPE: every child tag is unique, so children are merged into a dict.
LISTTYPE = 1
DICTTYPE = 0


def getDictResults(res_dicts, iters):
    """Merge the converted children of *iters* into ``res_dicts[iters.tag]``.

    Used when all child tags are unique. ``res_dicts[iters.tag]`` must already
    exist and be a dict (``iterxml`` creates it). A child whose tag equals an
    attribute name or ``"__XmlTagText__"`` overwrites that entry.
    """
    children = {}
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in iters:
        iterxml(child, children)
    if children:
        res_dicts[iters.tag].update(children)


def getListResults(res_dicts, iters):
    """Store the converted children of *iters* as a list of one-key dicts.

    Used when at least two children share a tag. If the parent entry is still
    empty (no attributes, no text), it is replaced by the list itself;
    otherwise the list is stored under the ``"__XmlObjChildren__"`` key so the
    attributes/text already collected are preserved.
    """
    children = []
    for child in iters:
        entry = {}
        iterxml(child, entry)
        children.append(entry)
    if not children:
        return
    if not res_dicts[iters.tag]:
        res_dicts[iters.tag] = children
    else:
        res_dicts[iters.tag]["__XmlObjChildren__"] = children


def checkxmlchildrentype(iters):
    """Return ``DICTTYPE`` if all child tags are unique, else ``LISTTYPE``.

    An element without children yields ``DICTTYPE``.
    """
    tags = [child.tag for child in iters]
    if len(set(tags)) == len(tags):
        return DICTTYPE
    return LISTTYPE


def getResults(res_dicts, iters):
    """Convert the children of *iters* using the layout chosen by
    ``checkxmlchildrentype`` (list for repeated tags, dict otherwise)."""
    if checkxmlchildrentype(iters):
        return getListResults(res_dicts, iters)
    return getDictResults(res_dicts, iters)


def iterxml(iter, res_dicts):
    """Recursively convert the element *iter* into ``res_dicts[iter.tag]``.

    Args:
        iter: The XML element to convert. The name shadows the ``iter``
            builtin; it is kept so existing keyword callers keep working.
        res_dicts: Dict that receives the result under the element's tag.
            Any existing entry for that tag is replaced.

    The entry holds the element's attributes, its stripped text under
    ``"__XmlTagText__"`` (only when non-blank), and its converted children.
    """
    element = iter
    node = {}
    res_dicts[element.tag] = node
    if element.attrib:
        node.update(element.attrib)
    text = element.text
    if text is not None and text.strip() != "":
        node["__XmlTagText__"] = text.strip()
    # len() rather than truth-testing: Element truthiness is deprecated.
    if len(element) > 0:
        # May rebind res_dicts[element.tag] to a list (see getListResults).
        getResults(res_dicts, element)


def parserxmltojson(file_path):
    """Parse the XML file at *file_path* into nested dicts/lists.

    Returns:
        A dict with a single key (the root tag), or ``""`` when the file
        cannot be read or parsed; the error is printed in that case.
    """
    try:
        tree = ElementTree.parse(file_path)
    except Exception as e:
        # Typical failures seen here:
        # - "multi-byte encodings are not supported": convert the file to
        #   UTF-8.
        # - "encoding specified in XML declaration is incorrect": the declared
        #   XML encoding differs from the file's real character set.
        # - "syntax error": malformed content, garbled characters, etc.
        # - "not well-formed (invalid token)": an editor re-saved the file as
        #   ASCII or similar, or the file's character set differs from the
        #   declared XML encoding.
        print("Parser {} Error, Errmsg: {}".format(file_path, e))
        return ""
    if tree is None:
        print("{} is None.".format(file_path))
        return ""
    root = tree.getroot()
    report = {}
    iterxml(root, report)
    return report


if __name__ == "__main__":
    jsonret = parserxmltojson("test.xml")
    with open("test.json", "w", encoding="utf-8") as fd:
        # Serialize as real JSON; str() of a dict is a Python repr (single
        # quotes) that JSON parsers reject.
        fd.write(json.dumps(jsonret, ensure_ascii=False))
    print(jsonret)