from xml.etree import ElementTree
LISTTYPE = 1
DICTTYPE = 0
def getDictResults(res_dicts, iters):
    """Convert the children of ``iters`` and merge them into its own entry.

    Called by ``getResults`` when every direct child has a distinct tag, so
    each child can be stored under its tag without collisions.

    Args:
        res_dicts: Mapping that already holds a dict at ``res_dicts[iters.tag]``
            (``iterxml`` creates it); the converted children are merged into
            that dict in place.
        iters: The parent XML element whose direct children are converted.

    Returns:
        None. ``res_dicts`` is modified in place.
    """
    converted = {}
    # Element.getchildren() was removed in Python 3.9; iterating an element
    # yields its direct children on every Python version.
    for child in iters:
        iterxml(child, converted)
    if converted:
        res_dicts[iters.tag].update(converted)
def getListResults(res_dicts, iters):
    """Convert the children of ``iters`` into a list of single-tag dicts.

    Called by ``getResults`` when at least two direct children share a tag,
    so the children cannot be keyed by tag and are kept as an ordered list.

    Args:
        res_dicts: Mapping that already holds a dict at ``res_dicts[iters.tag]``
            (``iterxml`` creates it). If that dict is empty it is replaced by
            the list of children; otherwise the list is stored inside it under
            the ``"__XmlObjChildren__"`` key.
        iters: The parent XML element whose direct children are converted.

    Returns:
        None. ``res_dicts`` is modified in place.
    """
    converted = []
    # Element.getchildren() was removed in Python 3.9; iterating an element
    # yields its direct children on every Python version.
    for child in iters:
        entry = {}
        iterxml(child, entry)
        converted.append(entry)

    if not converted:
        return

    if res_dicts[iters.tag]:
        # The parent already carries attributes/text, so keep them and hang
        # the children off a reserved key.
        res_dicts[iters.tag]["__XmlObjChildren__"] = converted
    else:
        res_dicts[iters.tag] = converted
def checkxmlchildrentype(iters):
    """Decide whether the children of ``iters`` map to a dict or a list.

    Args:
        iters: The XML element whose direct children are inspected.

    Returns:
        ``DICTTYPE`` when every direct child has a distinct tag (this includes
        an element with no children), ``LISTTYPE`` when any tag is repeated.
    """
    # Element.getchildren() was removed in Python 3.9; iterating an element
    # yields its direct children on every Python version.
    tags = [child.tag for child in iters]
    if len(set(tags)) == len(tags):
        return DICTTYPE
    return LISTTYPE
def getResults(res_dicts, iters):
    """Convert the children of ``iters`` using the list or dict strategy.

    Args:
        res_dicts: Mapping holding the entry for ``iters``; handed unchanged
            to the chosen converter, which updates it in place.
        iters: The parent XML element whose children are converted.

    Returns:
        Whatever the selected converter returns.
    """
    # checkxmlchildrentype yields LISTTYPE (1, truthy) or DICTTYPE (0, falsy),
    # so its result is used directly as the selector.
    convert = getListResults if checkxmlchildrentype(iters) else getDictResults
    return convert(res_dicts, iters)
def iterxml(iter, res_dicts):
    """Convert one XML element into ``res_dicts[iter.tag]``.

    The entry starts as a dict of the element's attributes. Non-blank element
    text is stored, stripped, under ``"__XmlTagText__"``. When the element has
    children they are converted through ``getResults``, which may merge them
    into the entry or replace the entry with a list.

    Args:
        iter: The XML element to convert. (The name shadows the ``iter``
            builtin; it is kept so existing keyword callers keep working.)
        res_dicts: Dict that receives the converted entry, keyed by the
            element's tag. Any existing value under that tag is overwritten.

    Returns:
        None. ``res_dicts`` is modified in place.
    """
    entry = {}
    res_dicts[iter.tag] = entry
    entry.update(iter.attrib)

    text = iter.text
    if text is not None and text.strip() != "":
        entry["__XmlTagText__"] = text.strip()

    # Element.getchildren() was removed in Python 3.9; len() of an element is
    # its number of direct children.
    if len(iter):
        getResults(res_dicts, iter)
def parserxmltojson(file_path):
    """Parse an XML file and convert it into nested dicts and lists.

    Args:
        file_path: Path (or file object) handed to ``ElementTree.parse``.

    Returns:
        A dict with a single key, the root element's tag, holding the
        converted document. If the file cannot be read or parsed, the error
        is printed and the empty string ``""`` is returned instead.
    """
    try:
        tree = ElementTree.parse(file_path)
    except Exception as e:
        # Parse errors commonly seen here, and their usual causes:
        # - "multi-byte encodings are not supported": convert the file to
        #   UTF-8.
        # - "encoding specified in XML declaration is incorrect": the declared
        #   XML encoding differs from the file's real character set.
        # - "syntax error": malformed XML, garbled characters, etc.
        # - "not well-formed (invalid token)": an editor re-saved the file in
        #   another character set (e.g. ASCII), or the file's character set
        #   does not match the declared XML encoding.
        print("Parser {} Error, Errmsg: {}".format(file_path, e))
        return ""

    # ElementTree.parse either returns a tree or raises, so no None check is
    # needed before reading the root.
    report = {}
    iterxml(tree.getroot(), report)
    return report
def _subtree(item):
    """Return the flattened children of ``item``, or None if it has none."""
    # Several children are stored as a list under "__XmlObjChildren__"; a
    # single child is stored directly under its tag, "field".
    if "__XmlObjChildren__" in item:
        return getRst(item["__XmlObjChildren__"])
    if "field" in item:
        return getRst([{"field": item["field"]}])
    return None


def getRst(packet):
    """Flatten a list of converted ``proto``/``field`` entries into one dict.

    Each entry is a dict holding the element's attributes under a ``"proto"``
    or ``"field"`` key. The result is built as follows:

    * Element without a ``show`` attribute: ``result[name]`` is the flattened
      children; the element is skipped when it has no children.
    * ``show`` present, ``name`` missing or empty: ``result[show]`` is the
      flattened children, or ``""`` when there are none.
    * ``show`` and ``name`` both non-empty: ``result[name]`` is ``show`` and
      the flattened children, if any, go under ``name + "_tree"``.
    * ``show`` empty, ``name`` non-empty: ``result[name]`` is the flattened
      children, or ``""`` when there are none.

    A top-level ``"geninfo"`` key is always removed from the result.

    Args:
        packet: Iterable of dicts shaped like ``{"proto": {...}}`` or
            ``{"field": {...}}``.

    Returns:
        A new dict mapping names to values or nested dicts.

    Raises:
        KeyError: If an entry has neither a ``"proto"`` nor a ``"field"`` key.
    """
    new_dict = {}
    for entry in packet:
        try:
            item = entry["proto"]
        except KeyError:
            item = entry["field"]

        name = item.get("name")
        children = _subtree(item)

        if "show" not in item:
            if children is not None:
                new_dict[name] = children
            continue

        show = item.get("show")
        if name is None or name == "":
            # Unnamed element: its displayed value is the only usable key.
            new_dict[show] = "" if children is None else children
        elif show != "":
            new_dict[name] = show
            if children is not None:
                new_dict[name + "_tree"] = children
        else:
            new_dict[name] = "" if children is None else children

    new_dict.pop("geninfo", None)
    return new_dict
if __name__ == "__main__":
    # Convert the capture in "haha.xml" into a list of per-packet documents
    # and write that list to "test2.json".
    import json

    jsonret = parserxmltojson("haha.xml")
    if not jsonret:
        # parserxmltojson has already printed the reason and returned "".
        raise SystemExit(1)

    pdml = jsonret.get("pdml")
    packets = pdml.get("__XmlObjChildren__")
    if packets is None and "packet" in pdml:
        # With exactly one <packet> the children have unique tags, so the
        # packet is stored under its own tag rather than in the children list.
        packets = [{"packet": pdml["packet"]}]

    new_format = []
    for entry in packets:
        layers = getRst(entry.get("packet"))
        new_format.append({
            "_index": "packets-2020-06-13",
            "_type": "pcap_file",
            "_score": "null",
            "_source": {"layers": layers},
        })
    print(new_format)

    # Use a context manager so the output file is flushed and closed.
    with open("test2.json", "w", encoding="utf-8") as fp:
        json.dump(new_format, fp, ensure_ascii=False)
# XML to JSON
# (Web page footer: latest recommended article published 2024-06-05 13:49:14.)