xml转json

最新推荐文章于 2024-06-05 13:49:14 发布

nio006

最新推荐文章于 2024-06-05 13:49:14 发布

阅读量166

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/weixin_43890188/article/details/106748930

版权

python 专栏收录该内容

23 篇文章 0 订阅

订阅专栏

from xml.etree import ElementTree

# Return codes of checkxmlchildrentype(): LISTTYPE when at least two children
# share a tag, DICTTYPE when every child tag is unique. getResults() tests the
# returned code for truthiness, so DICTTYPE must stay falsy and LISTTYPE truthy.
LISTTYPE = 1
DICTTYPE = 0


def getDictResults(res_dicts, iters):
    """Merge the converted children of *iters* into ``res_dicts[iters.tag]``.

    Each direct child is converted with ``iterxml`` into one shared dict keyed
    by child tag, which is then merged into the parent's existing dict
    (``res_dicts[iters.tag]`` must already exist and be a dict).

    Args:
        res_dicts: Dict holding the parent's entry under ``iters.tag``.
        iters: The parent ``Element`` whose children are converted.
    """
    result_dicts = {}
    # Element.getchildren() was removed in Python 3.9; iterating an Element
    # yields its direct children on every Python 3 version.
    for child in iters:
        iterxml(child, result_dicts)

    if result_dicts:
        res_dicts[iters.tag].update(result_dicts)


def getListResults(res_dicts, iters):
    """Store the converted children of *iters* as a list under the parent.

    Every direct child is converted with ``iterxml`` into its own single-key
    dict. If the parent's entry ``res_dicts[iters.tag]`` is still empty (no
    attributes or text were recorded), the entry is replaced by the list of
    children; otherwise the list is stored inside the entry under the
    ``"__XmlObjChildren__"`` key.

    Args:
        res_dicts: Dict holding the parent's entry under ``iters.tag``.
        iters: The parent ``Element`` whose children are converted.
    """
    result_lists = []
    # Element.getchildren() was removed in Python 3.9; iterate the element.
    for child in iters:
        child_dict = {}
        iterxml(child, child_dict)
        result_lists.append(child_dict)

    if not result_lists:
        return

    if res_dicts[iters.tag]:
        # Parent already carries attributes/text: keep them, nest the children.
        res_dicts[iters.tag]["__XmlObjChildren__"] = result_lists
    else:
        res_dicts[iters.tag] = result_lists


def checkxmlchildrentype(iters):
    """Classify how the children of *iters* should be represented.

    Returns:
        ``DICTTYPE`` when every direct child has a distinct tag (this includes
        an element with no children), ``LISTTYPE`` when at least two children
        share a tag.
    """
    # Element.getchildren() was removed in Python 3.9; iterate the element.
    tags = [child.tag for child in iters]
    return DICTTYPE if len(set(tags)) == len(tags) else LISTTYPE


def getResults(res_dicts, iters):
    """Convert the children of *iters* using the list or dict strategy.

    The strategy is chosen from the code returned by ``checkxmlchildrentype``:
    a truthy code selects ``getListResults``, a falsy one ``getDictResults``.
    The chosen converter's return value is passed through.
    """
    converter = getListResults if checkxmlchildrentype(iters) else getDictResults
    return converter(res_dicts, iters)


# @res_dicts    {}
def iterxml(iter, res_dicts):
    """Convert the element *iter* and store it in ``res_dicts[iter.tag]``.

    The entry starts as a dict of the element's attributes. Non-blank text is
    added, stripped, under ``"__XmlTagText__"``. If the element has children
    they are converted through ``getResults``, which may merge them into the
    entry or replace the entry with a list.

    Args:
        iter: The ``Element`` to convert.
        res_dicts: Dict that receives the converted entry (mutated in place).
    """
    node = {}
    res_dicts[iter.tag] = node

    node.update(iter.attrib)

    text = iter.text
    if text is not None and text.strip() != "":
        node["__XmlTagText__"] = text.strip()

    # Element.getchildren() was removed in Python 3.9; len() of an Element is
    # its number of direct children.
    if len(iter):
        getResults(res_dicts, iter)


def parserxmltojson(file_path):
    """Parse the XML file at *file_path* into nested dicts via ``iterxml``.

    Returns:
        A dict with a single key, the root tag, on success. On any parse or
        I/O failure a message is printed and the empty string ``""`` is
        returned instead.
    """
    # Parser errors commonly seen here and their usual causes:
    #   "multi-byte encodings are not supported"
    #       -> convert the file to UTF-8.
    #   "encoding specified in XML declaration is incorrect"
    #       -> the declared encoding differs from the file's real encoding.
    #   "syntax error"
    #       -> malformed markup, garbled characters, etc.
    #   "not well-formed (invalid token)"
    #       -> an editor re-saved the file as ASCII or similar, or the file's
    #          encoding does not match the XML declaration.
    try:
        parsed = ElementTree.parse(file_path)
    except Exception as err:
        failure = "Parser {} Error, Errmsg: {}".format(file_path, err)
        print(failure)
        return ""

    if parsed is None:
        print("{} is None.".format(file_path))
        return ""

    converted = {}
    iterxml(parsed.getroot(), converted)
    return converted
def _pdml_children(item):
    """Return the child wrappers of a converted node, or None if it has none."""
    if "__XmlObjChildren__" in item:
        return item["__XmlObjChildren__"]
    if "field" in item:
        # A lone <field> child is merged into its parent under the "field"
        # key; re-wrap it so it looks like a one-element child list.
        return [{"field": item["field"]}]
    return None


def getRst(packet):
    """Flatten converted proto/field nodes into a ``{name: value}`` dict.

    Args:
        packet: Iterable of single-key dicts, each keyed ``"proto"`` or
            ``"field"``, whose value is the node's dict of attributes and
            children as produced by ``iterxml``.

    Returns:
        A dict built as follows for each node:

        * node has ``show`` and a non-empty ``name``: ``name`` maps to ``show``
          and, when there are children, ``name + "_tree"`` maps to the
          flattened children; if ``show`` is empty, ``name`` maps to the
          flattened children, or ``""`` when there are none.
        * node has ``show`` but no usable ``name``: ``show`` maps to the
          flattened children, or ``""`` when there are none.
        * node has no ``show``: ``name`` maps to the flattened children;
          nodes without children are skipped.

        The ``"geninfo"`` key is removed from the result at every level.

    Raises:
        KeyError: If a wrapper has neither a ``"proto"`` nor a ``"field"`` key.
    """
    new_dict = {}
    for wrapper in packet:
        try:
            item = wrapper["proto"]
        except KeyError:
            item = wrapper["field"]

        name = item.get("name")
        children = _pdml_children(item)
        subtree = getRst(children) if children is not None else None

        if "show" not in item:
            if subtree is not None:
                new_dict[name] = subtree
            continue

        show = item["show"]
        if name is None or name == "":
            # Unnamed node: fall back to its display text as the key.
            new_dict[show] = subtree if subtree is not None else ""
        elif show != "":
            new_dict[name] = show
            if subtree is not None:
                new_dict[name + "_tree"] = subtree
        else:
            new_dict[name] = subtree if subtree is not None else ""

    new_dict.pop("geninfo", None)
    return new_dict






if __name__ == "__main__":
    import json

    # Convert haha.xml, reshape every packet into an index-style record and
    # write the list of records to test2.json.
    jsonret = parserxmltojson("haha.xml")
    if jsonret == "":
        # parserxmltojson() already printed the reason and returned "";
        # stop here instead of failing on "".get(...) below.
        raise SystemExit(1)

    new_format = []
    packets = jsonret.get("pdml").get("__XmlObjChildren__")
    for packet in packets:
        layers = getRst(packet.get("packet"))
        new_format.append({
            "_index": "packets-2020-06-13",
            "_type": "pcap_file",
            "_score": "null",
            "_source": {"layers": layers},
        })
    print(new_format)

    # Use a context manager so the output file is flushed and closed.
    with open("test2.json", "w", encoding="utf-8") as fp:
        json.dump(new_format, fp, ensure_ascii=False)

nio006

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
xml转json

from xml.etree import ElementTreeLISTTYPE = 1DICTTYPE = 0def getDictResults(res_dicts, iters): result_dicts = {} for iter in iters.getchildren(): iterxml(iter, result_dicts) if result_dicts: res_dicts[iters.tag].update(re
复制链接

扫一扫

专栏目录