# xml转json (XML to JSON conversion)

from xml.etree import ElementTree

LISTTYPE = 1
DICTTYPE = 0


def getDictResults(res_dicts, iters):
    """Convert the children of *iters* and merge them into ``res_dicts[iters.tag]``.

    Every direct child of the element *iters* is converted with ``iterxml``
    into a scratch dict keyed by the child's tag. If at least one child was
    converted, the scratch dict is merged into the entry that already exists
    for the parent tag, so ``res_dicts[iters.tag]`` must be present in that
    case (a missing entry raises ``KeyError``).

    Args:
        res_dicts: Dict holding the parent's entry under ``iters.tag``;
            updated in place.
        iters: The parent XML element whose children are converted.

    Returns:
        None.
    """
    child_dicts = {}
    # Iterate the element itself: Element.getchildren() was removed in
    # Python 3.9, and iterating an element yields its direct children.
    for child in iters:
        iterxml(child, child_dicts)

    if child_dicts:
        res_dicts[iters.tag].update(child_dicts)


def getListResults(res_dicts, iters):
    """Convert the children of *iters* into a list stored under the parent entry.

    Each direct child is converted with ``iterxml`` into its own single-key
    dict and the dicts are collected in document order. When there is at
    least one child:

    * if the parent's entry ``res_dicts[iters.tag]`` is still empty, the
      entry is replaced by the list itself;
    * otherwise the list is stored under the ``"__XmlObjChildren__"`` key of
      the existing entry, next to whatever it already holds.

    Args:
        res_dicts: Dict holding the parent's entry under ``iters.tag``;
            updated in place.
        iters: The parent XML element whose children are converted.

    Returns:
        None.
    """
    children = []
    # Iterate the element itself: Element.getchildren() was removed in
    # Python 3.9, and iterating an element yields its direct children.
    for child in iters:
        child_dict = {}
        iterxml(child, child_dict)
        children.append(child_dict)

    if not children:
        return

    if not res_dicts[iters.tag]:
        res_dicts[iters.tag] = children
    else:
        res_dicts[iters.tag]["__XmlObjChildren__"] = children


def checkxmlchildrentype(iters):
    """Decide whether the children of *iters* are stored as a dict or a list.

    Returns ``DICTTYPE`` when every direct child has a distinct tag (this
    includes the case of no children at all), and ``LISTTYPE`` as soon as at
    least two children share a tag.
    """
    # Iterate the element itself: Element.getchildren() was removed in
    # Python 3.9, and iterating an element yields its direct children.
    tags = [child.tag for child in iters]

    # A set drops duplicates, so equal sizes mean all tags are unique.
    if len(set(tags)) == len(tags):
        return DICTTYPE
    return LISTTYPE


def getResults(res_dicts, iters):
    """Convert the children of *iters* using the representation their tags call for.

    ``checkxmlchildrentype`` picks the representation: a truthy result
    (``LISTTYPE``) routes to ``getListResults``, a falsy one (``DICTTYPE``)
    to ``getDictResults``. The chosen converter's return value is passed
    through unchanged.
    """
    wants_list = checkxmlchildrentype(iters)
    converter = getListResults if wants_list else getDictResults
    return converter(res_dicts, iters)


# @res_dicts    {}
def iterxml(iter, res_dicts):
    """Convert the XML element *iter* and store the result in ``res_dicts[iter.tag]``.

    The entry starts as an empty dict and receives:

    * one key per XML attribute of the element;
    * ``"__XmlTagText__"`` with the stripped element text, when that text is
      not blank;
    * the converted children, added by ``getResults`` (which may replace the
      entry by a list when it is otherwise empty).

    Args:
        iter: The XML element to convert. The name shadows the builtin but
            is kept so existing keyword callers keep working.
        res_dicts: Dict that receives the entry; any previous value stored
            under ``iter.tag`` is overwritten.

    Returns:
        None.
    """
    node = {}
    res_dicts[iter.tag] = node

    # Copy all attributes in one go; a no-op when there are none.
    node.update(iter.attrib)

    text = iter.text
    if text is not None and text.strip() != "":
        node["__XmlTagText__"] = text.strip()

    # len(element) is the number of direct children. Element.getchildren()
    # was removed in Python 3.9 and must not be used here.
    if len(iter):
        getResults(res_dicts, iter)


def parserxmltojson(file_path):
    """Parse the XML file at *file_path* into nested dicts and lists.

    Args:
        file_path: Path (or file object) handed to ``ElementTree.parse``.

    Returns:
        A dict with a single key, the root element's tag, filled in by
        ``iterxml``. If parsing raises, or the parser yields ``None``, a
        message is printed and the empty string ``""`` is returned instead.
    """
    try:
        tree = ElementTree.parse(file_path)
    except Exception as err:
        # Parser messages the original author ran into, with their notes:
        #   "multi-byte encodings are not supported"
        #       -> convert the file to UTF-8.
        #   "encoding specified in XML declaration is incorrect"
        #       -> the declared XML encoding differs from the file's charset.
        #   "syntax error"
        #       -> malformed XML, garbled characters, etc.
        #   "not well-formed (invalid token)"
        #       -> an editor rewrote the file in another charset (e.g. ASCII),
        #          or the file's charset does not match the XML declaration.
        message = "Parser {} Error, Errmsg: {}".format(file_path, err)
        print(message)
        return ""

    if tree is None:
        message = "{} is None.".format(file_path)
        print(message)
        return ""

    converted = {}
    iterxml(tree.getroot(), converted)
    return converted
def _rst_children(item):
    """Return *item*'s child entries as a list ``getRst`` can walk, or None if it has none."""
    if "__XmlObjChildren__" in item:
        return item["__XmlObjChildren__"]
    if "field" in item:
        # A lone "field" child sits directly under its tag instead of in a
        # children list; wrap it so it can be processed like any other entry.
        return [{"field": item["field"]}]
    return None


def getRst(packet):
    """Flatten converted ``proto`` / ``field`` entries into a name -> value dict.

    *packet* is an iterable of dicts, each holding the converted element
    under the key ``"proto"`` or, failing that, ``"field"``. For every
    element the ``name`` and ``show`` attributes decide what is emitted:

    * ``show`` present, ``name`` non-empty, ``show`` non-empty:
      ``result[name] = show``; children, if any, go to ``result[name + "_tree"]``.
    * ``show`` present, ``name`` non-empty, ``show`` empty:
      ``result[name]`` is the flattened children, or ``""`` without children.
    * ``show`` present, ``name`` missing or empty:
      ``result[show]`` is the flattened children, or ``""`` without children.
    * ``show`` absent: ``result[name]`` is the flattened children; elements
      without children are skipped.

    Children are taken from ``"__XmlObjChildren__"`` or, when the element has
    exactly one ``field`` child, from the ``"field"`` key. This applies to
    every case above, so single-child subtrees are never dropped.

    The ``"geninfo"`` entry is removed from the result at every nesting level.

    Args:
        packet: Iterable of ``{"proto": {...}}`` / ``{"field": {...}}`` dicts.

    Returns:
        A new dict mapping names to ``show`` strings or nested dicts.

    Raises:
        KeyError: If an entry has neither a ``"proto"`` nor a ``"field"`` key.
    """
    new_dict = {}
    for entry in packet:
        try:
            item = entry["proto"]
        except KeyError:
            item = entry["field"]

        name = item.get("name")
        children = _rst_children(item)

        if "show" not in item:
            # Without a display value only the subtree carries information.
            if children is not None:
                new_dict[name] = getRst(children)
            continue

        show = item["show"]
        if name is None or name == "":
            # Unnamed element: fall back to its display text as the key.
            new_dict[show] = getRst(children) if children is not None else ""
        elif show != "":
            new_dict[name] = show
            if children is not None:
                new_dict[name + "_tree"] = getRst(children)
        else:
            new_dict[name] = getRst(children) if children is not None else ""

    new_dict.pop("geninfo", None)
    return new_dict






if __name__ == "__main__":
    # Script entry point: convert "haha.xml" into a list of index-style
    # records and write them to "test2.json".
    import json

    jsonret = parserxmltojson("haha.xml")
    if not jsonret:
        # parserxmltojson has already printed the reason and returned "";
        # stop here instead of failing later with an AttributeError on .get().
        raise SystemExit(1)

    new_format = []
    packets = jsonret.get("pdml").get("__XmlObjChildren__")
    for packet in packets:
        layers = getRst(packet.get("packet"))
        new_format.append({
            "_index": "packets-2020-06-13",
            "_type": "pcap_file",
            "_score": "null",
            "_source": {"layers": layers}
        })
    print(new_format)

    # Use a context manager so the output file is flushed and closed.
    with open("test2.json", "w", encoding="utf-8") as fp:
        json.dump(new_format, fp=fp, ensure_ascii=False)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值