python解析xml字符串为字典_python - 如何将xml字符串转换为字典? - 堆栈内存溢出...

===============>>#1 票数:245

xmltodict (完全公开:我写了它)确实做到了:

xmltodict.parse("""
<person>
  <name>john</name>
  <age>20</age>
</person>
""")

# {u'person': {u'age': u'20', u'name': u'john'}}

===============>>#2 票数:52 已采纳

这是网站上的代码,以防万一链接损坏。

from xml.etree import cElementTree as ElementTree

class XmlListConfig(list):
    """List built from the child elements of an XML element.

    A child that has children of its own becomes an ``XmlDictConfig`` when it
    has exactly one child or its first two children carry different tags,
    and a nested ``XmlListConfig`` when its first two children share a tag.
    A childless child contributes its stripped text if that text is non-empty.
    """

    def __init__(self, aList):
        for element in aList:
            # len() counts child elements; truth-testing an Element directly
            # is deprecated in ElementTree, so the check is spelled out.
            if len(element):
                # treat like dict
                if len(element) == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                # treat like list (first two child tags are equal)
                else:
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)


class XmlDictConfig(dict):
    '''
    Dictionary view of an XML element: attributes and child tags become keys.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, if you want to use an XML string:

    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    And then use xmldict for what it is... a dict.

    Children sharing a tag directly under ``parent_element`` overwrite each
    other, because each child is stored with ``dict.update``.
    '''

    def __init__(self, parent_element):
        if parent_element.items():
            self.update(dict(parent_element.items()))
        for element in parent_element:
            # len() counts child elements; truth-testing an Element directly
            # is deprecated in ElementTree.
            if len(element):
                # treat like dict - we assume that if the first two tags
                # in a series are different, then they are all different.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element)
                # treat like list - we assume that if the first two tags
                # in a series are the same, then the rest are the same.
                else:
                    # here, we put the list in dictionary; the key is the
                    # tag name the list elements all share in common, and
                    # the value is the list itself
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict
                if element.items():
                    aDict.update(dict(element.items()))
                self.update({element.tag: aDict})
            # this assumes that if you've got an attribute in a tag,
            # you won't be having any text. This may or may not be a
            # good idea -- time will tell. It works for the way we are
            # currently doing XML configuration files...
            elif element.items():
                self.update({element.tag: dict(element.items())})
            # finally, if there are no child tags and no attributes, extract
            # the text
            else:
                self.update({element.tag: element.text})

用法示例:

tree = ElementTree.parse('your_file.xml')

root = tree.getroot()

xmldict = XmlDictConfig(root)

# 或者,如果要使用XML字符串:

root = ElementTree.XML(xml_string)

xmldict = XmlDictConfig(root)

===============>>#3 票数:39

以下XML-to-Python-dict代码片段按照该XML-to-JSON“规范”解析实体及其属性。 这是处理XML所有情况的最通用的解决方案。

from collections import defaultdict

def etree_to_dict(t):
    """Convert an ElementTree element into a nested ``{tag: value}`` dict.

    The value for ``t.tag`` is:

    * ``None`` for an element with no children, attributes or text;
    * the stripped text for an element that has only text;
    * otherwise a dict holding child tags (repeated tags are collected into
      a list), attributes under ``'@name'`` keys and non-empty stripped text
      under ``'#text'``.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        # Group converted children by tag so repeated tags end up in a list.
        dd = defaultdict(list)
        for dc in map(etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v) for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            # The value is already a dict; only record non-blank text.
            if text:
                d[t.tag]['#text'] = text
        else:
            d[t.tag] = text
    return d

它用于:

from xml.etree import cElementTree as ET

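e = ET.XML('''
<root>
  <e />
  <e>text</e>
  <e name="value" />
  <e name="value">text</e>
  <e> <a>text</a> <b>text</b> </e>
  <e> <a>text</a> <a>text</a> </e>
  <e> text <a>text</a> </e>
</root>
''')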

from pprint import pprint

pprint(etree_to_dict(e))

此示例的输出(根据上面链接的“规范”)应为:

{'root': {'e': [None,

'text',

{'@name': 'value'},

{'#text': 'text', '@name': 'value'},

{'a': 'text', 'b': 'text'},

{'a': ['text', 'text']},

{'#text': 'text', 'a': 'text'}]}}

不一定很漂亮,但是它是明确的,并且更简单的XML输入会导致更简单的JSON。 :)

更新资料

如果要进行相反的操作 , 从JSON / dict发出XML字符串 ,则可以使用:

try:
    basestring
except NameError:  # python3
    basestring = str


def dict_to_etree(d):
    """Serialize a single-rooted dict into an XML string.

    ``d`` must be a dict with exactly one key (the root tag). Inside a body
    dict, ``'#text'`` sets the element text, keys starting with ``'@'`` set
    attributes, a list value emits one child element per item, and any other
    key emits a single child element. Falsy bodies (``None``, ``''``, ``{}``)
    produce an empty element.

    Returns whatever ``ET.tostring`` returns for the root element.

    Raises:
        AssertionError: if the input shape is invalid (checked with
            ``assert``, so the checks are skipped under ``python -O``).
        TypeError: if a body is neither a string nor a dict.
    """
    def _to_etree(d, root):
        if not d:
            pass
        elif isinstance(d, basestring):
            root.text = d
        elif isinstance(d, dict):
            for k, v in d.items():
                assert isinstance(k, basestring)
                if k.startswith('#'):
                    assert k == '#text' and isinstance(v, basestring)
                    root.text = v
                elif k.startswith('@'):
                    assert isinstance(v, basestring)
                    root.set(k[1:], v)
                elif isinstance(v, list):
                    for e in v:
                        _to_etree(e, ET.SubElement(root, k))
                else:
                    _to_etree(v, ET.SubElement(root, k))
        else:
            raise TypeError('invalid type: ' + str(type(d)))

    assert isinstance(d, dict) and len(d) == 1
    tag, body = next(iter(d.items()))
    node = ET.Element(tag)
    _to_etree(body, node)
    return ET.tostring(node)

pprint(dict_to_etree(d))

===============>>#4 票数:25

这个轻量级的版本虽然不可配置,但很容易根据需要进行定制,并且可以在旧的python中使用。 它也是严格的-意味着无论属性是否存在,结果都是相同的。

import xml.etree.ElementTree as ET

from copy import copy

def dictify(r, root=True):
    """Convert element ``r`` into a dict with a uniform, strict layout.

    With ``root=True`` the result is ``{r.tag: <body>}``. A body holds the
    element's attributes, its text under ``'_text'`` (only when non-empty),
    and for every child tag a list of converted child bodies, so the shape is
    the same whether a tag occurs once or many times.
    """
    if root:
        return {r.tag: dictify(r, False)}
    # Work on a copy so the element's own attribute mapping is not modified.
    d = dict(r.attrib)
    if r.text:
        d["_text"] = r.text
    for x in r.findall("./*"):
        if x.tag not in d:
            d[x.tag] = []
        d[x.tag].append(dictify(x, False))
    return d

所以:

root = ET.fromstring("<erik><a x='1'>v</a><a y='2'>w</a></erik>")

dictify(root)

结果是:

{'erik': {'a': [{'x': '1', '_text': 'v'}, {'y': '2', '_text': 'w'}]}}

===============>>#5 票数:6

PicklingTools库的最新版本(1.3.0和1.3.1)支持从XML转换为Python dict的工具。

关于这些转换器,这里有相当详细的文档:文档详细描述了在XML和Python字典之间转换时会遇到的各种决策和问题(存在不少边缘情况:属性、列表、匿名列表、匿名字典、eval等,大多数转换器都无法处理)。 通常,这些转换器易于使用。 如果“example.xml”包含:

<top>
  <a>1</a>
  <b>2.2</b>
  <c>three</c>
</top>

然后将其转换为字典:

>>> from xmlloader import *

>>> example = file('example.xml', 'r') # A document containing XML

>>> xl = StreamXMLLoader(example, 0) # 0 = all defaults on operation

>>> result = xl.expectXML()

>>> print result

{'top': {'a': '1', 'c': 'three', 'b': '2.2'}}

有一些可以在C ++和Python中进行转换的工具:C ++和Python可以进行相同的转换,但是C ++的速度要快60倍左右

===============>>#6 票数:5

免责声明:此经过修改的XML解析器受Adam Clark启发。原始XML解析器适用于大多数简单情况。 但是,它不适用于某些复杂的XML文件。 我逐行调试了代码,最后解决了一些问题。 如果您发现一些错误,请告诉我。 我很高兴修复它。

class XmlDictConfig(dict):
    '''
    Dictionary view of an XML element in which repeated keys are collected
    into lists instead of overwriting each other.

    Note: wrap the XML in a root element if it does not already have one.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, if you want to use an XML string:

    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    And then use xmldict for what it is... a dict.
    '''

    def __init__(self, parent_element):
        if parent_element.items():
            self.updateShim(dict(parent_element.items()))
        for element in parent_element:
            if len(element):
                # The child's attributes are merged by its own constructor
                # (first statement above), so they are not added again here.
                aDict = XmlDictConfig(element)
                self.updateShim({element.tag: aDict})
            elif element.items():  # items() yields the attributes
                # Copy into a real list: items() is only documented to return
                # a sequence of pairs, which need not support append().
                elementattrib = list(element.items())
                if element.text:
                    # add tag:text if there is any text
                    elementattrib.append((element.tag, element.text))
                self.updateShim({element.tag: dict(elementattrib)})
            else:
                self.updateShim({element.tag: element.text})

    def updateShim(self, aDict):
        """Merge ``aDict`` into self, turning repeated keys into lists."""
        for key, new_value in aDict.items():  # keys are tags and attributes
            if key in self:
                existing = self.pop(key)
                if not isinstance(existing, list):
                    existing = [existing]
                existing.append(new_value)
                self[key] = existing
            else:
                # Insert only this key; inserting all of aDict at once would
                # make the remaining keys look like duplicates.
                self[key] = new_value

===============>>#7 票数:4

您可以使用lxml轻松完成此操作。 首先安装它:

[sudo] pip install lxml

这是我编写的递归函数,可以为您完成繁重的工作:

from lxml import objectify as xml_objectify

def xml_to_dict(xml_str):
    """ Convert xml to dict, using lxml v3.4.2 xml processing library """
    def xml_to_dict_recursion(xml_object):
        # Recurse through each object's __dict__; an object whose __dict__
        # is empty is returned unchanged as a leaf value.
        dict_object = xml_object.__dict__
        if not dict_object:
            return xml_object
        for key, value in dict_object.items():
            dict_object[key] = xml_to_dict_recursion(value)
        return dict_object
    return xml_to_dict_recursion(xml_objectify.fromstring(xml_str))

xml_string = """<?xml version="1.0" encoding="UTF-8"?>

Test1234

3455"""

print xml_to_dict(xml_string)

以下变体保留了父键/元素:

def xml_to_dict(xml_str):
    """ Convert xml to dict, using lxml v3.4.2 xml processing library, see http://lxml.de/ """
    def xml_to_dict_recursion(xml_object):
        dict_object = xml_object.__dict__
        if not dict_object:  # if empty dict returned
            return xml_object
        for key, value in dict_object.items():
            dict_object[key] = xml_to_dict_recursion(value)
        return dict_object
    # The module is imported as ``xml_objectify``; the bare name
    # ``objectify`` is not defined and would raise NameError.
    xml_obj = xml_objectify.fromstring(xml_str)
    # Keep the root tag as the single top-level key.
    return {xml_obj.tag: xml_to_dict_recursion(xml_obj)}

如果只想返回一个子树并将其转换为dict,则可以使用Element.find()获取该子树,然后对其进行转换:

xml_obj.find('.//') # lxml.objectify.ObjectifiedElement instance

请在此处查看lxml文档。 我希望这有帮助!

===============>>#8 票数:2

def xml_to_dict(node):
    """Describe ``node`` as a dict with tag, text, attrib and children.

    @param node: lxml_node
    @return: dict with keys ``'tag'``, ``'text'``, ``'attrib'`` and
        ``'children'``; ``'children'`` maps each child tag to that child's
        converted dict, so among children sharing a tag only the last is kept.
    """
    return {
        'tag': node.tag,
        'text': node.text,
        'attrib': node.attrib,
        'children': {child.tag: xml_to_dict(child) for child in node},
    }

===============>>#9 票数:2

对于Python,最容易使用的XML解析器是ElementTree(从2.5x开始,在标准库xml.etree.ElementTree中)。 我认为没有什么可以完全满足您的要求。 使用ElementTree编写某些内容来完成您想要的事情,这很简单,但是为什么要转换为字典,为什么不直接使用ElementTree。

===============>>#10 票数:2

我在两者之间添加了一个垫片,以查看在self.update()之前该元素是否已经存在。 如果是这样,则弹出现有条目并从现有条目和新条目中创建一个列表。 随后的所有重复项都将添加到列表中。

不知道是否可以更妥善地处理此问题,但它的工作原理是:

import xml.etree.ElementTree as ElementTree

class XmlDictConfig(dict):
    """Dictionary view of an XML element that keeps repeated tags.

    Attributes and child tags of ``parent_element`` become keys. When a key
    occurs more than once, its values are collected into a list in document
    order instead of overwriting each other.
    """

    def __init__(self, parent_element):
        if parent_element.items():
            self.updateShim(dict(parent_element.items()))
        for element in parent_element:
            if len(element):
                # The nested constructor already merges the child's own
                # attributes; merging them a second time would turn every
                # attribute value into a two-item list.
                self.updateShim({element.tag: XmlDictConfig(element)})
            elif element.items():
                self.updateShim({element.tag: dict(element.items())})
            else:
                # An empty element such as <a/> has text None; keep None
                # rather than failing on None.strip().
                text = element.text
                self.updateShim(
                    {element.tag: text.strip() if text is not None else None})

    def updateShim(self, aDict):
        """Merge ``aDict`` into self, turning repeated keys into lists."""
        for key, new_value in aDict.items():
            if key in self:
                existing = self.pop(key)
                if not isinstance(existing, list):
                    existing = [existing]
                existing.append(new_value)
                self[key] = existing
            else:
                # Insert only this key: updating with the whole of aDict
                # would make its remaining keys look like duplicates on the
                # following iterations.
                self[key] = new_value

===============>>#11 票数:2

从@ K3 --- rnc 响应 (最适合我),我添加了一些小修改以从XML文本获得OrderedDict(有时顺序很重要):

def etree_to_ordereddict(t):
    """Convert an ElementTree element into nested ``OrderedDict`` objects.

    The value for ``t.tag`` is ``None`` for an empty element, the stripped
    text for a text-only element, and otherwise an ``OrderedDict`` holding
    child tags in document order (repeated tags collected into a list),
    attributes under ``'@name'`` keys and non-empty text under ``'#text'``.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present.
    from collections import OrderedDict

    d = OrderedDict()
    d[t.tag] = OrderedDict() if t.attrib else None
    children = list(t)
    if children:
        # Group converted children by tag, preserving first-seen order.
        dd = OrderedDict()
        for dc in map(etree_to_ordereddict, children):
            # items() exists on both Python 2 and 3; iteritems() does not.
            for k, v in dc.items():
                if k not in dd:
                    dd[k] = list()
                dd[k].append(v)
        d = OrderedDict()
        d[t.tag] = OrderedDict()
        for k, v in dd.items():
            if len(v) == 1:
                d[t.tag][k] = v[0]
            else:
                d[t.tag][k] = v
    if t.attrib:
        d[t.tag].update(('@' + k, v) for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            if text:
                d[t.tag]['#text'] = text
        else:
            d[t.tag] = text
    return d

在@ K3 --- rnc示例中,可以使用它:

from xml.etree import cElementTree as ET

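e = ET.XML('''
<root>
  <e />
  <e>text</e>
  <e name="value" />
  <e name="value">text</e>
  <e> <a>text</a> <b>text</b> </e>
  <e> <a>text</a> <a>text</a> </e>
  <e> text <a>text</a> </e>
</root>
''')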

from pprint import pprint

pprint(etree_to_ordereddict(e))

希望能帮助到你 ;)

===============>>#12 票数:1

这是ActiveState解决方案的链接——并附上代码,以防链接再次失效。

==================================================

xmlreader.py:

==================================================

from xml.dom.minidom import parse

class NotTextNodeError(Exception):
    """Raised by getTextFromNode() when a node has a non-text child.

    Derives from Exception so it can be raised and caught on Python 3,
    where only BaseException subclasses may be raised.
    """


def getTextFromNode(node):
    """
    scans through all children of node and gathers the
    text. if node has non-text child-nodes, then
    NotTextNodeError is raised.
    """
    parts = []
    for n in node.childNodes:
        if n.nodeType != n.TEXT_NODE:
            raise NotTextNodeError
        parts.append(n.nodeValue)
    return "".join(parts)


def nodeToDic(node):
    """
    nodeToDic() scans through the children of node and makes a
    dictionary from the content.

    three cases are differentiated:

    - if the node contains no other nodes, it is a text-node
      and {nodeName:text} is merged into the dictionary.

    - if the node has the attribute "multiple" set to "true",
      then its children will be appended to a list and this
      list is merged to the dictionary in the form: {nodeName:list}.

    - else, nodeToDic() will call itself recursively on
      the node's children (merging {nodeName:nodeToDic()} to
      the dictionary).
    """
    dic = {}
    for n in node.childNodes:
        if n.nodeType != n.ELEMENT_NODE:
            continue
        if n.getAttribute("multiple") == "true":
            # node with multiple children: put them in a list
            dic[n.nodeName] = [
                nodeToDic(c)
                for c in n.childNodes
                if c.nodeType == n.ELEMENT_NODE
            ]
            continue
        try:
            text = getTextFromNode(n)
        except NotTextNodeError:
            # 'normal' node
            dic[n.nodeName] = nodeToDic(n)
            continue
        # text node
        dic[n.nodeName] = text
    return dic

def readConfig(filename):
    """Parse the XML file ``filename`` and return it as a nested dict."""
    dom = parse(filename)
    return nodeToDic(dom)

def test():
    """Read sample.xml and print the config name and each item's fields."""
    dic = readConfig("sample.xml")
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(dic["Config"]["Name"])
    print("")
    for item in dic["Config"]["Items"]:
        print("Item's Name: %s" % item["Name"])
        print("Item's Value: %s" % item["Value"])

test()

==================================================

sample.xml:

==================================================

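<Config>
  <Name>My Config File</Name>
  <Items multiple="true">
    <Item>
      <Name>First Item</Name>
      <Value>Value 1</Value>
    </Item>
    <Item>
      <Name>Second Item</Name>
      <Value>Value 2</Value>
    </Item>
  </Items>
</Config>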

==================================================

output:

==================================================

My Config File

Item's Name: First Item

Item's Value: Value 1

Item's Name: Second Item

Item's Value: Value 2

===============>>#13 票数:0

@dibrovsd:如果xml具有多个同名标签,则解决方案将不起作用

根据您的想法,我对代码进行了一些修改,使其适用于一般节点,而不仅仅是根节点:

from collections import defaultdict

def xml2dict(node):
    """Convert the children of ``node`` into a dict keyed by ``tag_index``.

    Each child gets the key ``"<tag>_<n>"``, where ``n`` is its 1-based
    position among the children, so siblings that share a tag stay distinct.
    The value is a dict with the child's ``'text'``, its ``'attrib'`` mapping
    and its recursively converted ``'children'``.
    """
    # Entries are dicts; a defaultdict(list) cannot take the string keys
    # 'text' / 'attrib' / 'children' assigned below.
    d = defaultdict(dict)
    for count, i in enumerate(node, 1):
        entry = d[i.tag + "_" + str(count)]
        # NOTE(review): the original indexed this with [0], which keeps only
        # the first character and fails on empty text; the full text is
        # assumed to be the intent -- confirm.
        entry['text'] = i.findtext('.')
        entry['attrib'] = i.attrib  # the element's attribute mapping
        entry['children'] = xml2dict(i)  # nested dict of the same shape
    return d

===============>>#14 票数:0

我按照自己的喜好修改了其中一个答案,使其能够处理同一标签下的多个值。例如,考虑以下保存在XML.xml文件中的xml代码。

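<A>
  <B>
    <BB>inAB</BB>
    <C>
      <D>
        <E>inABCDE</E>
        <E>value2</E>
        <E>value3</E>
      </D>
      <inCout-ofD>123</inCout-ofD>
    </C>
  </B>
  <B>abc</B>
  <F>F</F>
</A>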

和在python中

import xml.etree.ElementTree as ET

class XMLToDictionary(dict):
    """Dict built from an element's children; repeated tags become lists.

    A childless child maps its tag to its stripped text ('' when it has no
    text); a child with children maps its tag to a nested XMLToDictionary.
    Use getDict() to obtain the result wrapped under the root tag.
    """

    def __init__(self, parentElement):
        self.parentElement = parentElement
        for child in list(parentElement):
            if len(child) == 0:
                # Read the text without writing back to the element, so the
                # caller's tree is left untouched.
                text = child.text if child.text is not None else ''
                self.update(self._addToDict(key=child.tag, value=text.strip(), dict=self))
            else:
                innerChild = XMLToDictionary(parentElement=child)
                self.update(self._addToDict(key=innerChild.parentElement.tag, value=innerChild, dict=self))

    def getDict(self):
        """Return ``{root_tag: self}``."""
        return {self.parentElement.tag: self}

    class _addToDict(dict):
        """One-entry dict: ``key`` mapped to ``value``, or to the existing
        value(s) for ``key`` in ``dict`` extended with ``value``."""

        # The parameter is named ``dict`` because callers pass it by keyword.
        def __init__(self, key, value, dict):
            if key not in dict:
                self.update({key: value})
            else:
                current = dict[key]
                identical = current if isinstance(current, list) else [current]
                self.update({key: identical + [value]})

tree = ET.parse('./XML.xml')

root = tree.getroot()

parseredDict = XMLToDictionary(root).getDict()

print(parseredDict)

输出是

{'A': {'B': [{'BB': 'inAB', 'C': {'D': {'E': ['inABCDE', 'value2', 'value3']}, 'inCout-ofD': '123'}}, 'abc'], 'F': 'F'}}

===============>>#15 票数:0

在某一时刻,我不得不解析和编写仅包含没有属性的元素的XML,因此从XML到dict的1:1映射很容易。 如果别人也不需要属性,这就是我想出的:

def xmltodict(element):
    """Convert an attribute-free ElementTree element into ``{tag: body}``.

    A child with children becomes a nested dict, a childless child becomes
    its text (``None`` when empty). Children sharing a tag are collected
    into a list in document order. Attributes are ignored.

    Raises:
        ValueError: if ``element`` is not an ``ElementTree.Element``.
    """
    if not isinstance(element, ElementTree.Element):
        raise ValueError("must pass xml.etree.ElementTree.Element object")

    def xmltodict_handler(parent_element):
        result = dict()
        for element in parent_element:
            if len(element):
                obj = xmltodict_handler(element)
            else:
                obj = element.text
            # Membership test, not truthiness: a stored None or '' (an empty
            # element) must still count as an existing entry.
            if element.tag in result:
                existing = result[element.tag]
                if isinstance(existing, list):
                    existing.append(obj)
                else:
                    result[element.tag] = [existing, obj]
            else:
                result[element.tag] = obj
        return result

    return {element.tag: xmltodict_handler(element)}

def dicttoxml(element):
    """Build an ElementTree element from a single-rooted dict.

    ``element`` must be a dict with exactly one key (the root tag) whose
    value is a dict. Within it, a list emits one element per item, strings
    and numbers become element text, ``None`` becomes an empty element and
    any other value is treated as a nested dict.

    Raises:
        ValueError: if ``element`` is not a dict or does not have exactly
            one root key.
    """
    if not isinstance(element, dict):
        raise ValueError("must pass dict type")
    if len(element) != 1:
        raise ValueError("dict must have exactly one root key")

    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str

    def dicttoxml_handler(result, key, value):
        if isinstance(value, list):
            for e in value:
                dicttoxml_handler(result, key, e)
        elif isinstance(value, string_types):
            elem = ElementTree.Element(key)
            elem.text = value
            result.append(elem)
        elif isinstance(value, (int, float)):
            elem = ElementTree.Element(key)
            elem.text = str(value)
            result.append(elem)
        elif value is None:
            result.append(ElementTree.Element(key))
        else:
            res = ElementTree.Element(key)
            for k, v in value.items():
                dicttoxml_handler(res, k, v)
            result.append(res)

    # dict.keys() is not indexable on Python 3; take the single key directly.
    root_key = next(iter(element))
    result = ElementTree.Element(root_key)
    for key, value in element[root_key].items():
        dicttoxml_handler(result, key, value)
    return result

def xmlfiletodict(filename):
    """Parse the XML file ``filename`` and convert its root with xmltodict()."""
    return xmltodict(ElementTree.parse(filename).getroot())

def dicttoxmlfile(element, filename):
    """Convert ``element`` with dicttoxml() and write the XML to ``filename``."""
    ElementTree.ElementTree(dicttoxml(element)).write(filename)

def xmlstringtodict(xmlstring):
    """Parse the XML text ``xmlstring`` and convert it with xmltodict()."""
    # fromstring() already returns the root Element; only ElementTree
    # objects (as returned by parse()) have getroot().
    return xmltodict(ElementTree.fromstring(xmlstring))

def dicttoxmlstring(element):
    """Convert ``element`` with dicttoxml() and serialize it with tostring()."""
    return ElementTree.tostring(dicttoxml(element))

===============>>#16 票数:-1

我有一个递归方法,可从lxml元素获取字典

def recursive_dict(element):
    """Return ``(local_tag, dict)`` for ``element``, recursively.

    The dict maps each child's local tag to that child's dict and also holds
    the element's attributes. Element text is not included, and among
    children sharing a tag only the last one is kept.
    """
    # rpartition keeps the whole tag when there is no '{namespace}' prefix,
    # where split('}')[1] would raise IndexError.
    local_tag = element.tag.rpartition('}')[2]
    # Iterating the element yields its children; getchildren() is a
    # deprecated API.
    return (local_tag,
            dict(map(recursive_dict, element),
                 **element.attrib))

ask by user361526 translate from so

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值