Python xml的解析

XML 指可扩展标记语言(eXtensible Markup Language),被设计用来传输和存储数据。

注意:

1.有且只有一个根元素

2.标签必须有关闭标签

3.属性值须加引号

4.标签名区分大小写(开始标签与结束标签的大小写必须一致)

5.标签名不能有空格,不能以数字、"-"(连字符)或"."(点)开头(可以以字母或下划线"_"开头)

6.不能以xml(或XML、或Xml 等)开头,名称中间不能包含冒号(:)

7.空格/回车/制表符在xml中都是文本节点

8.必须正确地嵌套

python中创建一个  .xml文件,

#格式严格,不要随便删除任意字符包括空格

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE students [
        <!ELEMENT students (student+)>
        <!ELEMENT student (name,age,sex)>
        <!ELEMENT name (#PCDATA)>
        <!ELEMENT age (#PCDATA)>
        <!ELEMENT sex (#PCDATA)>
        <!ATTLIST student id CDATA #REQUIRED>
        ]>
<students>
    <student id="1">
        <name>张三</name>
        <age>19</age>
        <sex></sex>
    </student>
    <student id="2">
        <name>韩梅梅</name>
        <age>18</age>
        <sex></sex>
    </student>
    <student id="3">
        <name>欧阳俊杰</name>
        <age>21</age>
        <sex></sex>
    </student>
</students>

三种解析方式:

dom

from xml.dom.minidom import parse
# Load the whole file "XML.xml" into an in-memory DOM document.
dom_document = parse("XML.xml")
print(dom_document)
# The document element is the top-level element of the parsed file.
root = dom_document.documentElement
print(root)
# Collect every <student> element found below the root.
students = root.getElementsByTagName("student")
class Student:
    """Plain record holding the id, name, age and sex of one student.

    All four values default to ``None`` so an instance can be created
    empty and filled in attribute by attribute.
    """

    def __init__(self, id=None, name=None, age=None, sex=None):
        self.id = id
        self.name = name
        self.age = age
        self.sex = sex

    def __repr__(self):
        """Return the fields as one tab-separated line.

        Fields that are still ``None`` are rendered as empty strings and
        non-string values are converted with ``str`` so that building the
        line never raises.
        """
        id_, name, age, sex = (
            "" if value is None else str(value)
            for value in (self.id, self.name, self.age, self.sex)
        )
        # Names of at most 8 UTF-8 bytes get a second tab; without it the
        # age column does not line up when names have different lengths.
        # str.ljust(10) was tried for padding instead, but a CJK character
        # is displayed wider than a space, so it does not align either.
        gap = "\t\t" if len(name.encode("utf-8")) <= 8 else "\t"
        return id_ + "\t" + name + gap + age + "\t" + sex
def _child_text(node, tag):
    """Return the text of the first ``<tag>`` element below ``node``.

    An absent or empty element (for example ``<sex></sex>``) yields ``""``
    instead of raising ``IndexError``.
    """
    matches = node.getElementsByTagName(tag)
    if not matches:
        return ""
    return "".join(
        child.data
        for child in matches[0].childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )


# Create the list once, before the loop, so it accumulates every student
# instead of being reset to a single entry on each iteration.
stuList = []
for node in students:
    stuList.append(
        Student(
            node.getAttribute("id"),
            _child_text(node, "name"),
            _child_text(node, "age"),
            _child_text(node, "sex"),
        )
    )
print(stuList)

# #[{name:},{age:},{sex:},{ID:}]
#     d=[]
#     d.append({"姓名":name})
#     d.append({"年龄": age})
#     d.append({"性别": sex})
#     d.append({"ID": id})
#     print(d)

sax

from xml.sax import parse
from xml.sax.handler import ContentHandler
# class SaxParser(ContentHandler):
#     def __init__(self,name=None):
#         self.name=name
#     def startDocument(self):
#         print("xml文件开始...")
#     def endDocument(self):
#         print("xml文件结束...")
#     def startElement(self, name, attrs):
#         print("标签"+name+"开始")
#         self.name=name
#     def characters(self, content):
#         if self.name=="name":
#             print("标签内容"+content)
#         elif  self.name=="age":
#             print("标签内容"+content)
#         elif self.name == "sex":
#             print("标签内容"+content)
#     def endElement(self, name):
#         print("标签"+name+"结束")
#         self.name=None
# parse("XML.xml",SaxParser())
class Student:
    """Plain record holding the id, name, age and sex of one student.

    All four values default to ``None`` so an instance can be created
    empty and filled in attribute by attribute.
    """

    def __init__(self, id=None, name=None, age=None, sex=None):
        self.id = id
        self.name = name
        self.age = age
        self.sex = sex

    def __repr__(self):
        """Return the fields as one tab-separated line.

        Fields that are still ``None`` (for example when an element had no
        text) are rendered as empty strings, and non-string values are
        converted with ``str``, so building the line never raises.
        """
        id_, name, age, sex = (
            '' if value is None else str(value)
            for value in (self.id, self.name, self.age, self.sex)
        )
        # Names of at most 8 UTF-8 bytes get a second tab so the age column
        # lines up with the rows that have longer names.
        gap = '\t\t' if len(name.encode('UTF-8')) <= 8 else '\t'
        return id_ + '\t' + name + gap + age + '\t' + sex
# Module-level accumulator: saxParse appends one record per </student>.
stulist = []


class saxParse(ContentHandler):
    """SAX handler that turns each <student> element into a Student.

    The ``id`` attribute and the text of the ``name``, ``age`` and ``sex``
    child elements are copied onto a new ``Student``; the finished record
    is appended to the module-level ``stulist`` when ``</student>`` is seen.
    """

    # Child tags whose text is copied onto the current record.
    _FIELDS = ("name", "age", "sex")

    def __init__(self, name=None):
        super().__init__()
        self.name = name  # tag of the element currently open, or None
        self.stu = None   # record being filled in, or None

    def startDocument(self):
        pass

    def endDocument(self):
        pass

    def startElement(self, name, attrs):
        """Start a new record on <student>; reset a field on its own tag."""
        if name == 'student':
            self.stu = Student()
            self.stu.id = attrs['id']
        elif name in self._FIELDS and self.stu is not None:
            # Start from an empty string so an empty element such as
            # <sex></sex> ends up as '' rather than None.
            setattr(self.stu, name, '')
        self.name = name

    def endElement(self, name):
        """Store the finished record when </student> is reached."""
        if name == 'student':
            stulist.append(self.stu)
        self.name = None

    def characters(self, content):
        """Append ``content`` to the field of the currently open tag.

        The parser may report one run of text in several chunks, so the
        chunks are concatenated instead of each overwriting the previous.
        Text outside the known fields, or with no record open, is ignored.
        """
        if self.name in self._FIELDS and self.stu is not None:
            so_far = getattr(self.stu, self.name) or ''
            setattr(self.stu, self.name, so_far + content)
# Stream "XML.xml" through the SAX handler, then print every record that
# the handler collected in stulist, one per line.
handler = saxParse()
parse("XML.xml", handler)
for record in stulist:
    print(record)

element tree

# try:
#     import xml.etree.cElementTree as ET
# except ImportError:
#     import xml.etree.ElementTree as ET
# def test():
#     tree=ET.parse("XML.xml")
#     students=tree.findall("student")
#     for stu in students:
#         children=stu.getchildren()
#         for c in children:
#             print(c.text)
# test()
class Student:
    """Plain record holding the id, name, age and sex of one student.

    All four values default to ``None`` so an instance can be created
    empty and filled in attribute by attribute.
    """

    def __init__(self, id=None, name=None, age=None, sex=None):
        self.id = id
        self.name = name
        self.age = age
        self.sex = sex

    def __repr__(self):
        """Return the fields as one tab-separated line.

        Fields that are still ``None`` (for example the ``text`` of an
        empty element) are rendered as empty strings, and non-string values
        are converted with ``str``, so building the line never raises.
        """
        id_, name, age, sex = (
            "" if value is None else str(value)
            for value in (self.id, self.name, self.age, self.sex)
        )
        # Names of at most 8 UTF-8 bytes get a second tab so the age column
        # lines up with the rows that have longer names.
        gap = "\t\t" if len(name.encode("utf-8")) <= 8 else "\t"
        return id_ + "\t" + name + gap + age + "\t" + sex
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
# Module-level accumulator filled by test().
stuList = []


def test(path="XML.xml"):
    """Parse ``path`` with ElementTree and append its students to stuList.

    One ``Student`` is created for every ``<student>`` child of the root
    element, taking ``id`` from the attribute and ``name``/``age``/``sex``
    from the child elements of the same name.

    Args:
        path: XML file to read; defaults to "XML.xml".

    Raises:
        KeyError: if a ``<student>`` element has no ``id`` attribute.
    """
    tree = ET.parse(path)
    for node in tree.findall("student"):
        student = Student()
        student.id = node.attrib["id"]
        # Look children up by tag instead of by position:
        # Element.getchildren() no longer exists on Python 3.9+, and
        # findtext() gives "" rather than None for an empty element.
        student.name = node.findtext("name", default="")
        student.age = node.findtext("age", default="")
        student.sex = node.findtext("sex", default="")
        stuList.append(student)
# Fill stuList from the XML file, then print each record on its own line.
test()
for record in stuList:
    print(record)

sax:逐行的解析,不能增删改

dom:把整个文档加载到内存中,翻译成一棵树,就可以进行crud操作

element tree:最为简单

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值