python xml

最新推荐文章于 2024-06-08 14:45:56 发布

CV/NLP大虾

最新推荐文章于 2024-06-08 14:45:56 发布

阅读量166

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/m0_37400316/article/details/106714813

版权

python 专栏收录该内容

17 篇文章 0 订阅

订阅专栏

XML 作为一种常用的数据存储格式，应用十分广泛。

1、读取xml

def xml(path=None):
    """Read object bounding boxes from an annotation XML file.

    The file is expected to contain ``<object>`` elements under the root,
    each with a ``<bndbox>`` child holding integer ``xmin``/``ymin``/
    ``xmax``/``ymax`` values and an optional ``<difficult>`` flag.

    Args:
        path: Path (or file object) of the XML file.  When omitted, the
            module-level name ``datarxml`` is used, as in the original code.

    Returns:
        A ``(bboxes, bboxes_ignore)`` tuple.  Each entry is a list of
        ``[xmin, ymin, xmax, ymax]`` lists; objects whose ``<difficult>``
        value is non-zero go to ``bboxes_ignore``.

    Side effects:
        Prints the non-difficult boxes, prefixed with ``"bboxes"``.
    """
    # Local import: the alias form binds only ``ET`` and leaves the name
    # ``xml`` (this function) untouched.
    import xml.etree.ElementTree as ET

    if path is None:
        path = datarxml  # fallback to the global the original relied on

    root = ET.parse(path).getroot()  # parse the whole document

    bboxes = []
    bboxes_ignore = []
    for obj in root.findall('object'):  # direct children of the root
        difficult_node = obj.find('difficult')
        # A missing <difficult> tag is treated as "not difficult".
        difficult = int(difficult_node.text) if difficult_node is not None else 0

        bnd_box = obj.find('bndbox')
        bbox = [
            int(bnd_box.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        if difficult:
            bboxes_ignore.append(bbox)
        else:
            bboxes.append(bbox)

    print("bboxes", bboxes)
    return bboxes, bboxes_ignore

2、保存与读取xml（minidom 导出，DOM / SAX 导入）

from xml.sax import *
from xml.dom.minidom import Document,parse
import xml.dom.minidom
import os
import datetime

class Movie:
    """Plain record describing one movie.

    Attributes set by the constructor: ``title``, ``year``, ``minutes``,
    ``acquired`` and ``notes``.
    """

    # Placeholder values used when year / running time are not supplied.
    UNKNOWNYEAR = 1890
    UNKNOWNMINUTES = 0

    def __init__(self, title=None, year=UNKNOWNYEAR,
                 minutes=UNKNOWNMINUTES, acquired=None, notes=None):
        """Store the given fields; ``acquired`` defaults to today's date."""
        if acquired is None:
            acquired = datetime.date.today()
        self.title, self.year, self.minutes = title, year, minutes
        self.acquired = acquired
        self.notes = notes
class MovieContainer(object):
    """Write one movie record to an XML file and read records back.

    The container wraps a single movie-like object (anything exposing
    ``title``, ``year``, ``minutes``, ``acquired`` and ``notes``).  Export
    uses ``xml.dom.minidom``; import is available through DOM or SAX.
    All results are reported with ``print``.
    """

    def __init__(self, fname, movies):
        """Remember the file name and the movie object to export.

        Args:
            fname: Stored on the instance; the methods below take the path
                explicitly and do not read this value.
            movies: The single movie object written by ``exportXml``.
        """
        self.__fname = fname
        self.__movies = movies

    def exportXml(self, fname):
        """Write the wrapped movie to ``fname`` as UTF-8 encoded XML.

        The document has a ``MOVIES`` root (``VERSION="1.0"``) with one
        ``MOVIE`` child carrying ``YEAR``/``MINUTES``/``ACQUIRED``
        attributes and ``TITLE``/``NOTES`` text children.  Every value is
        passed through ``str()``, so a ``None`` title or notes is written
        as the text ``None``.

        Any exception is caught and printed as ``Failed to export: ...``;
        on success a confirmation line is printed.
        """
        try:
            record = self.__movies
            doc = Document()

            movies = doc.createElement("MOVIES")
            movies.setAttribute("VERSION", "1.0")
            doc.appendChild(movies)

            movie = doc.createElement("MOVIE")
            movie.setAttribute("YEAR", str(record.year))
            movie.setAttribute("MINUTES", str(record.minutes))
            movie.setAttribute("ACQUIRED", str(record.acquired))
            movies.appendChild(movie)

            title = doc.createElement("TITLE")
            title.appendChild(doc.createTextNode(str(record.title)))
            movie.appendChild(title)

            notes = doc.createElement("NOTES")
            notes.appendChild(doc.createTextNode(str(record.notes)))
            movie.appendChild(notes)

            # toprettyxml returns bytes when an encoding is given, hence 'wb'.
            with open(fname, 'wb') as f:
                f.write(doc.toprettyxml(indent='\t', newl="\n",
                                        encoding='utf-8'))
        except Exception as e:
            # Best-effort export: report the problem instead of raising.
            print("Failed to export: {0}".format(e))
        else:
            print("Exported 1 movie records to {0}".format(
                    os.path.basename(fname)))

    @staticmethod
    def _read_movie(movie):
        """Extract ``(title, year, minutes, acquired, notes)`` from a MOVIE node.

        ``year`` and ``minutes`` are the raw attribute strings; ``acquired``
        is a ``datetime.date``.  An absent attribute yields ``None``.

        Raises:
            ValueError: if ACQUIRED is not a valid ``Y-M-D`` date, or if the
                TITLE / NOTES element is missing or has no text.
        """
        year = movie.getAttribute("YEAR") if movie.hasAttribute("YEAR") else None
        minutes = (movie.getAttribute("MINUTES")
                   if movie.hasAttribute("MINUTES") else None)

        acquired = None
        if movie.hasAttribute("ACQUIRED"):
            raw = movie.getAttribute("ACQUIRED")
            ymd = raw.split("-")
            if len(ymd) != 3:
                raise ValueError("invalid acquired date {0}".format(str(raw)))
            # int() / datetime.date() raise ValueError on bad components.
            acquired = datetime.date(int(ymd[0]), int(ymd[1]), int(ymd[2]))

        try:
            title = movie.getElementsByTagName('TITLE')[0].childNodes[0].data
            notes = movie.getElementsByTagName('NOTES')[0].childNodes[0].data
        except (IndexError, AttributeError):
            # No such element, no child node, or a child that is not text.
            raise ValueError("missing title or notes")
        return title, year, minutes, acquired, notes

    def importDOM(self, fname):
        """Parse ``fname`` with minidom and print every MOVIE record.

        Each valid record is printed as
        ``title year minutes acquired notes``.  A record that fails
        validation prints ``Failed to import: ...`` and is skipped, so it
        can neither abort the remaining records nor be printed with values
        left over from a previous one.  Any other error (for example an
        unreadable or malformed file) is printed and ends the import.
        """
        try:
            dom_tree = xml.dom.minidom.parse(fname)
            collection = dom_tree.documentElement
            for movie in collection.getElementsByTagName("MOVIE"):
                try:
                    record = self._read_movie(movie)
                except ValueError as e:
                    print("Failed to import: {0}".format(e))
                    continue
                print(*record)
        except Exception as e:
            print(e)

    def importSAX_standard(self, fname):
        """Parse ``fname`` with a SAX parser driven by ``MovieHandler``.

        Namespace processing is switched off.  The handler prints each
        record; afterwards a summary line is printed.  Exceptions raised by
        the parser or the handler propagate to the caller.
        """
        parser = xml.sax.make_parser()
        # Turn off namespace handling so plain tag names are reported.
        parser.setFeature(xml.sax.handler.feature_namespaces, 0)
        handler = MovieHandler(self.__movies)
        parser.setContentHandler(handler)
        parser.parse(fname)
        print("Imported 1 movie records from {0}".format(
                os.path.basename(fname)))


class MovieHandler(ContentHandler):
    """SAX content handler that validates and prints MOVIE records.

    For every ``MOVIE`` element the handler reads the ``YEAR``,
    ``MINUTES`` and ``ACQUIRED`` attributes, collects the text of the
    nested ``TITLE`` and ``NOTES`` elements, and prints the record when
    the ``MOVIE`` element closes.
    """

    def __init__(self, movies):
        """Initialise the base handler and an empty current record.

        Args:
            movies: Stored as ``self.movies``; not otherwise used by the
                handler methods in this class.
        """
        # Must name this class (not ContentHandler) so that
        # ContentHandler.__init__ itself is executed.
        super(MovieHandler, self).__init__()
        self.CurrentData = ""
        self.movies = movies
        self.text = ""
        self.error = None
        # Give the record fields defined values before any element is seen.
        self.clear()

    def clear(self):
        """Reset all fields of the record currently being assembled."""
        self.year = None
        self.minutes = None
        self.acquired = None
        self.title = None
        self.notes = None

    def startElement(self, tag, attributes):
        """Begin a MOVIE record or start collecting TITLE/NOTES text.

        Raises:
            KeyError: if a MOVIE element lacks YEAR, MINUTES or ACQUIRED.
            ValueError: if YEAR/MINUTES are not integers or ACQUIRED is
                not a valid ``Y-M-D`` date.
        """
        if tag == "MOVIE":
            self.clear()
            self.year = int(attributes["YEAR"])
            self.minutes = int(attributes["MINUTES"])
            ymd = attributes["ACQUIRED"].split("-")
            if len(ymd) != 3:
                raise ValueError("invalid acquired date {0}".format(
                        str(attributes["ACQUIRED"])))
            self.acquired = datetime.date(int(ymd[0]),
                    int(ymd[1]), int(ymd[2]))
        elif tag in ("TITLE", "NOTES"):
            # Start a fresh text buffer for this element.
            self.text = ""
        return True

    def characters(self, text):
        """Append a chunk of character data to the current text buffer."""
        # The parser may deliver one text node in several chunks.
        self.text += text
        return True

    def endElement(self, tag):
        """Store finished TITLE/NOTES text, or print a finished MOVIE.

        Raises:
            ValueError: if a MOVIE closes while any field is still
                ``None`` or the title is empty.
        """
        if tag == "MOVIE":
            if (self.year is None or self.minutes is None or
                self.acquired is None or self.title is None or
                self.notes is None or not self.title):
                raise ValueError("incomplete movie record")
            print(self.title, self.year,
                    self.minutes, self.acquired, self.notes)
        elif tag == "TITLE":
            self.title = self.text.strip()
        elif tag == "NOTES":
            self.notes = self.text.strip()
        return True


if __name__ == "__main__":
    # Demo run: export each sample movie to XML, then read the file back
    # through the DOM importer and the SAX importer.
    fname = "/home/yrd/work/movietest.xml"
    textdata = [["God save world", 1989, 45, None, "HELLO WORLD"]]

    for title, year, minutes, acquired, notes in textdata:
        movie = Movie(title, year, minutes, acquired, notes)
        moviecontainer = MovieContainer(fname, movie)
        steps = (
            ("#exportDOM", moviecontainer.exportXml),
            ("#importDOM", moviecontainer.importDOM),
            ("#importSAX", moviecontainer.importSAX_standard),
        )
        for banner, action in steps:
            print(banner)
            action(fname)

CV/NLP大虾

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python xml

xml作为数据存储的一种格式，有着较大使用意义。1、读取xmldef xml(): tree = ET.parse(datarxml) root = tree.getroot() #r解析整个xml size = root.find('size') #root同层 w = int(size.find('width').text)#root同层 h = int(size.find('height').text)#root同层 bboxes = []
复制链接

扫一扫

专栏目录