golang etree 解析XML

最新推荐文章于 2024-09-03 08:47:11 发布

爱你爱我

最新推荐文章于 2024-09-03 08:47:11 发布

阅读量4.4k

点赞数 2

分类专栏： golang xml解析 Go开发文章标签： golang xml解析

本文链接：https://blog.csdn.net/sserf/article/details/101538084

版权

Go开发同时被 2 个专栏收录

24 篇文章 0 订阅

订阅专栏

golang xml解析

5 篇文章 0 订阅

订阅专栏

首先：

官网地址：https://github.com/beevik/etree 。etree作为一个轮子，在解析XML文件的工具中占据着很重要的地位，它可以查找节点、遍历节点，也可以生成XML文件。

XML打开工具：XML Marker，由于一般获取到的XML文件比较大，内容很多，节点对不齐的情况经常出现，用此工具打开文件，可以清晰的看到文件的结构，节点对应点，该节点下面的对应值，以及节点路径，在使用的时候可以方便的查找对应的位置，推荐使用。

一、XML的引入写法

在工程中引入XML的方法，建议用第一种

//方法一
xml := `
<bookstore>
	<book>
		<title>Great Expectations</title>
		<author>Charles Dickens</author>
	</book>
	<book>
		<title>Ulysses</title>
		<author>James Joyce</author>
	</book>
</bookstore>`

//方法二
LoginPut= "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>" +
		" <LogonRequest xmlns=\"http://www.ibm.com/xmlns/systems/power/firmware/web/mc/2012_10/\"" +
		" schemaVersion=\"V1_1_0\">" +
		" <Metadata>" +
		" <Atom/>" +
		" </Metadata>" +
		" <UserID >%s</UserID>" +
		" <Password >%s</Password>" +
		" </LogonRequest>"

二、XML的解析

1. 判断所需节点是否存在的方法

方法1：在某个节点范围下查找指定节点是否存在

// whetherExistNode reports whether at least one element matching the etree
// path nodeName can be found when searching from doc.
//
// nodeName is compiled with etree.MustCompilePath, so a syntactically invalid
// path panics instead of returning false.
func whetherExistNode(doc etree.Element, nodeName string) (exist bool) {
	path := etree.MustCompilePath(nodeName)
	// FindElementPath yields nil when nothing matches, so the nil check is the
	// whole answer.
	return doc.FindElementPath(path) != nil
}

方法2：获取指定节点的值

// getSpecifiedNodeVal returns the distinct text values of every element in doc
// that matches the etree path nodeName, in the order they are first seen.
//
// The result is nil when nothing matches. nodeName is compiled with
// etree.MustCompilePath, so a syntactically invalid path panics.
func getSpecifiedNodeVal(doc etree.Document, nodeName string) (dataSlice []string) {
	path := etree.MustCompilePath(nodeName)
	// seen records the values already appended so each candidate is checked
	// with one map lookup instead of rescanning dataSlice.
	seen := make(map[string]struct{})
	for _, t := range doc.FindElementsPath(path) {
		val := t.Text()
		if _, dup := seen[val]; dup {
			continue
		}
		seen[val] = struct{}{}
		dataSlice = append(dataSlice, val)
	}
	return dataSlice
}

方法3：在用SelectElement获取节点值的时候要判断该节点是否存在。SelectElement找不到节点时返回nil，如果不加判断就直接调用Text()，程序会因为空指针而panic退出，后果会比较严重。

	if tempNode.SelectElement("OperatingSystemVersion") != nil {
		systemVersion = tempNode.SelectElement("OperatingSystemVersion").Text()
		if systemVersion != "" {
			systemType = util.GetOSType(systemVersion)
		}
	} else {
		systemVersion = ""
	}

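注意点：FindElements和FindElementsPath在查找不到节点时都只会返回空切片，不会panic；FindElements只有在传入的路径字符串不合法时才会panic。建议使用FindElementsPath：如果路径来自外部输入，可以先用etree.CompilePath编译并检查返回的error（注意MustCompilePath在路径不合法时同样会panic）；同一路径需要多次查询时，预编译还可以避免重复解析。SelectElement和SelectElements的区别：SelectElements返回所有匹配的直接子节点组成的切片，SelectElement只返回第一个匹配的直接子节点，找不到时返回nil。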

	path := etree.MustCompilePath("//PartitionName")
	doc.FindElementsPath(path)

重点：1.在获取节点值的时候一定要判断节点是否为空

2.路径前添加//表示递归查找，目标节点可以位于XML层级中的任意一层

3.doc.SelectElement只能选中直接子节点，因此必须从最外层的父节点开始逐级选取，不能跨层级

    //FindElementPath  指定父节点查找指定的节点值:节点不存在不会报错
	aa := doc.SelectElement("feed").SelectElement("entry")
	path := etree.MustCompilePath("updatedd")
	cc := aa.FindElementPath(path)
	if cc != nil {
		fmt.Println(cc.Text())
	} else {
		fmt.Println("cc == nil")
	}

2.etree支持接口返回内容和XML文件两种解析方式

测试用的data.xml文件

<feed xmlns="http://www.w3.org/2005/Atom" xmlns:ns2="http://a9.com/-/spec/opensearch/1.1/" xmlns:ns3="http://www.w3.org/1999/xhtml">
    <id>5134c4aa-8df4-3d41-aafc-927a7546f8b8</id>
    <updated>2019-09-25T03:03:29.615Z</updated>
    <link rel="SELF" href="https://192.168.10.51:12443/rest/api/uom/ManagedSystem/c13e47ce-5967-3845-a500-4b8947a9fb10/LogicalPartition"/>
    <link rel="MANAGEMENT_CONSOLE" href="https://192.168.10.51:12443/rest/api/uom/ManagementConsole/525cb072-1d11-3969-9245-053f0ac6f406"/>
    <generator>IBM Power Systems Management Console</generator>
    <entry>
        <id>323F5D6D-E501-4166-9522-B216B1565862</id>
        <title>LogicalPartition</title>
        <published>2019-09-25T03:03:29.914Z</published>
        <link rel="SELF" href="https://192.168.10.51:12443/rest/api/uom/ManagedSystem/c13e47ce-5967-3845-a500-4b8947a9fb10/LogicalPartition/323F5D6D-E501-4166-9522-B216B1565862"/>
        <author>
            <name>IBM Power Systems Management Console</name>
        </author>
        <etag:etag xmlns:etag="http://www.ibm.com/xmlns/systems/power/firmware/uom/mc/2012_10/" xmlns="http://www.ibm.com/xmlns/systems/power/firmware/uom/mc/2012_10/">-170243083</etag:etag>

    </entry>
    <entry>
        <id>21FE0160-FF83-4852-909B-B7D264E258C8</id>
        <title>LogicalPartition</title>
        <published>2019-09-25T03:03:29.916Z</published>
        <link rel="SELF" href="https://192.168.10.51:12443/rest/api/uom/ManagedSystem/c13e47ce-5967-3845-a500-4b8947a9fb10/LogicalPartition/21FE0160-FF83-4852-909B-B7D264E258C8"/>
        <author>
            <name>IBM Power Systems Management Console</name>
        </author>
        <etag:etag xmlns:etag="http://www.ibm.com/xmlns/systems/power/firmware/uom/mc/2012_10/" xmlns="http://www.ibm.com/xmlns/systems/power/firmware/uom/mc/2012_10/">-1052118977</etag:etag>

    </entry>
</feed>

var testData string = `<entry>
        <id>21FE0160-FF83-4852-909B-B7D264E258C8</id>
        <title>LogicalPartition</title>
        <published>2019-09-25T03:03:29.916Z</published>
        <link rel="SELF" href="https://192.168.10.51:12443/rest/api/uom/ManagedSystem/c13e47ce-5967-3845-a500-4b8947a9fb10/LogicalPartition/21FE0160-FF83-4852-909B-B7D264E258C8"/>
        <author>
            <name>IBM Power Systems Management Console</name>
        </author>
        <etag:etag xmlns:etag="http://www.ibm.com/xmlns/systems/power/firmware/uom/mc/2012_10/" xmlns="http://www.ibm.com/xmlns/systems/power/firmware/uom/mc/2012_10/">-1052118977</etag:etag>
    </entry>`

	//读取字符串的方法-----------采用绝对路径的方法
	//doc := etree.NewDocument()
	//if err := doc.ReadFromString(testData); err != nil {
	//	panic(err)
	//}
	//res := doc.FindElement("./entry[0]/id").Text()
	//fmt.Println(res)  21FE0160-FF83-4852-909B-B7D264E258C8

	// Parse the XML from a file on disk; a read or parse failure aborts the
	// program via panic.
	doc := etree.NewDocument()
	if err := doc.ReadFromFile("data.xml"); err != nil {
		panic(err)
	}
	// NOTE(review): the chained SelectElement calls below are not nil-checked;
	// they rely on <feed> and every <author> being present in data.xml. A
	// missing element would yield nil and the next call in the chain would panic.
	servers := doc.SelectElement("feed")
	for _, server := range servers.SelectElements("entry") {
		// Print the author's name for each <entry>.
		fmt.Println(server.SelectElement("author").SelectElement("name").Text())
		// <bb> does not exist in the sample data, so SelectElement returns nil here.
		if server.SelectElement("author").SelectElement("bb") == nil {
			fmt.Println("测试节点不存在")
		}
	}
	// Output (one pair of lines per <entry> in data.xml):
	//IBM Power Systems Management Console
	//测试节点不存在
	//IBM Power Systems Management Console
	//测试节点不存在

三、关于Etree XML官方文档

etree

The etree package is a lightweight, pure go package that expresses XML in the form of an element tree. Its design was inspired by the Python ElementTree module.

Some of the package's capabilities and features:

Represents XML documents as trees of elements for easy traversal.
Imports, serializes, modifies or creates XML documents from scratch.
Writes and reads XML to/from files, byte slices, strings and io interfaces.
Performs simple or complex searches with lightweight XPath-like query APIs.
Auto-indents XML using spaces or tabs for better readability.
Implemented in pure go; depends only on standard go libraries.
Built on top of the go encoding/xml package.

Creating an XML document

The following example creates an XML document from scratch using the etree package and outputs its indented contents to stdout.

doc := etree.NewDocument()
doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)

people := doc.CreateElement("People")
people.CreateComment("These are all known people")

jon := people.CreateElement("Person")
jon.CreateAttr("name", "Jon")

sally := people.CreateElement("Person")
sally.CreateAttr("name", "Sally")

doc.Indent(2)
doc.WriteTo(os.Stdout)

Output:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<People>
  <!--These are all known people-->
  <Person name="Jon"/>
  <Person name="Sally"/>
</People>

Reading an XML file

Suppose you have a file on disk called bookstore.xml containing the following data:

<bookstore xmlns:p="urn:schemas-books-com:prices">

  <book category="COOKING">
    <title lang="en">Everyday Italian</title>
    <author>Giada De Laurentiis</author>
    <year>2005</year>
    <p:price>30.00</p:price>
  </book>

  <book category="CHILDREN">
    <title lang="en">Harry Potter</title>
    <author>J K. Rowling</author>
    <year>2005</year>
    <p:price>29.99</p:price>
  </book>

  <book category="WEB">
    <title lang="en">XQuery Kick Start</title>
    <author>James McGovern</author>
    <author>Per Bothner</author>
    <author>Kurt Cagle</author>
    <author>James Linn</author>
    <author>Vaidyanathan Nagarajan</author>
    <year>2003</year>
    <p:price>49.99</p:price>
  </book>

  <book category="WEB">
    <title lang="en">Learning XML</title>
    <author>Erik T. Ray</author>
    <year>2003</year>
    <p:price>39.95</p:price>
  </book>

</bookstore>

This code reads the file's contents into an etree document.

doc := etree.NewDocument()
if err := doc.ReadFromFile("bookstore.xml"); err != nil {
    panic(err)
}

You can also read XML from a string, a byte slice, or an io.Reader.

Processing elements and attributes

This example illustrates several ways to access elements and attributes using etree selection queries.

root := doc.SelectElement("bookstore")
fmt.Println("ROOT element:", root.Tag)

for _, book := range root.SelectElements("book") {
    fmt.Println("CHILD element:", book.Tag)
    if title := book.SelectElement("title"); title != nil {
        lang := title.SelectAttrValue("lang", "unknown")
        fmt.Printf("  TITLE: %s (%s)\n", title.Text(), lang)
    }
    for _, attr := range book.Attr {
        fmt.Printf("  ATTR: %s=%s\n", attr.Key, attr.Value)
    }
}

Output:

ROOT element: bookstore
CHILD element: book
  TITLE: Everyday Italian (en)
  ATTR: category=COOKING
CHILD element: book
  TITLE: Harry Potter (en)
  ATTR: category=CHILDREN
CHILD element: book
  TITLE: XQuery Kick Start (en)
  ATTR: category=WEB
CHILD element: book
  TITLE: Learning XML (en)
  ATTR: category=WEB

Path queries

This example uses etree's path functions to select all book titles that fall into the category of 'WEB'. The double-slash prefix in the path causes the search for book elements to occur recursively; book elements may appear at any level of the XML hierarchy.

for _, t := range doc.FindElements("//book[@category='WEB']/title") {
    fmt.Println("Title:", t.Text())
}

Output:

Title: XQuery Kick Start
Title: Learning XML

This example finds the first book element under the root bookstore element and outputs the tag and text of each of its child elements.

for _, e := range doc.FindElements("./bookstore/book[1]/*") {
    fmt.Printf("%s: %s\n", e.Tag, e.Text())
}

Output:

title: Everyday Italian
author: Giada De Laurentiis
year: 2005
price: 30.00

This example finds all books with a price of 49.99 and outputs their titles.

path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title")
for _, e := range doc.FindElementsPath(path) {
    fmt.Println(e.Text())
}

Output:

XQuery Kick Start

Note that this example uses the FindElementsPath function, which takes as an argument a pre-compiled path object. Use precompiled paths when you plan to search with the same path more than once.