需求:解析 HTML 代码,样例如下:
<html>
<body>
<div id="newlist_1">
<a id="newlist_1" href="https://example.com">Link 1</a>
</div>
<div id="newlist_2">
<a id="newlist_2" href="https://example.com">Link 2</a>
</div>
</body>
</html>
找出:id 为 newlist_1 的节点下面的子节点。
package main
import (
"fmt"
"strings"
"golang.org/x/net/html"
)
// findNodeByID performs a depth-first, pre-order search of the tree rooted at
// node and returns the first <a> element whose id attribute equals id.
//
// Only anchor elements are considered: any other element carrying the same
// id (for example an enclosing <div>) is not returned, although its
// descendants are still searched. It returns nil when node is nil or when no
// matching element exists.
func findNodeByID(node *html.Node, id string) *html.Node {
	// Guard against a nil root so the field accesses below cannot trigger a
	// nil-pointer dereference.
	if node == nil {
		return nil
	}

	// Check the current node before descending, so an ancestor match wins
	// over any match among its descendants.
	if node.Type == html.ElementNode && node.Data == "a" {
		for _, attr := range node.Attr {
			if attr.Key == "id" && attr.Val == id {
				return node
			}
		}
	}

	// Visit children in sibling order and stop at the first hit.
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if found := findNodeByID(child, id); found != nil {
			return found
		}
	}
	return nil
}
// main parses an embedded sample HTML document, looks up the node with id
// "newlist_1" via findNodeByID, and prints the Data of that node's first
// child. It reports a parse failure, a missing node, or a childless node
// instead of panicking.
func main() {
	htmlString := `<html>
<body>
<div id="newlist_1">
<a id="newlist_1" href="https://example.com">Link 1</a>
</div>
<div id="newlist_2">
<a id="newlist_2" href="https://example.com">Link 2</a>
</div>
</body>
</html>`

	doc, err := html.Parse(strings.NewReader(htmlString))
	if err != nil {
		fmt.Println("HTML parsing error:", err)
		return
	}

	node := findNodeByID(doc, "newlist_1")
	if node == nil {
		fmt.Println("Node not found")
		return
	}

	// An empty element such as <a id="newlist_1"></a> has no children, so
	// FirstChild must be checked before it is dereferenced.
	if node.FirstChild == nil {
		fmt.Println("Found node with no children")
		return
	}
	fmt.Println("Found node:", node.FirstChild.Data)
}