使用 Go(golang)解析 HTML 标签,获取标签里面的内容,并修改标签的属性(例如 img 的 src)。
import (
"net/http"
"github.com/NiuStar/log/fmt"
"github.com/PuerkitoBio/goquery"
)
//goquery包在github上的地址:https://github.com/PuerkitoBio/goquery
// PageQuaryTest fetches the page at baseUrl, parses the HTML with goquery and
// prints what it finds:
//
//   - for every <img>: the value of its src attribute and whether the
//     attribute exists, after which src is set to "" in the parsed document;
//   - for every <div>: its index and text content;
//   - for every ".sidebar-reviews article .content-block" element: the text
//     of its <a> and <i> descendants.
//
// Nothing is returned. A failed HTTP request or a failed parse is printed and
// ends the call.
func PageQuaryTest(baseUrl string) {
	res, err := http.Get(baseUrl)
	if err != nil {
		fmt.Println(err.Error())
		return
	}

	// Per the goquery documentation, NewDocumentFromResponse closes res.Body
	// before returning, so no explicit Close is needed here.
	doc, err := goquery.NewDocumentFromResponse(res)
	if err != nil {
		fmt.Println("err--->", err)
		// doc is nil on failure; stop here instead of falling through to the
		// Find calls below, which would dereference it and panic.
		return
	}

	// Inspect each <img> tag and then modify it.
	doc.Find("img").Each(func(i int, s *goquery.Selection) {
		v, t := s.Attr("src") // v: attribute value, t: whether src is present
		fmt.Println("v--->", v, " t--->", t)
		s.SetAttr("src", "") // rewrite the attribute in the parsed document
	})

	// Print the text of each <div> tag.
	doc.Find("div").Each(func(i int, s *goquery.Selection) {
		fmt.Println(i, s.Text())
	})

	doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) {
		// For each item found, get the band and title.
		band := s.Find("a").Text()
		title := s.Find("i").Text()
		// Println does not interpret format verbs, so the values are passed as
		// separate operands rather than through a "%d/%s" template.
		// NOTE(review): fmt here is github.com/NiuStar/log/fmt; only Println is
		// known to exist from this file, hence no Printf.
		fmt.Println("Review", i, ":", band, "-", title)
	})
}