1、用Go语言通过 os/exec 调用 curl 获取HTML源码(string格式),再用第三方包(“github.com/PuerkitoBio/goquery”)解析该HTML字符串。
2、代码如下:
package main
import (
"fmt"
"os/exec"
"log"
"strings"
"github.com/PuerkitoBio/goquery"
)
/*
Example of the HTML page this program parses:
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index</title>
</head>
<body>
<table>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="xxxx.whl">xxxx.whl</a></td><td align="right">2019-11-30 13:22 </td><td align="right">6.8M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="xxxxxx.whl">xxxxxx.whl</a></td><td align="right">2019-11-30 13:39 </td><td align="right">6.7M</td><td> </td></tr>
<tr><th colspan="5"><hr></th></tr>
</table>
</body></html>
*/
// main fetches an HTML page by running curl through bash, parses the output
// with goquery, and prints the href of the first <a> element found in each
// <tr> row.
//
// Rows that contain no link with an href attribute are skipped, so the printed
// index i is the row's position among all <tr> elements, not a running count
// of printed links.
func main() {
	order := `curl --noproxy "*" http://10.11.**.**:80/`
	fmt.Println(order)

	cmd := exec.Command("/bin/bash", "-c", order)
	out, err := cmd.Output()
	if err != nil {
		// Stop here: parsing empty or partial output would print nothing and
		// hide the real failure behind a successful exit status.
		log.Fatalf("running %q: %v", order, err)
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(out)))
	if err != nil {
		log.Fatal(err)
	}

	doc.Find("tr").Each(func(i int, s *goquery.Selection) {
		href, exists := s.Find("a").Attr("href")
		if !exists {
			// No link in this row (or the link has no href); nothing to print.
			return
		}
		fmt.Printf("result %d: %s \n", i, href) // e.g. xxxx.whl, xxxxxx.whl
	})
}