Headless Chrome
Headless Chrome 是 Chrome 浏览器的无界面形态,可以在不打开浏览器的前提下,使用所有 Chrome 支持的特性运行你的程序。相比于现代浏览器,Headless Chrome 更加方便测试 web 应用,获得网站的截图,做爬虫抓取信息等。相比于出道较早的 PhantomJS,SlimerJS 等,Headless Chrome 则更加贴近浏览器环境。
容器部署
https://github.com/Zenika/alpine-chrome
docker search alpine-chrome
docker pull zenika/alpine-chrome
docker container run -d -p 9222:9222 zenika/alpine-chrome --no-sandbox --remote-debugging-address=0.0.0.0 --remote-debugging-port=9222 https://www.chromestatus.com/
示例代码
curl http://127.0.0.1:9222/json/version  # 获取版本等信息
// Command screenshot is a chromedp example demonstrating how to connect to a
// remote headless Chrome instance and take full screenshots of a page from
// several pooled goroutines at once.
package main
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"net"
"net/http"
"net/url"
"sync"
"time"
"github.com/chromedp/chromedp"
"github.com/panjf2000/ants/v2"
)
// chromeVersion holds the fields decoded from the JSON document that the
// DevTools HTTP endpoint serves at /json/version.
type chromeVersion struct {
	// Browser is read from the "Browser" key.
	Browser string `json:"Browser"`

	// ProtocalVersion is read from the "Protocol-Version" key. The field name
	// keeps its historical spelling so existing references continue to compile.
	ProtocalVersion string `json:"Protocol-Version"`

	// UserAgent is read from the "User-Agent" key.
	UserAgent string `json:"User-Agent"`

	// V8Version is read from the "V8-Version" key.
	V8Version string `json:"V8-Version"`

	// WebKitVersion is read from the "WebKit-Version" key.
	WebKitVersion string `json:"WebKit-Version"`

	// WebSocketDebuggerURL is read from the "webSocketDebuggerUrl" key; it is
	// the value getChromeVersion hands back to its caller.
	WebSocketDebuggerURL string `json:"webSocketDebuggerUrl"`
}
// fullScreenshot builds the task list that loads urlstr and then captures a
// screenshot of the entire browser viewport.
//
// The image bytes are stored through res once the tasks have been run;
// quality is forwarded unchanged to chromedp.FullScreenshot.
//
// Note: chromedp.FullScreenshot overrides the device's emulation settings.
// Use device.Reset to restore the emulation and viewport settings afterwards.
func fullScreenshot(urlstr string, quality int, res *[]byte) chromedp.Tasks {
	open := chromedp.Navigate(urlstr)
	capture := chromedp.FullScreenshot(res, quality)
	return chromedp.Tasks{open, capture}
}
func getChromeVersion(address string) (string, error) {
u, _ := url.Parse("http://" + address)
ip, err := net.ResolveIPAddr("ip", u.Hostname())
if err != nil {
return "", err
}
link := "http://" + ip.String() + ":" + u.Port() + `/json/version`
resp, err := http.Get(link)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", err
}
chromeVer := &chromeVersion{}
json.Unmarshal(body, chromeVer)
return chromeVer.WebSocketDebuggerURL, nil
}
// myFunc is the pool worker. It derives a new chromedp context from the shared
// allocatorContext, takes a full screenshot of https://brank.as/ and writes it
// to ./tmp/<i>_fullScreenshot.png.
//
// i is the task index supplied by the pool; it is only used to label log
// output and to build the file name. Failures are printed rather than
// returned. The ./tmp directory is not created here, and ioutil.WriteFile does
// not create parent directories, so it has to exist beforehand.
func myFunc(i interface{}) {
	ctx, cancelCtx := chromedp.NewContext(allocatorContext)
	defer cancelCtx()

	// One timeout bounds the whole task. A second deadline of the same length
	// stacked on top would never fire first, so none is added.
	ctx, cancelTimeout := context.WithTimeout(ctx, 300*time.Second)
	defer cancelTimeout()

	// Registered last so that it runs first: chromedp.Cancel is invoked on
	// every exit path before the two context cancel functions above.
	defer func() {
		if err := chromedp.Cancel(ctx); err != nil {
			fmt.Println("Cancel", err)
		}
	}()

	var buf []byte
	if err := chromedp.Run(ctx, fullScreenshot(`https://brank.as/`, 90, &buf)); err != nil {
		fmt.Println("idx:", i, "failed run")
		fmt.Println(err)
		return
	}
	if err := ioutil.WriteFile(fmt.Sprintf("./tmp/%v_fullScreenshot.png", i), buf, 0o644); err != nil {
		fmt.Println(err)
	}
}
// allocatorContext is the shared chromedp allocator context. main assigns it
// once, before any task is submitted to the pool, and myFunc derives each
// per-task chromedp context from it.
var allocatorContext context.Context
// main looks up the WebSocket debugger URL of the Chrome instance listening on
// 127.0.0.1:9222, sets up a remote allocator for it, and pushes 100 screenshot
// tasks through a pool of 10 workers, waiting until every task has finished.
func main() {
	ws, err := getChromeVersion("127.0.0.1:9222")
	if err != nil {
		fmt.Println(err)
		return
	}

	allocCtx, cancelAlloc := chromedp.NewRemoteAllocator(context.Background(), ws)
	defer cancelAlloc()
	// Published before the first task is submitted; myFunc reads it.
	allocatorContext = allocCtx

	defer ants.Release()

	const (
		poolSize = 10
		runTimes = 100
	)

	var wg sync.WaitGroup
	p, err := ants.NewPoolWithFunc(poolSize, func(i interface{}) {
		// Deferred so the WaitGroup is released even when myFunc does not
		// return normally; otherwise wg.Wait below could block forever.
		defer wg.Done()
		myFunc(i)
	})
	if err != nil {
		// Without a pool there is nothing to run, and p must not be used.
		fmt.Println(err)
		return
	}
	defer p.Release()

	for i := 0; i < runTimes; i++ {
		wg.Add(1)
		if err := p.Invoke(int32(i)); err != nil {
			// The task was rejected, so the worker function will never call
			// Done for it; balance the Add here to keep wg.Wait from hanging.
			wg.Done()
			fmt.Println("idx:", i, "invoke failed:", err)
		}
	}
	wg.Wait()
	fmt.Printf("running goroutines: %d\n", p.Running())
}
运行结果
在tmp目录生成png截图文件