golang 使用线程池进行高并发爬虫
gospider 介绍
gospider 是一个 Golang 爬虫神器，拥有从 Python 爬虫过渡到 Golang 爬虫所必需的全部库，用于帮助 Python 爬虫从业者快速且无坑地过渡到 Golang。
安装
go get -u gitee.com/baixudong/gospider
gitee地址
https://gitee.com/baixudong/gospider
github地址
https://github.com/baixudong007/gospider
代码示例
package main
import (
"context"
"log"
"net/url"
"gitee.com/baixudong/gospider/requests"
"gitee.com/baixudong/gospider/thread"
)
// reqCli is the package-level request client. It is assigned in init and
// used by test to send requests.
var reqCli *requests.Client
// init constructs the package-level reqCli before main runs and panics
// (via log.Panic) if the client cannot be created.
func init() {
	opt := requests.ClientOption{
		// Hook for obtaining a proxy automatically. This example always
		// returns an empty string and a nil error.
		// NOTE(review): presumably an empty string means "no proxy" —
		// confirm against the gospider requests documentation.
		GetProxy: func(_ context.Context, _ *url.URL) (string, error) {
			return "", nil
		},
	}

	var err error
	if reqCli, err = requests.NewClient(nil, opt); err != nil {
		log.Panic(err)
	}
}
// test sends a single "get" request to http://myip.top through reqCli and
// logs the start, the outcome (error text on failure, status code on
// success) and the end of the request. num is only used to label the log
// lines.
func test(ctx context.Context, num int) {
	const target = "http://myip.top"

	log.Printf("第%d个请求开始", num)
	switch resp, err := reqCli.Request(ctx, "get", target); {
	case err != nil:
		log.Printf("第%d个请求失败%s", num, err.Error())
	default:
		log.Printf("第%d个请求成功%d", num, resp.StatusCode())
	}
	log.Printf("第%d个请求结束", num)
}
// main creates a thread client limited to 3 concurrent tasks, submits 10
// tasks that each run test with their index as the argument, and then
// calls Join on the client.
func main() {
	const (
		maxWorkers = 3  // concurrency limit passed to thread.NewClient
		taskCount  = 10 // number of tasks submitted
	)

	pool := thread.NewClient(nil, maxWorkers)
	for n := 0; n < taskCount; n++ {
		// Submit the task to the pool; n is forwarded to test as num.
		task := &thread.Task{
			Func: test,
			Args: []any{n},
		}
		pool.Write(task)
	}
	// NOTE(review): Join presumably blocks until every submitted task has
	// finished — confirm against the gospider thread documentation.
	pool.Join()
}