之前一直在 shell 里用 wget 或 curl 获取 HTTP 返回码,效率较低,于是计划利用 Go 的并发优势写这样一个小工具。经过简单验证,效率较单线程提升明显;唯一难以保证的是,高并发访问下网页请求是否会偶发失败,使本来返回 200 的页面得到非 200 的状态码,这一点有待解决。
代码如下:
package main
import (
	"bufio"
	"fmt"
	"net/http"
	"os"
	"runtime"
	"strings"
	"sync"
	"time"
)
// urlChan carries the URLs read from the input file to the worker goroutines.
var urlChan chan string

// wg lets main wait until every fetch worker has finished.
var wg sync.WaitGroup
// main reads URLs (one per line) from a fixed input file, queues them on
// urlChan, then spawns 10 worker goroutines that print each URL's HTTP
// status-code chain, and waits for all of them to finish.
func main() {
	// Redundant since Go 1.5 (GOMAXPROCS defaults to NumCPU), kept for parity.
	runtime.GOMAXPROCS(runtime.NumCPU())

	fmt.Println("start read")
	urlChan = make(chan string, 5000)

	file, err := os.Open("C:\\Users\\chant\\Desktop\\addNewUrl")
	if err != nil {
		// Bail out: the original continued with a nil *os.File after a
		// failed Open, which panics on the first Scan.
		fmt.Println(err)
		return
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		// NOTE(review): no workers are running yet, so a file with more
		// than 5000 lines fills the buffer and deadlocks here — the channel
		// capacity bounds the input size.
		urlChan <- scanner.Text()
	}
	// Scanner errors (e.g. a line exceeding the buffer) were silently
	// dropped before; surface them.
	if err := scanner.Err(); err != nil {
		fmt.Println(err)
	}
	// All URLs are queued; close so receivers can detect exhaustion.
	close(urlChan)

	for i := 0; i < 10; i++ {
		wg.Add(1)
		go fetchUrl()
	}
	wg.Wait()
	fmt.Println("DONE")
}
func fetchUrl() {
for {
if len(urlChan) <= 0 {
break
}
url := <-urlChan
codeArr := make([]int, 0, 10)
response, err := http.Get(url)
if err != nil {
fmt.Println("%s %s", url, err)
return
}
defer response.Body.Close()
for {
if response == nil {
break
}
code := response.StatusCode
req := response.Request
if req != nil {
if strings.Contains(req.URL.Path, "error.htm") {
code = 404
}
}
codeArr = append(codeArr, code)
response = response.Request.Response
}
size := len(codeArr)
var result string
for i := size - 1; i >= 0; i-- {
result = fmt.Sprintf("%s %d", result, codeArr[i])
}
fmt.Println(url, result)
}
wg.Done()
}