本文涉及 Go 的 net/http 包的使用、文件读写、log 日志、正则匹配、goroutine 并发以及协程同步。
package main
import (
"fmt"
"net/http"
"io/ioutil"
"regexp"
"sync"
"log"
"time"
"os"
)
// wg tracks the crawler goroutines: GetImgUrl calls wg.Done when it returns,
// and main blocks on wg.Wait until every started goroutine has finished.
var wg sync.WaitGroup
// main redirects the standard logger to down.log (created if missing,
// appended to otherwise), starts one goroutine per page to crawl, waits for
// all of them to finish and prints the total elapsed time.
func main() {
	logFile, err := os.OpenFile("down.log", os.O_APPEND|os.O_CREATE|os.O_RDWR, 0666)
	if err != nil {
		log.Fatal(err)
	}
	defer logFile.Close()
	log.SetOutput(logFile)

	// Record the start time so the total run time can be reported.
	start := time.Now()

	num := 20 // crawl 20 pages, one goroutine per page
	for i := 0; i < num; i++ {
		url := fmt.Sprintf("https://~~不显示网址~~ /20140421192446_%d.htm", i+2)
		fmt.Println(url)
		log.Printf("GET %s\n", url)
		// Register each goroutine right before starting it so the WaitGroup
		// counter always equals the number of goroutines actually launched
		// (the previous code added 20 but launched 21).
		wg.Add(1)
		go GetImgUrl(url, i)
	}

	wg.Wait() // the main goroutine blocks until every crawler has called wg.Done
	elapsed := time.Since(start)
	fmt.Println("程序用时:", elapsed)
}
// GetImgUrl downloads the HTML page at url, searches its body for image
// links matching the upload-path pattern and passes the first match to
// GetImg for download.
//
// i is the page index; it is forwarded unchanged to GetImg, which uses it in
// the saved file name.
//
// The function calls wg.Done when it returns, so the caller must have
// incremented wg before starting it. Failures are reported on stdout and the
// function returns without downloading anything.
func GetImgUrl(url string, i int) {
	defer wg.Done()

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		// NewRequest returns a nil request for a malformed URL; using it
		// would dereference nil below.
		fmt.Println("http new request error ", err)
		return
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0")

	// A timeout keeps a stalled server from blocking wg.Wait in main forever.
	client := &http.Client{Timeout: 30 * time.Second}
	response, err := client.Do(req)
	if err != nil {
		fmt.Println("http get error ", err)
		return
	}
	defer response.Body.Close()

	fmt.Println(url, response.Status)
	log.Println(url, response.Status)

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("response.Body error", err)
		return
	}

	reg := "http://~~不显示网址~~ /uploads/tu/[0-9]{6}/[0-9]{4}/[0-9a-zA-Z]{10}.jpg"
	compile := regexp.MustCompile(reg)
	// The pattern has no capture groups, so whole matches are all that is
	// needed; body is already a []byte and is searched directly.
	matches := compile.FindAll(body, -1)
	fmt.Println("url:", url, "match:", len(matches))
	if len(matches) == 0 {
		fmt.Println("url:", url, "null", len(matches))
		return
	}

	// Only the first image link found on the page is downloaded.
	GetImg(string(matches[0]), i)
}
// GetImg downloads the resource at url and saves it as
// ./IMG/<timestamp>_<i>.jpg, creating the ./IMG directory if it does not
// exist yet.
//
// i is appended to the file name so that downloads finishing within the same
// second do not overwrite each other.
//
// Failures (bad URL, transport error, non-200 status, read or write error)
// are reported on stdout and nothing is written.
//
// NOTE(review): the timestamp in the file name contains ':' characters,
// which are not valid in Windows file names — confirm the target platform.
func GetImg(url string, i int) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		// NewRequest returns a nil request for a malformed URL; using it
		// would dereference nil below.
		fmt.Println("http new request error ", err)
		return
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0")

	// A timeout keeps a stalled download from hanging the calling goroutine.
	client := &http.Client{Timeout: 30 * time.Second}
	response, err := client.Do(req)
	if err != nil {
		fmt.Println("http get error ", err)
		return
	}
	defer response.Body.Close()

	// Do not save an error page (404, 500, ...) under a .jpg name.
	if response.StatusCode != http.StatusOK {
		fmt.Println("http get error ", url, response.Status)
		return
	}

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("response.Body error", err)
		return
	}

	// WriteFile does not create parent directories, so make sure the target
	// directory exists. MkdirAll succeeds if it is already there.
	if err := os.MkdirAll("./IMG", 0755); err != nil {
		fmt.Println("os.MkdirAll error", err)
		return
	}

	timeStr := time.Now().Format("2006-01-02 15:04:05")
	filename := fmt.Sprintf("./IMG/%s_%d.jpg", timeStr, i)
	if err := ioutil.WriteFile(filename, body, 0666); err != nil {
		fmt.Println("ioutil.WriteFile error", err)
		return
	}
	fmt.Println(url, " ok")
}