package main
import (
"net/http"
"io/ioutil"
"regexp"
"strings"
"os"
"strconv"
"fmt"
)
// Check is the file's fail-fast error helper. A nil err is a no-op; for a
// non-nil err it prints an error banner with the built-in println and then
// panics with err itself, so a recover() higher up sees the original error
// value.
//
// err is received by value, so there is nothing for this function to reset on
// the caller's side once it returns or panics.
func Check(err error) {
	if err == nil {
		return
	}
	println("****ERROR****")
	panic(err)
}
// GetHTML fetches url with an HTTP GET and returns the response body as a
// string.
//
// Failures are not returned to the caller: a transport error, a non-2xx
// response status, or an error while reading the body is passed to Check,
// which panics on a non-nil error.
func GetHTML(url string) (body string) {
	res, err := http.Get(url)
	Check(err)
	defer res.Body.Close()

	// http.Get only reports transport-level failures; an error page (404,
	// 503, ...) still arrives with err == nil. Reject it explicitly instead of
	// returning its markup as if it were the requested page.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		Check(fmt.Errorf("GET %s: unexpected status %s", url, res.Status))
	}

	// Read the whole body and convert it to a string.
	data, err := ioutil.ReadAll(res.Body)
	Check(err)
	return string(data)
}
// downloadfile fetches url with an HTTP GET and returns the raw response body.
//
// Failures are not returned to the caller: a transport error, a non-2xx
// response status, or an error while reading the body is passed to Check,
// which panics on a non-nil error.
func downloadfile(url string) (file []byte) {
	res, err := http.Get(url)
	Check(err)
	defer res.Body.Close()

	// http.Get only reports transport-level failures; without this check the
	// bytes of a 404/5xx error page would be handed back as if they were the
	// requested file.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		Check(fmt.Errorf("GET %s: unexpected status %s", url, res.Status))
	}

	// Read the whole payload into memory.
	file, err = ioutil.ReadAll(res.Body)
	Check(err)
	return file
}
// main prints the program banner and then runs GetImages once for every
// configured board page.
func main() {
	println("======================================================\n")
	println("\t小图片爬取程序\n")

	const boardURL = "http://boards.4chan.org/s/"

	// Page numbers to crawl. Only page 2 is enabled; the wider range that was
	// used before is kept here for reference:
	//   "2", "3", "4", "5", "6", "7", "8", "9", "10"
	pageNumbers := [...]string{"2"}

	for i := 0; i < len(pageNumbers); i++ {
		GetImages(boardURL + pageNumbers[i] + "/")
	}
}
// imageItemExp matches a double-quoted, protocol-relative image URL of the
// form "//i.4cdn.org/s/<digits>s.jpg". The surrounding double quotes are part
// of the match, and <digits> is one or more decimal digits.
// NOTE(review): the trailing "s" before ".jpg" presumably marks the board's
// thumbnail variant of an image; confirm against the site's URL scheme.
var imageItemExp = regexp.MustCompile(`"//i\.4cdn\.org/s/[0123456789]+s\.jpg"`)
// GetImages downloads every image referenced on the page at url into the
// directory "图片存放目录" under the current working directory.
//
// The page is fetched with GetHTML and scanned with imageItemExp. Each match
// is turned into a download URL by dropping the surrounding quotes and the
// "s.jpg" tail and then adding "http:" in front and ".jpg" at the end. The
// results are saved as img-0.jpg, img-1.jpg, ... in match order.
//
// Errors are not returned: a failure to create the directory or to write a
// file prints a banner and panics.
//
// NOTE(review): file names are numbered from 0 on every call, so calling this
// for several pages overwrites the files written for the previous page.
func GetImages(url string) {
	// Collect the quoted image links from the page; -1 means "no limit".
	body := GetHTML(url)
	matches := imageItemExp.FindAllString(body, -1)

	// Create the output directory up front so a permission or path problem is
	// reported as such, not as a later write failure.
	dirName := "图片存放目录"
	if err := os.MkdirAll(dirName, 0777); err != nil {
		println("****ERROR****")
		panic(err)
	}

	for i, m := range matches {
		// A match looks like `"//i.4cdn.org/s/<digits>s.jpg"`. Strip the quotes
		// and the "s.jpg" tail by content rather than slicing at a fixed
		// offset: a fixed offset is only right for one particular id length,
		// panics on shorter ids and truncates longer ones.
		link := strings.Replace(m, "\"", "", -1)
		link = strings.TrimSuffix(link, "s.jpg")

		fileName := "./" + dirName + "/img-" + strconv.Itoa(i) + ".jpg"
		data := downloadfile("http:" + link + ".jpg")

		// Save the downloaded bytes.
		if err := ioutil.WriteFile(fileName, data, 0666); err != nil {
			println("****下载错误****")
			panic(err)
		}
		fmt.Println("下载图片:", fileName)
	}
}