Golang抓取百度图片搞笑分类并入库(mongodb)

练手之作,编译后即可运行(前提是本机已启动 MongoDB,程序连接 127.0.0.1),可在 Windows/Linux 下使用。请勿用于非法用途。

package main

import (
_"net/http"
"log"
"os"
"io/ioutil"
"crypto/md5"
"encoding/hex"
"github.com/davecgh/go-spew/spew"
"github.com/bitly/go-simplejson"
"labix.org/v2/mgo"
_"labix.org/v2/mgo/bson"
_"github.com/PuerkitoBio/gocrawl"
_"github.com/PuerkitoBio/goquery"
"./eva"
)

// Item describes one image from the listing together with the local files it
// was saved to. main fills one Item per listing entry and passes it to insert.
type Item struct {
// Imgurl is the full-size image URL, taken from the entry's "download_url".
Imgurl string
// Imgwidth is taken from the entry's "image_width".
Imgwidth int
// Imgheight is taken from the entry's "image_height".
Imgheight int
// Thumburl is the thumbnail URL, taken from the entry's "thumbnail_url".
Thumburl string
// Thumbwidth is taken from the entry's "thumbnail_width".
Thumbwidth int
// Thumbheight is taken from the entry's "thumbnail_height".
Thumbheight int
// Desc is taken from the entry's "desc".
Desc string
// Date is taken from the entry's "date" and kept as the raw string.
Date string
// Likes is always set to 0 by main.
Likes int
// Localfile is the path the full-size image is written to (makeFname of Imgurl).
Localfile string
// ThumbLocalfile is the path the thumbnail is written to (makeFname of Thumburl).
ThumbLocalfile string
}

// BAIDU is the MongoDB collection that insert writes to. It is nil until main
// assigns it the collection returned by connectMongo.
var BAIDU *mgo.Collection

// md5Byte returns the MD5 digest of s as a lowercase hexadecimal string.
func md5Byte(s []byte) string {
	digest := md5.Sum(s)
	return hex.EncodeToString(digest[:])
}

// makeFname maps s to a file path below ./basedir and makes sure the
// directory that will hold the file exists.
//
// The file name is the hex MD5 digest of s (see md5Byte). Its first nine
// characters, taken three at a time, name three nested sub-directories, so the
// result has the form ./<basedir>/<d[0:3]>/<d[3:6]>/<d[6:9]>/<d>.
//
// The path is returned even when the directory cannot be created; the failure
// is logged and will surface again when the caller tries to write the file.
func makeFname(basedir, s string) string {
	digest := md5Byte([]byte(s))
	dir := "./" + basedir + "/" + digest[:3] + "/" + digest[3:6] + "/" + digest[6:9] + "/"
	// MkdirAll is a no-op when dir already exists, so no prior Stat is needed.
	// The previous guard tested os.IsExist on a Stat error, which is never
	// true, and it discarded MkdirAll's own error.
	if err := os.MkdirAll(dir, 0755); err != nil {
		log.Printf("[-] mkdir failed : %s : %v", dir, err)
	}
	return dir + digest
}

func downloadImage(url string, to string) {
headers := Eva.M{
"Accept-Encoding":"gzip,deflate,sdch",
"Referer":"http://image.baidu.com/channel/funny",
"Host":"image.baidu.com", 
}
cookies := Eva.M{
"user":"baidu", 
}
req := &Eva.Request{}
err, content, resp := req.Do("GET", url, nil, headers, cookies, 30)
if err != nil {
spew.Printf("[-] download failed(%d) : %s
", resp.StatusCode, url)
return
}
//spew.Dump(content, resp)
ioutil.WriteFile(to, content, 0755)
}

// insert dumps item with spew.Dump and then stores it in the BAIDU
// collection. A failed insert is logged rather than silently dropped; the
// function has no return value, so the caller is not notified.
func insert(item *Item) {
	spew.Dump(item)
	if err := BAIDU.Insert(item); err != nil {
		log.Printf("[-] insert failed : %s : %v", item.Imgurl, err)
	}
}

// connectMongo dials the MongoDB server at 127.0.0.1 and returns the "baidu"
// collection of the "xiaohua" database along with the underlying session.
// It panics if the server cannot be reached.
//
// The session is intentionally not closed here: it is handed back so the
// caller can close it when finished.
func connectMongo() (*mgo.Collection, *mgo.Session) {
	sess, dialErr := mgo.Dial("127.0.0.1")
	if dialErr != nil {
		panic(dialErr)
	}
	// Optional: switch the session to monotonic mode.
	sess.SetMode(mgo.Monotonic, true)
	coll := sess.DB("xiaohua").C("baidu")
	return coll, sess
}

func main() {
collection, session := connectMongo()
BAIDU = collection
defer func() {
session.Close()
if err := recover(); err != nil{
log.Println(err)
}
}()
var url ="http://image.baidu.com/channel/listjson?fr=channel&tag1=%E6%90%9E%E7%AC%91&tag2=%E5%85%A8%E9%83%A8&sorttype=0&pn=0&rn=30&ie=utf8&oe=utf-8&app=img.browse.channel.general&1393916968302"
body, err := Eva.HttpGet(url)
Eva.Check(err)
json, err := simplejson.NewJson(body)
if err != nil {
log.Fatal("error:", err)
}
//spew.Dump(json.Get("Data"))
data, err := json.Get("data").Array()
if err != nil {
log.Fatal("error:", err)
}

for _, v := range data {
vv := v.(map[string]interface {})
//spew.Dump(vv["date"])
item := new (Item)
item.Imgurl = vv["download_url"].(string)
item.Imgwidth = int(vv["image_width"].(float64))
item.Imgheight = int(vv["image_height"].(float64))
item.Thumburl = vv["thumbnail_url"].(string)
item.Thumbwidth = int(vv["thumbnail_width"].(float64))
item.Thumbheight = int(vv["thumbnail_height"].(float64))
item.Desc = vv["desc"].(string)
item.Date = vv["date"].(string)
item.Likes = 0
item.Localfile = makeFname("tmp", item.Imgurl);
//item.Localfile ="./tmp/"+ md5Byte([]byte(item.Imgurl))
downloadImage(item.Imgurl, item.Localfile)
item.ThumbLocalfile = makeFname("tmp", item.Thumburl);
//item.ThumbLocalfile ="./tmp/thumb/"+ md5Byte([]byte(item.Thumburl))
downloadImage(item.Thumburl, item.ThumbLocalfile)
//spew.Dump(item)
insert(item)
}
}

转载于:https://my.oschina.net/u/3626804/blog/1836981

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值