linux搞笑图片,Golang抓取百度图片搞笑分类并入库(mongodb)

练手之作,编译后即可运行(前提是 MongoDB 服务已启动),可在 Windows/Linux 下使用。请勿用于非法用途。

package main

import (

_"net/http"

"log"

"os"

"io/ioutil"

"crypto/md5"

"encoding/hex"

"github.com/davecgh/go-spew/spew"

"github.com/bitly/go-simplejson"

"labix.org/v2/mgo"

_"labix.org/v2/mgo/bson"

_"github.com/PuerkitoBio/gocrawl"

_"github.com/PuerkitoBio/goquery"

"./eva"

)

// Item is one scraped image record. main fills it from a single entry of
// the listing JSON and insert stores it in the MongoDB collection.
type Item struct {
	// Full-size image: source URL and pixel dimensions.
	Imgurl              string
	Imgwidth, Imgheight int

	// Thumbnail: source URL and pixel dimensions.
	Thumburl                string
	Thumbwidth, Thumbheight int

	// Desc and Date are copied verbatim from the listing entry.
	Desc string
	Date string

	// Likes is initialised to 0 by main.
	Likes int

	// Local paths (as produced by makeFname) where the downloaded image
	// and thumbnail are written.
	Localfile      string
	ThumbLocalfile string
}

// BAIDU is the package-level handle to the MongoDB collection. main assigns
// it from connectMongo, and insert writes documents through it.
var BAIDU *mgo.Collection

// md5Byte returns the lowercase hexadecimal MD5 digest of s.
func md5Byte(s []byte) string {
	digest := md5.Sum(s)
	return hex.EncodeToString(digest[:])
}

// makeFname maps s to a file path below ./basedir and makes sure the
// containing directory exists.
//
// The path is "./<basedir>/<d[0:3]>/<d[3:6]>/<d[6:9]>/<d>", where d is the
// hex MD5 digest of s, so files are spread over three levels of
// sub-directories instead of piling up in one.
//
// A failure to create the directory is logged rather than returned; the
// path is returned either way, so a later write to it will fail.
func makeFname(basedir, s string) string {
	digest := md5Byte([]byte(s))

	dir := "./" + basedir + "/" + digest[:3] + "/" + digest[3:6] + "/" + digest[6:9] + "/"

	// MkdirAll does nothing and returns nil when dir already exists, so no
	// preceding Stat check is required.
	if err := os.MkdirAll(dir, 0755); err != nil {
		log.Printf("[-] mkdir failed : %s : %v", dir, err)
	}

	return dir + digest
}

func downloadImage(url string, to string) {

headers := Eva.M{

"Accept-Encoding":"gzip,deflate,sdch",

"Referer":"http://image.baidu.com/channel/funny",

"Host":"image.baidu.com",

}

cookies := Eva.M{

"user":"baidu",

}

req := &Eva.Request{}

err, content, resp := req.Do("GET", url, nil, headers, cookies, 30)

if err != nil {

spew.Printf("[-] download failed(%d) : %s

", resp.StatusCode, url)

return

}

//spew.Dump(content, resp)

ioutil.WriteFile(to, content, 0755)

}

// insert dumps item to standard output and stores it in the BAIDU
// collection. A failed insert is logged instead of being silently dropped.
func insert(item *Item) {
	spew.Dump(item)

	if err := BAIDU.Insert(item); err != nil {
		log.Printf("[-] insert failed : %s : %v", item.Imgurl, err)
	}
}

// connectMongo dials the MongoDB server at 127.0.0.1 and returns the
// "baidu" collection of the "xiaohua" database together with the session
// it belongs to. It panics if the server cannot be reached.
//
// The session is not closed here; the caller is responsible for closing it.
func connectMongo() (*mgo.Collection, *mgo.Session) {
	sess, dialErr := mgo.Dial("127.0.0.1")
	if dialErr != nil {
		panic(dialErr)
	}

	// Optional: switch the session to monotonic behavior.
	sess.SetMode(mgo.Monotonic, true)

	coll := sess.DB("xiaohua").C("baidu")
	return coll, sess
}

func main() {

collection, session := connectMongo()

BAIDU = collection

defer func() {

session.Close()

if err := recover(); err != nil{

log.Println(err)

}

}()

var url ="http://image.baidu.com/channel/listjson?fr=channel&tag1=%E6%90%9E%E7%AC%91&tag2=%E5%85%A8%E9%83%A8&sorttype=0&pn=0&rn=30&ie=utf8&oe=utf-8&app=img.browse.channel.general&1393916968302"

body, err := Eva.HttpGet(url)

Eva.Check(err)

json, err := simplejson.NewJson(body)

if err != nil {

log.Fatal("error:", err)

}

//spew.Dump(json.Get("Data"))

data, err := json.Get("data").Array()

if err != nil {

log.Fatal("error:", err)

}

for _, v := range data {

vv := v.(map[string]interface {})

//spew.Dump(vv["date"])

item := new (Item)

item.Imgurl = vv["download_url"].(string)

item.Imgwidth = int(vv["image_width"].(float64))

item.Imgheight = int(vv["image_height"].(float64))

item.Thumburl = vv["thumbnail_url"].(string)

item.Thumbwidth = int(vv["thumbnail_width"].(float64))

item.Thumbheight = int(vv["thumbnail_height"].(float64))

item.Desc = vv["desc"].(string)

item.Date = vv["date"].(string)

item.Likes = 0

item.Localfile = makeFname("tmp", item.Imgurl);

//item.Localfile ="./tmp/"+ md5Byte([]byte(item.Imgurl))

downloadImage(item.Imgurl, item.Localfile)

item.ThumbLocalfile = makeFname("tmp", item.Thumburl);

//item.ThumbLocalfile ="./tmp/thumb/"+ md5Byte([]byte(item.Thumburl))

downloadImage(item.Thumburl, item.ThumbLocalfile)

//spew.Dump(item)

insert(item)

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值