日志

今天改造了爬取图片的代码:

import re
import os
import requests
import time

# Running count of images downloaded so far (mutated by requestUrl).
# NOTE: a ``global`` statement at module scope is a no-op, so it was dropped.
PhotoNum = 0
# Directory the downloaded images are written to.
PWD = "./huaban/"
# Request headers sent with page requests (browser-like User-Agent).
head = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
# Timeout, in seconds, passed to requests for page fetches.
TimeOut = 30

# First listing page to crawl.
url = "http://huaban.com/favorite/beauty/"
# Prefix prepended to an image "key" to build its download URL.
url_image = "http://hbimg.b0.upaiyun.com/"
# Prefix for follow-up listing pages; the last seen pin id is appended as "max".
urlNext = "http://huaban.com/favorite/beauty/?iqkxaeyv&limit=20&wfl=1&max="


def downfile(file, url):
    """Download ``url`` and store the response body at path ``file``.

    The download is best-effort: any failure is reported on stdout and the
    function returns normally, so one bad image does not stop the caller.

    Args:
        file: Destination path of the finished download.
        url: Address of the resource to fetch.

    Returns:
        None.
    """
    print("开始下载:", file, url)
    # Write to a temporary sibling first so an interrupted download never
    # leaves a truncated file under the final name.
    part_file = file + ".part"
    try:
        # The ``with`` block releases the streamed connection in every case;
        # the timeout keeps a stalled server from hanging the crawl forever.
        with requests.get(url, headers=head, stream=True, timeout=TimeOut) as r:
            # Do not save an HTTP error page as if it were the image.
            r.raise_for_status()
            with open(part_file, 'wb') as fd:
                # iter_content() defaults to 1-byte chunks; use a real buffer.
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    fd.write(chunk)
        os.replace(part_file, file)
    except Exception as e:
        # Deliberately broad: a failed image must not abort the whole crawl.
        print("下载失败了", e)
        if os.path.exists(part_file):
            os.remove(part_file)


def requestpageText(url):
    """Fetch ``url`` and return the response body decoded as UTF-8 text.

    On a network error the function reports it, sleeps five seconds and tries
    again, repeating until a request succeeds.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text as a ``str``.
    """
    # A loop instead of recursion: the old recursive retry dropped its result
    # (missing ``return``), so the caller got None after any failure, and each
    # retry also added a stack frame.
    while True:
        try:
            # requests.get() manages (and closes) its own short-lived session.
            Page = requests.get(url, headers=head, timeout=TimeOut)
            Page.encoding = "utf-8"
            return Page.text
        except requests.RequestException as e:
            print("联网失败了...重试中", e)
            time.sleep(5)
            print("暂停结束")


def requestUrl(url):
    """Crawl listing pages starting at ``url`` and download popular images.

    Each page is scanned for pin records (pin id, image key, like count,
    repin count). A pin is downloaded to ``PWD/<pin_id>.jpg`` when it passes
    the popularity threshold and the file is not already on disk. The next
    page is requested with the last pin id seen on the current page; crawling
    stops when a page yields no pins.

    Args:
        url: Address of the first listing page.

    Returns:
        None. Increments the module-level ``PhotoNum`` per saved image.
    """
    global PhotoNum
    # Raw string: "\d" in a normal string literal is an invalid escape.
    pattern = re.compile(r'{"pin_id":(\d*?),.*?"key":"(.*?)",.*?"like_count":(\d*?),.*?"repin_count":(\d*?),.*?}', re.S)

    # Iterate over pages instead of recursing once per page, which would hit
    # the interpreter's recursion limit on a long crawl.
    while True:
        print("*******************************************************************")
        print("请求网址:", url)
        text = requestpageText(url)
        # Guard against a missing page text as well as a page without pins.
        items = pattern.findall(text) if text else []
        if not items:
            print("没有更多图片了, 结束")
            break

        max_pin_id = ""
        for pin_id, x_key, like_raw, repin_raw in items:
            max_pin_id = pin_id
            # The lazy "\d*?" groups can match an empty string.
            x_like_count = int(like_raw or 0)
            x_repin_count = int(repin_raw or 0)
            popular = (
                (x_repin_count > 10 and x_like_count > 10)
                or x_repin_count > 100
                or x_like_count > 20
            )
            if not popular:
                continue

            filename = PWD + str(pin_id) + ".jpg"
            if os.path.isfile(filename):
                print("文件存在:", filename)
                continue

            print("开始下载第{0}张图片".format(PhotoNum))
            downfile(filename, url_image + x_key)
            # downfile reports failures itself; only count files that exist.
            if os.path.isfile(filename):
                PhotoNum += 1

        if not max_pin_id:
            # Without a pin id the next URL would request the same page again.
            break
        url = urlNext + max_pin_id


# Make sure the download directory exists; exist_ok avoids the race between
# checking for the directory and creating it. Then start the crawl.
os.makedirs(PWD, exist_ok=True)
requestUrl(url)

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值