python3实现爬取百度图库的图片

最新推荐文章于 2021-02-04 06:20:31 发布

置顶 qq_22253209

最新推荐文章于 2021-02-04 06:20:31 发布

阅读量1k

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/qq_22253209/article/details/80290581

版权

python 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

环境：Python 3（需先安装第三方库 requests：pip install requests）

#!/usr/bin/env python3
# -*-coding:utf-8-*-
# FileName:spider.py
# Author:Cody
# Date:2018/5/11 22:10

import requests  # http客户端
import re  # 正则表达式模块
import random #随机数


def spiderPic(html, keyword, save_dir='E:\\img\\'):
    """Download every image referenced by an ``"objURL"`` entry in *html*.

    Each URL captured by the ``"objURL":"..."`` pattern is fetched with
    ``requests`` and the response body is written to a uniquely named
    ``.jpg`` file inside *save_dir*. A URL whose request fails (connection
    error, timeout, HTTP error status, malformed URL) is reported and
    skipped; the remaining URLs are still processed.

    Args:
        html: Text of the search result page to scan for image URLs.
        keyword: Search term; only used in the progress message.
        save_dir: Directory that receives the downloaded files. It is
            created on demand. Defaults to the folder that was previously
            hard-coded, so existing callers keep their behavior.

    Returns:
        None.
    """
    # Imported locally so the function does not depend on module-level
    # imports that the file header does not provide.
    import os
    import uuid

    print("正在查找：" + keyword + '对应的图片，正在从百度图库重下载，请稍等。。。  ')

    image_urls = re.findall(r'"objURL":"(.*?)"', html, re.S)
    if not image_urls:
        # Nothing to download: avoid creating an empty output directory.
        return

    os.makedirs(save_dir, exist_ok=True)

    for addr in image_urls:
        print("现在正在爬取的URL地址：" + addr)
        try:
            pics = requests.get(addr, timeout=10)
            # Reject 4xx/5xx answers so error pages are not saved as images.
            pics.raise_for_status()
        except requests.exceptions.RequestException:
            # RequestException is the base class of ConnectionError, Timeout
            # and HTTPError, so one bad URL can no longer abort the whole run.
            print("当前Url请求错误")
            continue

        # A UUID-based name cannot collide the way a random number drawn from
        # a 1000-value range did, so earlier downloads are never overwritten.
        target = os.path.join(save_dir, uuid.uuid4().hex + '.jpg')
        with open(target, 'wb') as fq:
            fq.write(pics.content)
        print('写入完成')


def main():
    """Prompt for a keyword, fetch the Baidu image search page and save its images.

    Reads the keyword from standard input, requests the search result page
    and hands the page text to ``spiderPic`` for downloading.
    """
    word = input("请输入关键词：")
    # Passing the query through ``params`` lets requests URL-encode the
    # keyword, so spaces, '&' or '#' in the input cannot corrupt the query.
    result = requests.get(
        "https://image.baidu.com/search/index",
        params={
            "tn": "baiduimage",
            "ipn": "r",
            "istype": "2",
            "ie": "utf-8",
            "word": word,
        },
        timeout=10,  # same limit as the per-image requests; avoids hanging forever
    )
    spiderPic(result.text, word)
    # Report completion only after the downloads have been attempted.
    print("写入完毕")


if __name__ == "__main__":
    main()