# 爬取百度图片的内容 (scrape images from Baidu Image Search)

import requests
import tkinter as tk
from tkinter import filedialog
from utils.logging_tool.log_control import INFO
from tools.TimeTools import TimeTools
import time




def save_directory():
    """Ask the user to pick a folder through a directory-chooser dialog.

    A hidden Tk root window is created only to host the dialog. It is
    destroyed once the dialog closes so that repeated calls do not pile up
    Tk instances.

    Returns:
        Whatever ``filedialog.askdirectory()`` returns: the chosen directory
        path, or an empty value when the dialog is cancelled.
    """
    root = tk.Tk()
    # Hide the empty root window; only the dialog should be visible.
    root.withdraw()
    try:
        return filedialog.askdirectory()
    finally:
        # Release the Tk interpreter even if the dialog raises.
        root.destroy()


def get_picture():
    """Interactively download Baidu image-search thumbnails into a folder.

    Prompts on stdin for the search keyword, the number of result pages and
    an optional width/height filter, asks for a destination folder via
    ``save_directory()``, then queries the ``acjson`` endpoint page by page
    and writes every returned ``thumbURL`` to
    ``<folder>/<keyword><timestamp>.png``.

    Returns early (after logging) when no destination folder was chosen.

    Raises:
        ValueError: if the page count typed by the user is not an integer.
        RuntimeError: if downloading or saving an image fails; the
            underlying error is chained as ``__cause__``.

    Errors raised by the search request itself (network failure, HTTP error
    status, invalid JSON) propagate unchanged.
    """
    work = input('输入想要下载的图片:')
    page = input('请输入要获取多少页(百度图片下滑时默认一页显示30张):')
    c = input('选择图片的width(建议1920或者2560,默认直接回车:)')
    b = input('选择图片的height(建议1080或者1440,默认直接回车:)')
    page_count = int(page)

    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
    }
    url = 'https://image.baidu.com/search/acjson?'
    per_page = 30  # number of results requested per page ('rn')
    timeout = 10  # seconds; without it a stalled connection blocks forever

    file_path = save_directory()
    if not file_path:
        # The dialog was cancelled; writing to "/<keyword>..." is never wanted.
        INFO.logger.info('未选择保存目录,取消下载')
        return

    num = 0  # running count of images handled, shown in the log line
    # pn is the index of the first image to fetch for the current page.
    # NOTE(review): the start value 1 is kept from the original code;
    # confirm whether the API offset is zero-based.
    pn = 1
    param = {
        'tn': 'resultjson_com',
        'logid': '8846269338939606587',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'queryWord': work,
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '',
        'hd': '',
        'latest': '',
        'copyright': '',
        'word': work,
        's': '',
        'se': '',
        'tab': '',
        'width': c,
        'height': b,
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        'expermode': '',
        'force': '',
        'cg': 'girl',
        'pn': pn,  # index of the first image to fetch
        'rn': str(per_page),
        'gsm': '1e',
    }

    for _ in range(page_count):
        param['pn'] = pn
        response = requests.get(
            url=url, headers=header, params=param, timeout=timeout
        )
        # Fail with a clear HTTP error instead of a confusing JSON error.
        response.raise_for_status()
        response.encoding = 'utf-8'
        # strict=False lets the decoder accept control characters in strings.
        payload = response.json(strict=False)

        # Keep only entries that carry a thumbnail URL. This also drops
        # empty placeholder entries and tolerates a missing or empty 'data'
        # list, where deleting the last element would raise.
        entries = payload.get('data') or []
        img_path_list = [
            entry['thumbURL'] for entry in entries if entry.get('thumbURL')
        ]

        for img_path in img_path_list:
            num += 1
            str_time = str(TimeTools().get_now_date())[:10] + "_" + str(time.time())[11:]
            img_path1 = file_path + "/" + work + str_time + '.png'
            try:
                img_data = requests.get(url=img_path, timeout=timeout).content
                with open(img_path1, 'wb') as fp:
                    INFO.logger.info(f"正在下载中,文件路径为{img_path1}---->{num}")
                    fp.write(img_data)
            except (requests.RequestException, OSError) as e:
                # Raising a plain string is itself a TypeError; raise a real
                # exception and keep the original error as the cause.
                raise RuntimeError(f"下载出错!{e}") from e

        # Advance by the page size so consecutive pages do not overlap.
        pn += per_page


def stat():
    """Run download sessions back to back until the user declines another.

    Each round calls ``get_picture()``, logs a completion message, and then
    asks on stdin whether to continue. Only the exact answer ``'y'`` starts
    another round; any other input ends the loop.
    """
    keep_going = True
    while keep_going:
        get_picture()
        INFO.logger.info('下载完成')
        answer = input('是否继续y/n')
        keep_going = answer == 'y'


# Start the interactive download loop only when this file is executed as a
# script; importing it as a module does not trigger any prompts.
if __name__ == '__main__':
    stat()
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值