爬取百度图片的内容

weixin_51749975

于 2022-10-11 15:34:14 发布

阅读量216

点赞数

文章标签：服务器爬虫 python

本文链接：https://blog.csdn.net/weixin_51749975/article/details/127264733

版权

import requests
import tkinter as tk
from tkinter import filedialog
from utils.logging_tool.log_control import INFO
from tools.TimeTools import TimeTools
import time




def save_directory():
    """Ask the user to choose a folder with a native dialog and return it.

    A hidden Tk root window is created only to host the dialog and is
    destroyed afterwards so repeated calls do not accumulate Tk instances.

    Returns:
        Whatever ``filedialog.askdirectory()`` returns: the selected
        directory path, or a falsy value (typically an empty string) when
        the user dismisses the dialog.
    """
    root = tk.Tk()
    # Hide the empty root window; only the directory chooser should show.
    root.withdraw()
    try:
        return filedialog.askdirectory()
    finally:
        # Release the Tk interpreter/window even if the dialog raises.
        root.destroy()


_BAIDU_SEARCH_URL = 'https://image.baidu.com/search/acjson?'
_PAGE_SIZE = 30  # number of results requested per page (the 'rn' parameter)
_REQUEST_TIMEOUT = 10  # seconds; keeps a stalled connection from hanging forever


def _build_search_params(keyword, width, height, offset):
    """Return the query parameters for one page of Baidu image results."""
    return {
        'tn': 'resultjson_com',
        'logid': '8846269338939606587',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'queryWord': keyword,
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '',
        'hd': '',
        'latest': '',
        'copyright': '',
        'word': keyword,
        's': '',
        'se': '',
        'tab': '',
        'width': width,
        'height': height,
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        'expermode': '',
        'force': '',
        'cg': 'girl',
        'pn': offset,  # index of the first result on this page
        'rn': str(_PAGE_SIZE),
        'gsm': '1e',
    }


def get_picture():
    """Interactively download Baidu image-search thumbnails into a folder.

    Prompts on stdin for the search keyword, the number of result pages and
    an optional width/height filter, asks for a target folder through
    ``save_directory()``, then fetches each result page and writes every
    thumbnail to ``<folder>/<keyword><timestamp>.png``.

    Returns:
        None. Returns early (after logging) if no folder was selected.

    Raises:
        ValueError: if the page count entered is not an integer.
        RuntimeError: if downloading or writing an image fails; the original
            exception is chained as ``__cause__``.
    """
    work = input('输入想要下载的图片：')
    page = input('请输入要获取多少页(百度图片下滑时默认一页显示30张)：')
    c = input('选择图片的width（建议1920或者2560，默认直接回车:）')
    b = input('选择图片的height（建议1080或者1440，默认直接回车:）')
    page_count = int(page)
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
    }

    file_path = save_directory()
    if not file_path:
        # A dismissed dialog yields an empty path; writing to "/<name>.png"
        # at the filesystem root is never what the user wants.
        INFO.logger.info('未选择保存目录，已取消下载')
        return

    num = 0  # running count of images handled, shown in the log line
    for page_index in range(page_count):
        # Advance by exactly one page of results so pages neither overlap
        # nor skip the very first hit.
        # NOTE(review): assumes 'pn' is a 0-based result offset — confirm.
        param = _build_search_params(work, c, b, page_index * _PAGE_SIZE)
        response = requests.get(
            url=_BAIDU_SEARCH_URL,
            headers=header,
            params=param,
            timeout=_REQUEST_TIMEOUT,
        )
        response.encoding = 'utf-8'
        # strict=False tolerates control characters inside JSON strings.
        payload = response.json(strict=False)

        # Keep only entries that actually carry a thumbnail URL; this also
        # drops an empty trailing placeholder entry and copes with a
        # missing or empty 'data' list.
        img_path_list = [
            entry['thumbURL']
            for entry in (payload.get('data') or [])
            if entry.get('thumbURL')
        ]

        for img_path in img_path_list:
            num += 1
            # First 10 characters of the project's date string plus the
            # tail of the current epoch time form the file-name suffix.
            str_time = str(TimeTools().get_now_date())[:10] + "_" + str(time.time())[11:]
            img_path1 = file_path + "/" + work + str_time + '.png'
            try:
                img_data = requests.get(url=img_path, timeout=_REQUEST_TIMEOUT).content
                with open(img_path1, 'wb') as fp:
                    INFO.logger.info(f"正在下载中，文件路径为{img_path1}---->{num}")
                    fp.write(img_data)
            except (requests.RequestException, OSError) as e:
                # Raising a plain string is itself a TypeError in Python 3;
                # raise a real exception and keep the cause attached.
                raise RuntimeError(f"下载出错！{e}") from e


def stat():
    """Run download sessions until the user declines to continue.

    After each ``get_picture()`` run a completion message is logged and the
    user is prompted; only the exact answer ``y`` starts another session.
    """
    again = True
    while again:
        get_picture()
        INFO.logger.info('下载完成')
        answer = input('是否继续y/n')
        # Anything other than a literal 'y' ends the loop.
        again = answer == 'y'


# Start the interactive download loop only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    stat()