使用 Python 爬取百度图片,支持自定义搜索关键字和下载页数

修复了下载的图片无法打开的问题,原因是请求头设置不正确。
可执行文件下载

import requests
import time
import os
from multiprocessing import Pool,cpu_count,current_process,Process
import jsonpath


# search_url = 'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&copyright=&word={}&s=&se=&tab=&width=1920&height=1080&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=girl&pn=30&rn=30&gsm=1e&1593756552047='

# Per-request timeout in seconds, passed to every requests.get call in this file.
timeout = 10
# Root directory for downloaded images; save_img joins the search keyword onto it.
DIR_PATH = r"c:\meizi\百度图片"
# Browser-like request headers sent to the image.baidu.com search (acjson) endpoint.
header = {
    "Accept": "text/plain, */*; q=0.01",
    # "br" is deliberately not advertised: requests only decodes Brotli when an
    # optional Brotli library is installed, otherwise the body cannot be parsed.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Host": "image.baidu.com",
    "Pragma": "no-cache",
    "Referer": "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1623389928681_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&sid=&word=%E9%AA%91%E8%BD%A6",
    "sec-ch-ua": '"Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
    "sec-ch-ua-mobile": "?0",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

# The Cookie header used to be a hard-coded, logged-in browser session that was
# also split across two physical lines (an unterminated string literal).
# Credentials do not belong in source: supply one through the BAIDU_COOKIE
# environment variable if the endpoint requires it.
_cookie = os.environ.get("BAIDU_COOKIE", "").strip()
if _cookie:
    header["Cookie"] = _cookie


def main():
    """Prompt for a keyword and a page count, then collect image URLs.

    One search request is issued per page (30 results each). For every
    response the first URL field that yields matches is used, tried in the
    order thumbURL, hoverURL, middleURL.

    Returns:
        dict: ``{"list": [url, ...], "word": keyword}`` on success, or
        ``{"list": [], "word": ''}`` if anything fails (the error and the
        last decoded response, if any, are printed).
    """
    # Function-scope import so this block does not depend on the file header.
    from urllib.parse import quote

    # Pre-bound so the error handler can always print it, even when the
    # failure happens before the first request (e.g. a non-numeric page count).
    r = None
    try:
        word = input('请输入关键字:')
        page = input('请输入页数:')
        # Percent-encode the keyword so characters such as '&', '#' or spaces
        # cannot break or alter the query string.
        encoded_word = quote(word, safe='')
        urls = ['https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&fp=result&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&copyright=&word={word}&s=&se=&tab=&width=1920&height=1080&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&pn={page}&rn=30&gsm=b4&1593764375958='.format(page=cnt * 30, word=encoded_word)
                for cnt in range(1, int(page) + 1)]
        print(len(urls))
        # NOTE(review): the JSONPath expressions are kept exactly as written;
        # confirm they match the response shape with the installed jsonpath package.
        url_queries = (
            '$..data[.thumbURL',
            '$..data[.hoverURL',
            '$..data[.middleURL',
        )
        img_list = []
        for search_url in urls:
            r = requests.get(url=search_url, headers=header, timeout=timeout).json(strict=False)
            for query in url_queries:
                found = jsonpath.jsonpath(r, query)
                # A no-match result is falsy; skipping it keeps one empty page
                # from discarding everything gathered so far.
                if found:
                    img_list.extend(found)
                    break
        return {"list": img_list, "word": word}
    except Exception as e:
        print(e, 'enumerate2')
        print(r)
        return {"list": [], "word": ''}

def save_imgX(url,file_name,index,n,path):
    """Download one image and write it to ``path``/``file_name``.

    Args:
        url: Image URL to fetch.
        file_name: Name of the file to create inside ``path``.
        index: Zero-based position of this image; used only in the progress message.
        n: Total number of images; used only in the progress message.
        path: Destination directory; created if it does not exist.

    Best effort: any error is printed with the tag 'write' and not re-raised.
    """
    try:
        # Short pause before each request.
        time.sleep(0.1)
        img_header = {
            # NOTE(review): "authority"/"method"/"scheme" look like HTTP/2
            # pseudo-headers copied from browser dev tools; kept as in the
            # original, where they are sent as ordinary headers.
            "authority": "gimg2.baidu.com",
            "method": "GET",
            "scheme": "https",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            # "br" is not advertised: requests only decodes Brotli when an
            # optional Brotli library is installed, and an undecoded body
            # written to disk would not open as an image.
            "accept-encoding": "gzip, deflate",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            # The opening double quote was missing in the original value.
            "sec-ch-ua": '"Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
            "sec-ch-ua-mobile": "?0",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "none",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
        }
        # The hard-coded logged-in session cookie (which was also split across
        # two physical lines, an unterminated string literal) has been removed.
        # Supply one through the BAIDU_COOKIE environment variable if needed.
        cookie = os.environ.get("BAIDU_COOKIE", "").strip()
        if cookie:
            img_header["cookie"] = cookie

        response = requests.get(url, headers=img_header, timeout=timeout)
        # Do not save an HTTP error body under a .jpg name.
        response.raise_for_status()

        # exist_ok avoids the check-then-create race between pool workers.
        os.makedirs(path, exist_ok=True)
        # Write to an explicit path instead of changing the worker's working directory.
        with open(os.path.join(path, file_name), 'wb') as f:
            f.write(response.content)
        print('保存成功!第:{a}张/共{b}张'.format(a=index+1, b=n))
    except Exception as e:
        print(e, 'write')


def save_img(img_list,word):
    """Queue one download job per URL on the module-level ``pool``.

    Files are named ``1.jpg``, ``2.jpg``, ... in list order and are placed in
    ``DIR_PATH``/``word``. Submission is asynchronous; an error raised while
    queueing a job is printed with the tag 'enumerate1' and the loop continues.

    Args:
        img_list: Image URLs to download.
        word: Search keyword, used as the sub-directory name.
    """
    for position, image_url in enumerate(img_list, start=1):
        try:
            target_name = f"{position}.jpg"
            target_dir = os.path.join(DIR_PATH, word)
            # save_imgX expects the zero-based index plus the total count.
            job_args = (image_url, target_name, position - 1, len(img_list), target_dir)
            pool.apply_async(save_imgX, job_args)
        except Exception as err:
            print(err, 'enumerate1')

if __name__ == '__main__':
    # `pool` must stay a module-level name: save_img() submits its jobs to it.
    pool = Pool(processes=6)
    result = main()

    # Time only the download phase, not the interactive prompts in main().
    started = time.time()
    save_img(result.get("list"), result.get("word"))
    # Stop accepting new jobs, then wait for every queued download to finish.
    pool.close()
    pool.join()
    elapsed = time.time() - started
    print(f'用时{elapsed}秒')



在这里插入图片描述

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值