# Fixed: downloaded images could not be opened. Cause: the request headers.
# Executable file download
import requests
import time
import os
from multiprocessing import Pool,cpu_count,current_process,Process
import jsonpath
# search_url = 'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=©right=&word={}&s=&se=&tab=&width=1920&height=1080&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=girl&pn=30&rn=30&gsm=1e&1593756552047='
# Timeout value passed to every requests.get() call in this file.
timeout = 10
# Root directory for downloaded images; save_img() joins the search keyword
# onto it, so each keyword gets its own sub-folder.
DIR_PATH = r"c:\meizi\百度图片"
# Request headers sent with the image-search JSON requests issued by main().
# They mimic an XHR made by desktop Chrome 91 from the Baidu image result page.
header = {
    "Accept": "text/plain, */*; q=0.01",
    # "br" is intentionally not advertised: Brotli bodies are only decoded
    # when an optional brotli package is installed, so asking for them risks
    # an undecodable response.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Host": "image.baidu.com",
    "Pragma": "no-cache",
    "Referer": "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1623389928681_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&sid=&word=%E9%AA%91%E8%BD%A6",
    "sec-ch-ua": '"Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
    "sec-ch-ua-mobile": "?0",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

# A browser session cookie (including account tokens such as BDUSS) used to be
# hard-coded here, in a string literal that was broken across two lines.
# Credentials do not belong in source control, so the cookie is now optional
# and read from the BAIDU_COOKIE environment variable.
# NOTE(review): confirm the search endpoint still answers without a cookie.
# Whitespace runs (including stray newlines) are collapsed because a header
# value must be a single line.
_baidu_cookie = " ".join(os.environ.get("BAIDU_COOKIE", "").split())
if _baidu_cookie:
    header["Cookie"] = _baidu_cookie
def main():
    """Prompt for a keyword and a page count, then collect image URLs.

    One search request is issued per page (30 results per page). For each
    page the first non-empty field among ``thumbURL``, ``hoverURL`` and
    ``middleURL`` is used. A page that fails to download or parse is reported
    and skipped, so the URLs gathered from other pages are kept.

    Returns:
        dict: ``{"list": [url, ...], "word": keyword}``. If the prompts cannot
        be read or the page count is not an integer, the result is
        ``{"list": [], "word": ''}``.
    """
    from urllib.parse import quote

    try:
        word = input('请输入关键字:')
        page = input('请输入页数:')
        page_count = int(page)
    except (ValueError, EOFError) as e:
        print(e, 'enumerate2')
        return {"list": [], "word": ''}

    # Percent-encode the keyword so characters such as '&', '#' or spaces
    # cannot corrupt the query string.
    encoded_word = quote(word, safe='')
    url_template = (
        'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj'
        '&ct=201326592&fp=result&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1'
        '&z=&ic=&hd=&latest=&copyright=&word={word}&s=&se=&tab=&width=1920'
        '&height=1080&face=0&istype=2&qc=&nc=1&fr=&expermode=&force='
        '&pn={page}&rn=30&gsm=b4&1593764375958='
    )
    # NOTE(review): result offsets start at 30 (not 0), as in the original
    # code - confirm that skipping the first 30 results is intended.
    urls = [url_template.format(page=cnt * 30, word=encoded_word)
            for cnt in range(1, page_count + 1)]
    print(len(urls))

    img_list = []
    for url in urls:
        # Best-effort per page: any failure is reported and the page skipped.
        try:
            payload = requests.get(
                url=url, headers=header, timeout=timeout
            ).json(strict=False)
            # jsonpath.jsonpath() returns False when nothing matches, so the
            # `or` chain falls through to the next candidate field.
            # NOTE(review): the "[." in these expressions looks like a mangled
            # "[*]." - left unchanged; confirm against the jsonpath library.
            found = (jsonpath.jsonpath(payload, '$..data[.thumbURL')
                     or jsonpath.jsonpath(payload, '$..data[.hoverURL')
                     or jsonpath.jsonpath(payload, '$..data[.middleURL'))
        except Exception as e:
            print(e, 'enumerate2')
            continue
        if found:
            img_list.extend(found)
    return {"list": img_list, "word": word}
def save_imgX(url,file_name,index,n,path):
    """Download one image and write it to ``path/file_name``.

    Intended to run inside a pool worker. Every failure is printed rather
    than raised, so one bad image does not affect the others.

    Args:
        url: Address of the image to download.
        file_name: Name of the file to create inside ``path``.
        index: Zero-based position of this image; used in the progress message.
        n: Total number of images; used in the progress message.
        path: Destination directory; created if it does not exist.

    Returns:
        None.
    """
    try:
        # Short pause before each request, kept from the original code.
        time.sleep(0.1)
        # Headers copied from a desktop Chrome 91 page navigation.
        # NOTE(review): "authority", "method" and "scheme" look like HTTP/2
        # pseudo-headers copied from browser dev tools; they are kept as they
        # were - confirm whether they are needed.
        img_header = {
            "authority": "gimg2.baidu.com",
            "method": "GET",
            "scheme": "https",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            # "br" is not advertised: Brotli bodies are only decoded when an
            # optional brotli package is installed, and an undecoded body
            # would be written to disk as a broken image.
            "accept-encoding": "gzip, deflate",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "sec-ch-ua": '"Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
            "sec-ch-ua-mobile": "?0",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "none",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
        }
        # The session cookie that used to be hard-coded here (it contained
        # account tokens such as BDUSS, in a literal broken across two lines)
        # is now optional and read from the BAIDU_COOKIE environment variable.
        # NOTE(review): confirm image hosts still serve files without a cookie.
        # Whitespace runs are collapsed so the value stays on a single line.
        cookie = " ".join(os.environ.get("BAIDU_COOKIE", "").split())
        if cookie:
            img_header["cookie"] = cookie

        response = requests.get(url, headers=img_header, timeout=timeout)
        # Do not save an HTTP error page under a .jpg name.
        response.raise_for_status()

        # exist_ok avoids the race where two workers both see the directory
        # missing and one of them fails while creating it.
        os.makedirs(path, exist_ok=True)
        # Write to the full path instead of changing the working directory.
        with open(os.path.join(path, file_name), 'wb') as f:
            f.write(response.content)
        print('保存成功!第:{a}张/共{b}张'.format(a=index+1, b=n))
    except Exception as e:
        print(e, 'write')
def save_img(img_list,word):
    """Queue one download task per URL on the module-level process pool.

    Images are named ``1.jpg``, ``2.jpg``, ... in list order and are written
    to the ``word`` sub-folder of ``DIR_PATH``. A failure while queuing one
    task is printed and the remaining URLs are still processed.

    Args:
        img_list: Image URLs to download.
        word: Search keyword; used as the sub-folder name.
    """
    for position, image_url in enumerate(img_list, start=1):
        try:
            target_dir = os.path.join(DIR_PATH, word)
            task_args = (
                image_url,
                f"{position}.jpg",
                position - 1,  # save_imgX expects the zero-based index
                len(img_list),
                target_dir,
            )
            pool.apply_async(save_imgX, task_args)
        except Exception as e:
            print(e, 'enumerate1')
if __name__ == '__main__':
    # save_img() looks this pool up by its global name, so it must be `pool`.
    pool = Pool(processes=6)
    search_result = main()
    started_at = time.time()
    save_img(search_result.get("list"), search_result.get("word"))
    # No further tasks will be submitted; wait for the queued downloads.
    pool.close()
    pool.join()
    elapsed = time.time() - started_at
    print(f'用时{elapsed}秒')