"""Download thumbnail images from a Baidu image search results page.

Fetches the Baidu image-search acjson endpoint for the pre-encoded query
word (%E7%A7%91%E6%AF%94%E5%9B%BE%E7%89%87, "Kobe pictures"), creates a
directory named after one of the result-page titles, and saves every
thumbnail URL found in the response into that directory.
"""
import os
import re
import time

import requests

# Pretend to be a desktop Chrome browser; Baidu rejects obvious bots.
HEADERS = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; WOW64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/93.0.4573.0 Safari/537.36'),
}

# Baidu image-search acjson endpoint with the query word URL-encoded.
# NOTE: the original source had "latest=©right=" -- an HTML-entity
# mangling of "&latest=&copyright=" -- restored here.
URL = ('https://image.baidu.com/search/acjson?tn=resultjson_com'
       '&logid=12424654831072445813&ipn=rj&ct=201326592&is=&fp=result'
       '&queryWord=%E7%A7%91%E6%AF%94%E5%9B%BE%E7%89%87&cl=&lm='
       '&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&hd=&latest=&copyright='
       '&word=%E7%A7%91%E6%AF%94%E5%9B%BE%E7%89%87&s=&se=&tab='
       '&width=&height=&face=&istype=&qc=&nc=&fr=&expermode=&nojc='
       '&cg=star&pn=30&rn=30&gsm=1e&1627723520481=')

# Seconds per-request timeout so a stalled connection cannot hang forever.
REQUEST_TIMEOUT = 15


def _fetch_search_page():
    """Return the raw text of the search-results response.

    Raises requests.HTTPError on a non-2xx status instead of silently
    parsing an error page.
    """
    response = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def _pick_dir_name(html):
    """Extract a directory name from the result-page titles.

    The original script took the 5th match unconditionally, which raises
    IndexError when fewer results come back; fall back to the first match,
    and to a fixed name when nothing matches at all.
    """
    titles = re.findall(r'"fromPageTitleEnc":"(.*?)","bdSourceName":"",', html)
    if not titles:
        return 'baidu_images'
    # Prefer the 5th title (index 4) as the original did, else the first.
    return titles[4] if len(titles) > 4 else titles[0]


def _download_images(urls, dir_name):
    """Download each thumbnail URL into ``dir_name``, one second apart.

    A failed individual download is reported and skipped rather than
    aborting the whole run.
    """
    for url in urls:
        time.sleep(1)  # be polite: throttle to one request per second
        file_name = url.split('/')[-1]  # last path segment as the file name
        try:
            response = requests.get(url, headers=HEADERS,
                                    timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'skip {url}: {exc}')
            continue
        with open(os.path.join(dir_name, file_name), 'wb') as f:
            f.write(response.content)


def main():
    """Fetch the search page, then save every thumbnail it lists."""
    html = _fetch_search_page()

    dir_name = _pick_dir_name(html)
    # exist_ok avoids the racy exists()+mkdir() pair of the original.
    os.makedirs(dir_name, exist_ok=True)

    urls = re.findall(r'"thumbURL":"(.*?)"', html)
    _download_images(urls, dir_name)


if __name__ == '__main__':
    main()
在百度图片上爬科比(可批量下载)
最新推荐文章于 2024-11-10 08:15:38 发布