import requests # 用于请求网络
import os
from urllib.parse import quote
# Endpoint of Baidu's image search; with tn=resultjson_com it answers in JSON.
start_url = "https://image.baidu.com/search/index"
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
    'Accept': 'application/json, text/javascript, */*; q=0.01',
}

# Number of results requested per page (the 'rn' query parameter).
PAGE_SIZE = 30
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 10


def build_params(keyword, page):
    """Return the query parameters for one page of search results.

    Args:
        keyword: The search term typed by the user.
        page: 1-based page number.

    Returns:
        A dict suitable for ``requests.get(..., params=...)``.
    """
    return {
        'tn': 'resultjson_com',
        'logid': '10744800491272928835',
        'word': f'{keyword}',
        'ie': 'utf-8',
        # 'pn' is sent as a result offset, so page 1 must start at 0;
        # using page * PAGE_SIZE would skip the first PAGE_SIZE results.
        'pn': f'{(page - 1) * PAGE_SIZE}',
        'rn': f'{PAGE_SIZE}',
    }


def extract_thumb_urls(payload):
    """Return the thumbnail URLs found in a decoded search response.

    Entries that are not dicts or carry no 'thumbURL' are skipped, and a
    missing or empty 'data' list yields an empty result, so a short or
    partially filled page never raises IndexError/KeyError.

    Args:
        payload: The decoded JSON body of the search response.

    Returns:
        A list of thumbnail URL strings, in response order.
    """
    entries = payload.get("data") if isinstance(payload, dict) else None
    return [
        entry['thumbURL']
        for entry in entries or []
        if isinstance(entry, dict) and entry.get('thumbURL')
    ]


def image_path(directory, page, index):
    """Return the destination path of the ``index``-th image of ``page``.

    Images are numbered consecutively from 1 across pages.

    Args:
        directory: Folder the image is written to.
        page: 1-based page number.
        index: 0-based position of the image within its page.
    """
    return os.path.join(directory, f"{PAGE_SIZE * (page - 1) + index + 1}" + '.jpg')


def main():
    """Prompt for a keyword and page count, then download the thumbnails.

    Images are saved as ``<cwd>/<keyword>/<n>.jpg``.

    Raises:
        ValueError: If the page count is not an integer, or the search
            response body is not valid JSON.
        requests.RequestException: If a search request fails.
    """
    user_input = input("请输入你要搜索的关键字<示例:复古穿搭 男>")
    user_pages = input("请输入你要爬取的页数<示例:5>")
    # Parse first so a bad page count does not leave an empty folder behind.
    page_count = int(user_pages)

    os_path = os.path.join(os.getcwd(), f'{user_input}')
    os.makedirs(os_path, exist_ok=True)

    for page in range(1, page_count + 1):
        response = requests.get(
            start_url,
            headers=headers,
            params=build_params(user_input, page),
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        thumb_urls = extract_thumb_urls(response.json())
        if not thumb_urls:
            # Nothing usable on this page; stop instead of requesting more.
            break

        for num, image_url in enumerate(thumb_urls):
            image_number = PAGE_SIZE * (page - 1) + num + 1
            try:
                image_response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
                # Do not store an HTTP error page under a .jpg name.
                image_response.raise_for_status()
            except requests.RequestException as exc:
                # One broken thumbnail should not abort the whole crawl.
                print(f"图片{image_number} 下载失败,已跳过: {exc}")
                continue
            with open(image_path(os_path, page, num), 'wb') as f:
                f.write(image_response.content)
            print(f"图片{image_number} =================采集完成==========logging!")


if __name__ == "__main__":
    main()
# 【爬虫】爬虫学习 数据请求 requests.get()
# 最新推荐文章于 2023-05-18 16:01:21 发布