【爬虫】爬虫学习数据请求 requests.get()

最新推荐文章于 2024-03-25 13:55:15 发布

Enzo 想砸电脑

最新推荐文章于 2024-03-25 13:55:15 发布

阅读量298

点赞数

文章标签：爬虫学习 python

本文链接：https://blog.csdn.net/weixin_37804469/article/details/126926250

版权

爬虫专栏收录该内容

3 篇文章 0 订阅

订阅专栏

import requests  # 用于请求网络
import os
from urllib.parse import quote

# Baidu image-search endpoint that the script sends its search queries to.
start_url = "https://image.baidu.com/search/index"

# HTTP headers sent with each search query: a desktop Chrome User-Agent and
# an Accept header that lists JSON first.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/105.0.0.0 Safari/537.36"
    ),
    "Accept": "application/json, text/javascript, */*; q=0.01",
}

# Ask for the search keyword and the number of result pages to download.
# Both values stay as the raw strings returned by input(); the page count is
# converted with int() where the page loop is set up.
user_input = input("请输入你要搜索的关键字<示例：复古穿搭 男>")
user_pages = input("请输入你要爬取的页数<示例：5>")

# Images are saved in a folder named after the keyword, located under the
# current working directory.
os_path = f"{os.getcwd()}/{user_input}"

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race, and it also creates intermediate directories, so a
# keyword containing '/' no longer fails with FileNotFoundError.
os.makedirs(os_path, exist_ok=True)


# Number of results requested per page; also drives the file numbering.
PAGE_SIZE = 30
# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 10

# Download the thumbnails of every requested result page into os_path.
for page in range(1, int(user_pages) + 1):
    # Zero-based offset of this page's first result. The saved files are
    # numbered from offset + 1, so page 1 must start at offset 0; sending
    # page * PAGE_SIZE would skip the first PAGE_SIZE results entirely.
    offset = (page - 1) * PAGE_SIZE

    params = {
        'tn': 'resultjson_com',
        'logid': '10744800491272928835',
        'word': user_input,
        'ie': 'utf-8',
        'pn': str(offset),
        'rn': str(PAGE_SIZE),
    }

    page_resp = requests.get(
        start_url, headers=headers, params=params, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    page_resp.raise_for_status()
    response = page_resp.json()

    # Iterate over what the server actually returned rather than assuming
    # exactly PAGE_SIZE entries: a short page no longer raises IndexError.
    # The slice keeps file numbers from overlapping with the next page.
    items = (response.get("data") or [])[:PAGE_SIZE]

    for num, data in enumerate(items):
        index = offset + num + 1

        # Skip entries that carry no thumbnail URL (e.g. an empty
        # placeholder dict) instead of crashing with KeyError.
        image_url = data.get('thumbURL') if isinstance(data, dict) else None
        if not image_url:
            continue

        try:
            img_resp = requests.get(image_url, timeout=REQUEST_TIMEOUT)
            img_resp.raise_for_status()
        except requests.RequestException as exc:
            # One failed image should not abort the whole run, and an error
            # body must not be written to disk as a .jpg.
            print(f"图片{index} 下载失败，已跳过: {exc}")
            continue

        with open(os_path + '/' + f"{index}" + '.jpg', 'wb') as f:
            f.write(img_resp.content)

        print(f"图片{index} =================采集完成==========logging!")