Batch-downloading images from a Chinese website with Python
This script uses requests to send the HTTP requests, json and re to parse the response, and tqdm to show download progress. The full source code is below.
import json
import os
import re

import requests
from tqdm import tqdm


def spider(num, page):
    # Fetch one page of the article list from the 8btc news API.
    url = 'https://webapi.8btc.com/bbt_api/news/list?num={}&page={}&cat_id=242'.format(num, page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    response = requests.get(url, headers=headers).content

    # Extract the JSON array of articles from the response body with a regex,
    # then parse it into a list of dicts.
    json_str = re.findall(r'\[.+\]', response.decode('utf-8'))
    datas = json.loads(json_str[0])

    # Save the raw article list to disk for inspection.
    json_data = json.dumps(datas, ensure_ascii=False)
    print(json_data)
    with open('csv_json.json', 'w', encoding='utf-8') as f:
        f.write(json_data)
    print(len(datas))

    # Make sure the output directory exists before writing image files.
    os.makedirs('./image', exist_ok=True)

    # Download each article's cover image.
    for i in tqdm(datas, 'Downloading cover images'):
        image_url = i['image']
        image_name = image_url.split('/')[-1]
        name = './image/' + image_name
        image_response = requests.get(image_url, stream=True)
        with open(name, 'wb') as f:
            for chunk in image_response.iter_content(chunk_size=128):
                f.write(chunk)

    # Download the second image from each article's "images" list.
    for i in tqdm(datas, 'Downloading article images'):
        images_url = i['images'][1]
        images_name = images_url.split('/')[-1]
        name = './image/' + images_name
        images_response = requests.get(images_url, stream=True)
        with open(name, 'wb') as f:
            for chunk in images_response.iter_content(chunk_size=128):
                f.write(chunk)


if __name__ == '__main__':
    for i in range(1, 100):
        spider(50, i)
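
The two download loops above repeat the same request-and-write logic, and the second loop will raise an error for any article whose 'images' list has fewer than two entries. Below is a minimal sketch of a shared helper that guards against those cases; the field names 'image' and 'images' come from the code above, while the helper name download_image and the timeout value are illustrative assumptions, not part of the original script.

import os

import requests


def download_image(image_url, out_dir='./image'):
    # Download a single image into out_dir, skipping empty URLs.
    if not image_url:
        return  # some articles may not carry this field
    os.makedirs(out_dir, exist_ok=True)
    name = os.path.join(out_dir, image_url.split('/')[-1])
    response = requests.get(image_url, stream=True, timeout=10)
    response.raise_for_status()
    with open(name, 'wb') as f:
        for chunk in response.iter_content(chunk_size=128):
            f.write(chunk)


# Hypothetical usage inside spider(), replacing the two loops:
# for item in tqdm(datas, 'Downloading images'):
#     download_image(item.get('image'))
#     images = item.get('images') or []
#     if len(images) > 1:
#         download_image(images[1])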