Python爬虫批量下载国内某网站图片

用python批量下载国内某网站的图片

  这里用到了 requests 来发送请求,用 json、re、bs4 来对数据进行解析,下面是源代码示例:
import json
import os
import re

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
def Spaider(num, page):
    """Fetch one page of the 8btc news-list API and download its images.

    For every article on the page, downloads both the ``image`` cover and
    the second entry of its ``images`` list into ``./image/``, and saves
    the raw article list to ``csv_json.json``.

    Args:
        num:  Number of articles to request per page.
        page: 1-based page index to fetch.

    Raises:
        requests.HTTPError: if the list API returns a non-2xx status.
    """
    import os  # local import so this block stays self-contained

    url = ('https://webapi.8btc.com/bbt_api/news/list'
           '?num={}&page={}&cat_id=242'.format(num, page))
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    # Fetch the article list; fail fast on HTTP errors instead of
    # regex-parsing an error page below.
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    body = response.content.decode('utf-8')

    # The API wraps the article array in a JSON envelope; pull out the
    # bracketed list with a greedy regex, as the original code did.
    json_str = re.findall(r'\[.+\]', body)
    datas = json.loads(json_str[0])
    # ensure_ascii=False keeps Chinese text readable in the UTF-8 file
    # (the default would write \uXXXX escapes).
    json_data = json.dumps(datas, ensure_ascii=False)
    print(json_data)

    # Save the raw article list for later inspection.
    with open('csv_json.json', 'w', encoding='utf-8') as f:
        f.write(json_data)

    print(len(datas))

    # Create the target directory up front: open() below would otherwise
    # raise FileNotFoundError on a fresh checkout.
    os.makedirs('./image', exist_ok=True)

    def _download(image_url):
        # Stream one image to ./image/<basename-of-url>.
        name = os.path.join('./image', image_url.split('/')[-1])
        image_response = requests.get(image_url, stream=True)
        with open(name, 'wb') as f:
            for chunk in image_response.iter_content(chunk_size=128):
                f.write(chunk)

    # Two passes, as in the original: first the 'image' cover field,
    # then the second entry of each article's 'images' list.
    for item in tqdm(datas, '正在下载图片'):
        _download(item['image'])

    for item in tqdm(datas, '正在下载图片'):
        _download(item['images'][1])


if __name__ == '__main__':
    # Crawl pages 1 through 99, requesting 50 articles per page.
    for page_no in range(1, 100):
        Spaider(50, page_no)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值