python批量下载图片

背景:通过 Python 多线程脚本从壁纸网站 https://wallhaven.cc 批量下载图片。如需下载其他站点的图片,请根据目标站点的 HTML 结构调整脚本中的 CSS 选择器。

系统环境:Windows 10

python版本:3.8

完整脚本如下:
# -*- coding: UTF-8 -*-
import time
from queue import Queue
from requests_html import HTMLSession
import requests
import threading

# Start timestamp; the script entry point prints the seconds elapsed since it.
now = time.time()
# Shared accumulator that get_picturs_url() fills with wallpaper page links.
get_links_list = []
# One HTMLSession reused for the HTML page fetches in this script
# (the image bytes themselves are downloaded with requests.get).
session = HTMLSession()

# url_1 is fetched by get_page() to read the page count; urls2 is the same
# search URL ending in an empty "page=" so a page number can be appended.
url_1 = 'https://wallhaven.cc/search?q=id%3A24972&sorting=random&ref=fp&seed=WbEycL&page=2'
urls2 = 'https://wallhaven.cc/search?q=id%3A24972&sorting=random&ref=fp&seed=WbEycL&page='

# Upper bound on the number of listing pages to crawl; when the search has
# fewer pages than this, all of them are crawled.
page_number = 30


# 保存图片至指定目录
def save_image(url, title):
    """Download one image and store it as ``Wallpapers/<title>.png``.

    The ``Wallpapers`` directory is resolved relative to the current working
    directory and is created on demand.

    Args:
        url: Direct URL of the image file.
        title: Base name (without extension) of the saved file.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    import os  # local import keeps this function self-contained

    img_response = requests.get(url, timeout=30)
    # Fail loudly instead of writing an HTML error page into a .png file.
    img_response.raise_for_status()

    # Build the path with os.path so it is valid on every platform (the old
    # '.\Wallpapers\\' literal relied on Windows separators and on the
    # invalid escape sequence '\W'), and make sure the directory exists.
    target_dir = 'Wallpapers'
    os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, title + '.png'), 'wb') as file:
        file.write(img_response.content)


# 获取页码数
def get_page(url_1):
    """Return the page count shown in the listing header of *url_1*.

    Fetches the page and scans every ``div > section > header > h2`` element.
    For each header whose text contains ``'Page'``, the last
    whitespace-separated token is taken; the last matching header wins.

    NOTE(review): the token is presumably the total number of pages (for
    example ``'Page 2 / 35'`` -> ``'35'``) -- confirm against the live markup.

    Args:
        url_1: URL of a search-result page.

    Returns:
        The extracted token as a string, or ``'1'`` when no header contains
        ``'Page'`` (previously this case raised ``UnboundLocalError``).
    """
    # Default for results without any "Page" header.
    page2 = '1'
    r = session.get(url_1)
    for header in r.html.find('div > section > header > h2'):
        header_text = str(header.text)
        if 'Page' in header_text:
            # split() without arguments ignores trailing/duplicate whitespace,
            # so the last token is never an empty string.
            page2 = header_text.split()[-1]
    return page2


# 获取图片下载链接
def get_picturs_url(page, urls2):
    """Collect the links found on the listing pages into ``get_links_list``.

    Requests listing pages ``1 .. min(page, page_number)`` and appends every
    absolute link of each ``div > section > ul > li > figure > a`` element to
    the module-level ``get_links_list``.

    Args:
        page: Number of available pages (an int or a numeric string).
        urls2: Search URL ending in ``page=``; the page number is appended.

    Returns:
        The module-level ``get_links_list`` (mutated in place).

    Raises:
        ValueError: If *page* cannot be converted to ``int``.
    """
    page = int(page)
    if page <= page_number:
        print(f'当前页码数是:{page} ')

    # Crawl at most ``page_number`` pages. Listing pages are assumed to be
    # 1-indexed (the sample URL uses page=2); the former ``range(page)``
    # started at 0 and never requested the last page.
    last_page = min(page, page_number)
    selector = 'div > section > ul > li > figure > a'
    for page_no in range(1, last_page + 1):
        r = session.get(f'{urls2}{page_no}')
        for anchor in r.html.find(selector):
            # absolute_links is a set of URL strings; extending with it avoids
            # the str(set) round-trip, which yielded 'set()' for an empty set
            # and mangled URLs containing quotes or braces.
            get_links_list.extend(anchor.absolute_links)
    return get_links_list


# 下载图片
def get_picture(links, output_q):
    """Fetch one wallpaper page and save every image matched on it.

    Args:
        links: URL of the page to fetch.
        output_q: Accepted for the caller's sake; not used by this function.
    """
    detail_page = session.get(links)
    for image in detail_page.html.find('body > main > section > div > img'):
        attributes = image.attrs
        url, title = attributes['src'], attributes['data-wallpaper-id']
        print(url + title)
        save_image(url, title)


# 通过多线程调用下载函数
# Script entry point: read the page count, collect the links, then download
# each one on its own thread and report the total elapsed time.
if __name__ == '__main__':
    page = get_page(url_1)
    get_picturs_url(page, urls2)
    threads = []
    for links in get_links_list:
        # Stagger the thread starts by two seconds each.
        time.sleep(2)
        t = threading.Thread(target=get_picture, args=(links, Queue()))
        t.start()
        threads.append(t)
    # Wait for every download to finish so the elapsed time covers the whole
    # run instead of being printed while threads are still working.
    for t in threads:
        t.join()
    print(time.time() - now)

模块安装:

pip install requests-html
pip install requests

参考文档:

https://docs.python-requests.org/projects/requests-html/en/latest/

https://docs.python-requests.org/en/latest/

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值