Scraping Wallhaven Wallpapers with Python

Environment:
Windows 10
Python 3.12.3
PyCharm 2024.1.1

Install the dependencies first:

pip install requests beautifulsoup4 lxml

Full script:
import requests
from bs4 import BeautifulSoup
import os
import time
import random

# Directory to save wallpapers into
download_dir = 'D:/Download'  # local save path
if not os.path.exists(download_dir):
    os.makedirs(download_dir)

# Wallhaven base URL and search URL
base_url = 'https://wallhaven.cc'  # (not referenced below)
search_url = 'https://wallhaven.cc/search?q=id%3A37&categories=110&purity=100&sorting=toplist&order=desc&ai_art_filter=1&page='
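# The query string encodes Wallhaven's search filters; as I read them:
#   q=id%3A37       - search for tag id 37 ("id:37", URL-encoded)
#   categories=110  - general/anime/people bitmask: General + Anime, no People
#   purity=100      - sfw/sketchy/nsfw bitmask: SFW only
#   sorting=toplist, order=desc - highest-rated first
#   ai_art_filter=1 - exclude AI-generated art
#   page=           - the page number is appended in main()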

# Sleep a random 1-3 s between requests so we don't hammer the server
def random_delay():
    time.sleep(random.uniform(1, 3))

# Collect the wallpaper detail-page links from one search results page
def get_wallpaper_links(page_url):
    response = requests.get(page_url, timeout=10)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        # Each thumbnail links to its detail page via <a class="preview">
        thumbs = soup.find_all('a', class_='preview')
        links = [thumb['href'] for thumb in thumbs]
        print(f'Found {len(links)} wallpaper links on {page_url}')
        return links
    else:
        print(f'Failed to fetch {page_url}: Status code {response.status_code}')
        return []

# Download the full-resolution image from a wallpaper detail page
def download_wallpaper(wallpaper_url):
    try:
        response = requests.get(wallpaper_url, timeout=10)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            # The full-size image is the <img id="wallpaper"> element
            image_tag = soup.find('img', id='wallpaper')
            if image_tag:
                image_url = image_tag['src']
                # Make sure the image URL is absolute
                if not image_url.startswith('http'):
                    image_url = 'https:' + image_url
                image_name = image_url.split('/')[-1]
                local_image_path = os.path.join(download_dir, image_name)
                image_data = requests.get(image_url, timeout=30).content
                with open(local_image_path, 'wb') as f:
                    f.write(image_data)
                print(f'Downloaded: {image_name}')
                print(f'Saved to: {local_image_path}')
            else:
                print(f'No image found at {wallpaper_url}')
        else:
            print(f'Failed to fetch {wallpaper_url}: Status code {response.status_code}')
    except Exception as e:
        print(f'Error downloading {wallpaper_url}: {e}')

def main():
    # Number of search result pages to crawl; adjust as needed
    num_pages = 1000
    for page in range(1, num_pages + 1):
        page_url = f'{search_url}{page}'
        print(f'Fetching page {page}...')
        wallpaper_links = get_wallpaper_links(page_url)
        for link in wallpaper_links:
            print(f'Processing wallpaper link: {link}')
            download_wallpaper(link)
            random_delay()

if __name__ == '__main__':
    main()
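
The scraper above depends on Wallhaven's HTML layout (the a.preview selector and the img#wallpaper element), which can change at any time. Wallhaven also exposes a JSON API at https://wallhaven.cc/api/v1/search, where each result already carries the full-resolution image URL in its path field, so no detail page needs to be parsed. A minimal sketch of the same search via the API, assuming the filter parameters behave as described above:

import os
import time
import random
import requests

API_URL = 'https://wallhaven.cc/api/v1/search'
download_dir = 'D:/Download'
os.makedirs(download_dir, exist_ok=True)

params = {
    'q': 'id:37',
    'categories': '110',
    'purity': '100',
    'sorting': 'toplist',
    'order': 'desc',
    'ai_art_filter': '1',
    'page': 1,
}

response = requests.get(API_URL, params=params, timeout=10)
response.raise_for_status()
for item in response.json()['data']:
    image_url = item['path']  # direct link to the full-size image
    image_name = image_url.split('/')[-1]
    image_data = requests.get(image_url, timeout=30).content
    with open(os.path.join(download_dir, image_name), 'wb') as f:
        f.write(image_data)
    print(f'Downloaded: {image_name}')
    time.sleep(random.uniform(1, 3))  # same politeness delay as the scraper above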
