Python爬取Wallhaven壁纸
基础环境:
Windows10
Python3.12.3
PyCharm 2024.1.1
依赖安装: pip install requests beautifulsoup4 lxml(脚本中使用的是内置解析器 html.parser,lxml 为可选依赖)
import os
import random
import time
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# Directory where downloaded wallpapers are saved.
download_dir = 'D:/Download'  # local destination path
# exist_ok=True creates the directory only when it is missing, avoiding the
# check-then-create race of a separate os.path.exists() test. It also raises
# immediately if the path exists but is not a directory, instead of failing
# later on the first file write.
os.makedirs(download_dir, exist_ok=True)
# Wallhaven base URL and search URL.
# NOTE(review): base_url uses a different host than search_url and is not
# referenced by any code visible in this file -- confirm it is still needed.
base_url = 'https://wallhaven.icu'
# Search query written one parameter per line; the page number is appended
# directly after the trailing "page=".
search_url = (
    'https://wallhaven.cc/search'
    '?q=id%3A37'
    '&categories=110'
    '&purity=100'
    '&sorting=toplist'
    '&order=desc'
    '&ai_art_filter=1'
    '&page='
)
# Random delay helper.
def random_delay(min_seconds: float = 1.0, max_seconds: float = 3.0) -> None:
    """Sleep for a random duration drawn uniformly from the given range.

    Args:
        min_seconds: Lower bound of the pause, in seconds.
        max_seconds: Upper bound of the pause, in seconds.

    Calling it without arguments keeps the original 1-3 second pause.
    """
    time.sleep(random.uniform(min_seconds, max_seconds))
# Collect the wallpaper links listed on one search results page.
def get_wallpaper_links(page_url, timeout=15):
    """Return the wallpaper links found on a search results page.

    Args:
        page_url: URL of the search results page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests may block indefinitely on a stalled
            connection.

    Returns:
        A list with the ``href`` of every ``<a class="preview">`` element
        that has one. An empty list is returned when the request raises a
        network error or the server answers with a non-200 status.
    """
    try:
        response = requests.get(page_url, timeout=timeout)
    except requests.RequestException as e:
        # A single unreachable page must not abort the whole crawl.
        print(f'Failed to fetch {page_url}: {e}')
        return []
    if response.status_code != 200:
        print(f'Failed to fetch {page_url}: Status code {response.status_code}')
        return []
    soup = BeautifulSoup(response.content, 'html.parser')
    # href=True skips anchors that lack an href instead of raising KeyError.
    thumbs = soup.find_all('a', class_='preview', href=True)
    links = [thumb['href'] for thumb in thumbs]
    print(f'Found {len(links)} wallpaper links on {page_url}')
    return links
# Download one wallpaper.
def download_wallpaper(wallpaper_url, timeout=30):
    """Fetch a wallpaper page and save its image into ``download_dir``.

    The page is parsed for ``<img id="wallpaper">``; the image its ``src``
    points to is downloaded and written under the module-level
    ``download_dir``, named after the last path segment of the image URL.

    Args:
        wallpaper_url: URL of the wallpaper page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Progress and failures are reported with ``print``; errors are
        caught so that one bad wallpaper does not stop the caller's loop.
    """
    try:
        response = requests.get(wallpaper_url, timeout=timeout)
        if response.status_code != 200:
            print(f'Failed to fetch {wallpaper_url}: Status code {response.status_code}')
            return

        soup = BeautifulSoup(response.content, 'html.parser')
        image_tag = soup.find('img', id='wallpaper')
        # .get() tolerates an <img> without a src attribute.
        image_src = image_tag.get('src') if image_tag else None
        if not image_src:
            print(f'No image found at {wallpaper_url}')
            return

        # Make sure the image URL is complete: urljoin resolves
        # protocol-relative ("//host/...") and relative paths against the
        # page URL, and leaves absolute URLs untouched.
        image_url = urljoin(wallpaper_url, image_src)
        # Take the name from the URL path only, so a query string or
        # fragment never ends up in the file name.
        image_name = os.path.basename(urlsplit(image_url).path)
        if not image_name:
            print(f'No image found at {wallpaper_url}')
            return
        local_image_path = os.path.join(download_dir, image_name)

        image_response = requests.get(image_url, timeout=timeout)
        # Check the status so an error page is never saved as an image.
        if image_response.status_code != 200:
            print(f'Failed to fetch {image_url}: Status code {image_response.status_code}')
            return
        with open(local_image_path, 'wb') as f:
            f.write(image_response.content)
        print(f'Downloaded: {image_name}')
        print(f'Saved to: {local_image_path}')
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # crawl continue with the next wallpaper.
        print(f'Error downloading {wallpaper_url}: {e}')
def main(num_pages=1000):
    """Walk the search result pages and download every wallpaper found.

    Args:
        num_pages: Number of result pages to crawl, starting at page 1.
            Defaults to the previously hard-coded value of 1000.
    """
    for page in range(1, num_pages + 1):
        page_url = f'{search_url}{page}'
        print(f'Fetching page {page}...')
        wallpaper_links = get_wallpaper_links(page_url)
        for link in wallpaper_links:
            print(f'Processing wallpaper link: {link}')
            download_wallpaper(link)
            random_delay()
        if not wallpaper_links:
            # No per-download pause happened for this page, so pause here;
            # otherwise failed or empty pages would be requested
            # back-to-back with no delay at all.
            random_delay()
# Start the crawl only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()