壁纸爬虫,壁纸分类

本文介绍了两个 Python 脚本:第一个从网页抓取壁纸图片并下载到指定目录;第二个根据图片尺寸,把高度大于宽度的竖屏壁纸移动到单独的文件夹,横屏壁纸则保留在原目录。
摘要由CSDN通过智能技术生成
# -*- encoding: utf-8 -*-
# @ModuleName: pic_crawler
# @Function: 图片爬虫
# @Author: Yokon
# @Time: 2024/3/19 16:13

import os
import random
import re
import string
import requests
from bs4 import BeautifulSoup
import urllib3
from tqdm import tqdm
urllib3.disable_warnings()


# 生成一个随机字符串
def generate_random_string(length):
    """Return a random string of ``length`` ASCII letters and digits.

    Each character is drawn independently with ``random.choice``, so the
    result is not suitable for security-sensitive tokens.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def download_image(image_urls, save_path):
    """Download the image shown on each detail page into ``save_path``.

    For every page URL the page is fetched and the first ``<img>`` found
    inside a ``<div class="scrollbox">`` is located. Its ``src`` is then
    streamed to a file named with 12 random characters plus ``.jpg``.
    Pages without such an image (or whose image has no ``src``) are skipped,
    and image responses with a status other than 200 are not written.

    Args:
        image_urls: Iterable of detail-page URLs.
        save_path: Directory the images are written to; created if missing.

    Raises:
        requests.RequestException: Propagated unchanged when a request fails.
    """
    os.makedirs(save_path, exist_ok=True)

    for item in tqdm(image_urls, desc='Processing items', unit='item'):
        response = requests.get(item)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Take the first <img> inside any "scrollbox" container, searching the
        # parsed tree directly instead of re-parsing a stringified result list.
        image_url = None
        for box in soup.find_all('div', attrs={"class": "scrollbox"}):
            img_tag = box.find('img')
            if img_tag is not None:
                image_url = img_tag.get('src')
                break

        # Without a usable URL there is nothing to download; requesting the
        # literal string "None" would raise and abort the whole batch.
        if not image_url:
            continue

        image_filename = generate_random_string(12) + '.jpg'
        image_path = os.path.join(save_path, image_filename)

        # The context manager releases the streamed connection even when the
        # body is never read (non-200 status) or writing fails.
        with requests.get(image_url, stream=True) as response:
            if response.status_code != 200:
                continue
            with open(image_path, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)


def crawl_wallpapers(urls):
    """Collect the detail-page links found on each listing page.

    Every URL in ``urls`` is fetched (with TLS certificate verification
    disabled) and the ``href`` of each ``<a class="preview">`` element is
    gathered in page order.

    Args:
        urls: Iterable of listing-page URLs.

    Returns:
        list[str]: The ``href`` values of all preview anchors found.

    Raises:
        requests.RequestException: Propagated unchanged when a request fails.
    """
    image_urls = []
    for item in urls:
        response = requests.get(item, verify=False)
        soup = BeautifulSoup(response.text, 'html.parser')

        for anchor in soup.find_all('a', attrs={"class": "preview"}):
            # Read the attribute from the parsed tag. Matching a regex against
            # the serialized tag yields HTML-escaped URLs ("&" -> "&amp;") and
            # fails on any anchor whose attribute layout differs.
            href = anchor.get('href')
            if href:
                image_urls.append(href)
    print("Getbase!!!")
    return image_urls


def split_list(input_list, chunk_size):
    """Split ``input_list`` into consecutive slices of ``chunk_size`` items.

    The final slice holds the remainder and may be shorter. The slices are
    returned in a new list; ``input_list`` itself is not modified.
    """
    chunks = []
    for start in range(0, len(input_list), chunk_size):
        stop = start + chunk_size
        chunks.append(input_list[start:stop])
    return chunks


if __name__ == "__main__":
    # Build the first-level (listing) page URLs for pages 60..16999.
    # NOTE(review): the base string has no host part — presumably redacted
    # by the author; confirm before running.
    base = 'https:seed=67cziB&page='
    baseurl = [base + str(page) for page in range(60, 17000)]

    # All downloads go into one directory.
    save_directory = 'wallpapers'

    # Work through the listing pages in batches of 10; the printed batch
    # counter starts at 70.
    for group_no, l1_list in enumerate(split_list(baseurl, 10), start=70):
        # Resolve the second-level (detail page) links for this batch.
        l2_urls = crawl_wallpapers(l1_list)
        print(l2_urls)
        print("第 %s 组" % group_no)
        # Visit each detail page and download its image.
        download_image(l2_urls, save_directory)

 

# -*- encoding: utf-8 -*-
# @ModuleName: pic_divide
# @Function: 分开竖屏横屏
# @Author: Yokon
# @Time: 2024/3/20 16:05
from PIL import Image
import os
import shutil


def move_images(input_folder, output_folder):
    """Move portrait-oriented images from ``input_folder`` to ``output_folder``.

    Only directory entries whose names end in ``.jpg`` or ``.png`` are
    examined (adjust the suffixes as needed). An image is moved when its
    height is strictly greater than its width; all other files stay where
    they are. Entries that cannot be opened as images are reported and
    skipped so one bad file does not stop the run.

    Args:
        input_folder: Directory to scan (not recursive).
        output_folder: Destination directory; created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    for file in os.listdir(input_folder):
        if not file.endswith((".jpg", ".png")):
            continue

        image_path = os.path.join(input_folder, file)
        try:
            # The with-block closes the file handle before the move below,
            # even if reading the size fails.
            with Image.open(image_path) as image:
                width, height = image.size
        except OSError as exc:
            # Covers truncated or non-image files saved with an image suffix.
            print(f"skipping unreadable image {image_path}: {exc}")
            continue

        if height > width:
            shutil.move(image_path, os.path.join(output_folder, file))

    print("移动完成!")


# Source directory to scan and destination directory for portrait images.
input_folder, output_folder = "wallpapers", "vertical_images"

move_images(input_folder=input_folder, output_folder=output_folder)

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Yokon_D

您的鼓励将是我最大的动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值