全自动下载removebg图片

喝粥也会胖的唐僧

已于 2024-05-16 15:58:58 修改

阅读量206

点赞数 1

文章标签： selenium removebg

于 2024-05-16 15:47:56 首次发布

本文链接：https://blog.csdn.net/zhou_438/article/details/138963985

版权

之前写过两篇半自动的：

半自动下载removebg图片-CSDN博客

半自动下载removebg图片（多进程版）-CSDN博客

但这两版在遇到人机验证时仍需要手动点击。这里改为直接使用百度AI来识别验证图片的内容：removebg 的人机验证图片主要只有两种，一种是黄蜂，一种是植物。当然还有更好的办法，比如使用图鉴等专门的识别工具，另外还有好几个工具的 SDK 可用；不过我这里用百度AI就能搞定，就没去折腾专门的 SDK 了。代码如下：

import os
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from time import sleep
import uuid
from aip import AipImageClassify


# Chrome launch options shared by the single browser session created below.
chrome_options = Options()
# NOTE(review): presumably meant to make the automated browser harder to
# detect as automation — confirm against the Chromium flag documentation.
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# Send a fixed desktop Chrome 86 user-agent string.
chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')


# Baidu AI clients used to decide whether a captcha tile shows a plant / an animal.
# NOTE(review): the three positional arguments are presumably app id, API key
# and secret key; the last two are placeholders that must be replaced with
# real credentials obtained from Baidu AI.
client_plant = AipImageClassify('62249243', '自己去百度ai获取', '自己去百度ai获取')
client_animal = AipImageClassify('62388196','自己去百度ai获取','自己去百度ai获取')
# Root directory under which one working directory per processed image is created.
base_path = 'plant_class'
# Module-level flag; no code visible in this file reads it (exe_human assigns
# its own local variable of the same name).
flag=True
# The one shared WebDriver session; every function below drives this browser.
bro=webdriver.Chrome(options=chrome_options, executable_path='./chromedriver')



# Open the upload page once at start-up and wait a fixed five seconds.
url='https://www.remove.bg/zh/upload'
bro.get(url=url)
sleep(5)


def _top_label(image_path, want_plant):
    """Classify one tile screenshot with Baidu AI and return its top label.

    Args:
        image_path: Path of the screenshot file to classify.
        want_plant: When true the plant detector is used, otherwise the
            animal detector.

    Returns:
        The ``name`` of the first entry in the response's ``result`` list, or
        ``None`` when the response has no usable ``result`` (for example an
        error payload), so the caller can skip the tile instead of crashing.
    """
    # Read through a context manager so the file handle is always released.
    with open(image_path, 'rb') as image_file:
        image_bytes = image_file.read()
    if want_plant:
        response = client_plant.plantDetect(image=image_bytes)
    else:
        response = client_animal.animalDetect(image=image_bytes)
    candidates = response.get('result') or []
    if not candidates:
        return None
    return candidates[0].get('name')


def exe_human():
    """Solve the image-grid captcha by classifying each tile with Baidu AI.

    Waits ten seconds, switches into the challenge iframe, reads the prompt
    to learn whether plants are requested (any other target is handled with
    the animal detector, clicking labels that end in "蜂"), then for each of
    two pages screenshots the nine tiles, clicks the matching ones and
    presses the next/submit button.

    Screenshots and a ``result.txt`` log are written into the module-level
    ``base_path_deep`` directory, which must be set before this is called.
    The driver is always switched back to the top-level document on exit.

    Raises:
        ValueError: If the prompt text does not match the expected pattern.
        NoSuchElementException: If the iframe, prompt, a tile or the
            next-page button cannot be located.
    """
    sleep(10)
    iframe = bro.find_element_by_xpath("/html/body/div[3]/div[1]/iframe")
    bro.switch_to.frame(iframe)
    try:
        prompt = bro.find_element_by_xpath('/html/body/div/div[1]/div/div/div[1]/div[1]/div[1]/h2/span')
        prompt_text = prompt.text
        matched = re.search(r"请点击每张包含(.+?)的图片", prompt_text)
        if matched is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError('unexpected captcha prompt: %r' % prompt_text)
        target = matched.group(1)
        want_plant = target == '植物'
        print('开始进行AI绕过', target)
        with open(base_path_deep + '/result.txt', 'w', encoding='utf-8') as f:
            for page in range(2):  # the challenge spans two pages, so verify twice
                for cell in range(1, 10):
                    tile_xpath = '/html/body/div/div[1]/div/div/div[2]/div[%d]/div[3]/div[1]' % cell
                    tile = bro.find_element_by_xpath(tile_xpath)
                    shot_path = os.path.join(base_path_deep, str(page) + '_' + str(cell) + '_code.jpg')
                    tile.screenshot(shot_path)

                    label = _top_label(shot_path, want_plant)
                    if label is None:
                        # Classification failed: log it and leave the tile alone.
                        f.write('识别失败\n')
                        should_click = False
                    else:
                        f.write(label + '\n')
                        if want_plant:
                            should_click = label != '非植物'
                        else:
                            should_click = label.endswith("蜂")

                    if should_click:
                        print("正在点击第%d个格子" % cell)
                        tile.click()
                    else:
                        print("正在跳过第%d个格子" % cell)
                print("下一页")
                bro.find_element_by_xpath('/html/body/div/div[3]/div[3]').click()
                print("成功进入下一页")
                sleep(5)
    finally:
        # Leave the iframe so later lookups run against the top-level page.
        bro.switch_to.default_content()


def exe_generate():
    """Wait ten seconds, then click the button found under ``#page-content``."""
    button_xpath = "//*[@id='page-content']/div[2]/div/div/div/div[2]/div[2]/div[2]/button"
    sleep(10)
    target_button = bro.find_element_by_xpath(button_xpath)
    target_button.click()


if __name__ == '__main__':
    # Driver loop: read one image URL per line, upload each to the page, deal
    # with the captcha when one appears, then trigger the generate step.

    with open('待处理的图片url.txt','r') as f:
        data = f.readlines()
    print('--------开始处理removebg---------')
    for line in data:
        image_url = line.strip()
        if not image_url:
            # Blank lines would otherwise be submitted as empty URLs.
            continue
        sleep(1)
        # Must stay a module-level name: exe_human() reads it to decide where
        # to store screenshots and its result log.
        base_path_deep = os.path.join(base_path, uuid.uuid4().hex)
        os.makedirs(base_path_deep, exist_ok=True)
        # Pass the URL as a script argument rather than splicing it into the
        # JavaScript source, so quotes or backslashes cannot break the call.
        bro.execute_script("uploadUrl(arguments[0]);", image_url)
        sleep(20)

        try:
            iframe = bro.find_element_by_xpath('//*[@id="page"]/div/div[3]/div/div/iframe')
            bro.switch_to.frame(iframe)
            bro.find_element_by_xpath("//*[@id='checkbox']").click()
            captcha_shown = True
        except NoSuchElementException:
            captcha_shown = False
        finally:
            # Always return to the top-level document, including when the
            # checkbox lookup fails after the frame switch.
            bro.switch_to.default_content()

        if captcha_shown:
            print('人机验证开始')
            # NOTE(review): exe_human() was defined but never called; it is
            # wired in here on the assumption that it is meant to solve the
            # image challenge opened by the checkbox — confirm.
            try:
                exe_human()
            except (NoSuchElementException, AttributeError, KeyError, ValueError) as exc:
                # Best effort: the challenge may not have appeared or could
                # not be parsed; report it and still attempt the generate step.
                print('AI绕过未完成:', exc)
            finally:
                bro.switch_to.default_content()
        else:
            print('不存在人机验证')

        exe_generate()

代码仅供参考，需要的自己进行优化。

喝粥也会胖的唐僧

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
全自动下载removebg图片

但之前的半自动版本在遇到人机验证时仍需要手动点击。这里改为直接使用百度AI来识别验证图片的内容：removebg 的人机验证图片主要只有两种，一种是黄蜂，一种是植物。当然还有更好的办法，比如使用专门的工具；不过我这里用百度AI就能搞定，就没去折腾了。代码仅供参考，如有需要请自行优化。
复制链接

扫一扫