之前写过两篇半自动的:
但之前的做法在遇到人机验证时需要手动点击。这里直接使用百度 AI 来识别验证图片的内容，因为 removebg 人机验证用到的图片主要只有两种：一种是黄蜂，一种是植物。当然还有更好的办法，即使用专门的打码工具，比如图鉴，以及其他几个工具 SDK；不过我这里用百度 AI 就能搞定，就没去折腾专门的 SDK 了。代码如下:
import os
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from time import sleep
import uuid
from aip import AipImageClassify
# Baidu AI image-classification clients: one is used for plant detection and
# the other for animal detection of captcha tiles.  The second and third
# arguments are placeholders that must be replaced with real credentials.
client_plant = AipImageClassify('62249243', '自己去百度ai获取', '自己去百度ai获取')
client_animal = AipImageClassify('62388196', '自己去百度ai获取', '自己去百度ai获取')

# Root directory under which a per-image working directory is created.
base_path = 'plant_class'

# NOTE(review): exe_human() assigns its own local ``flag``; this module-level
# value does not appear to be read by the code visible here.
flag = True

# Chrome start-up arguments: an extra Blink feature switch plus a fixed
# desktop user-agent string (presumably to look less like an automated
# browser -- confirm against Chrome's documentation).
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument(
    'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
)

# Launch the browser with the local chromedriver binary, open the upload
# page and wait five seconds before anything else touches the page.
url = 'https://www.remove.bg/zh/upload'
bro = webdriver.Chrome(executable_path='./chromedriver', options=chrome_options)
bro.get(url)
sleep(5)
def _tile_label(img_path, want_plant):
    """Return the top label Baidu assigns to the image at *img_path*.

    Uses the plant classifier when *want_plant* is true and the animal
    classifier otherwise.  Returns ``None`` when the response carries no
    usable ``result[0]['name']`` entry (for example an API error payload).
    """
    # Read through a context manager so the screenshot handle is closed.
    with open(img_path, 'rb') as img_file:
        image_bytes = img_file.read()
    if want_plant:
        response = client_plant.plantDetect(image=image_bytes)
    else:
        response = client_animal.animalDetect(image=image_bytes)
    try:
        return response['result'][0]['name']
    except (KeyError, IndexError, TypeError):
        return None


def _should_click(label, want_plant):
    """Decide whether a tile with classifier *label* must be clicked."""
    if label is None:
        return False
    if want_plant:
        return label != '非植物'
    return label.endswith("蜂")


def exe_human():
    """Answer the two-page image captcha using Baidu image classification.

    Waits ten seconds, switches into the challenge iframe, reads the prompt to
    learn whether plant tiles are requested (any other target is handled with
    the animal classifier and the "ends with 蜂" rule), then for each of the
    two pages screenshots the nine tiles, classifies them, clicks the matching
    ones and presses the button that advances the challenge.  One label per
    tile is written to ``result.txt`` in the current working directory.

    Relies on the module-level ``bro`` driver and on ``base_path_deep``, which
    the ``__main__`` loop assigns before each image is processed.

    Raises:
        ValueError: if the prompt text does not match the expected wording.
        NoSuchElementException: propagated from Selenium when the challenge
            iframe or one of its elements cannot be found.
    """
    sleep(10)
    iframe = bro.find_element_by_xpath("/html/body/div[3]/div[1]/iframe")
    bro.switch_to.frame(iframe)
    try:
        prompt = bro.find_element_by_xpath(
            '/html/body/div/div[1]/div/div/div[1]/div[1]/div[1]/h2/span'
        ).text
        match = re.search(r"请点击每张包含(.+?)的图片", prompt)
        if match is None:
            raise ValueError('unrecognised captcha prompt: %r' % prompt)
        target = match.group(1)
        want_plant = target == '植物'
        print('开始进行AI绕过', target)

        result_path = os.path.join(base_path_deep, 'result.txt')
        with open(result_path, 'w', encoding='utf-8') as f:
            # The challenge on remove.bg spans two pages, so verify twice.
            for page in range(2):
                for cell in range(1, 10):
                    tile = bro.find_element_by_xpath(
                        '/html/body/div/div[1]/div/div/div[2]/div[%d]/div[3]/div[1]' % cell
                    )
                    img_path = os.path.join(
                        base_path_deep, '%d_%d_code.jpg' % (page, cell)
                    )
                    tile.screenshot(img_path)

                    label = _tile_label(img_path, want_plant)
                    # Keep one line per tile even when classification failed.
                    f.write((label if label is not None else '识别失败') + '\n')

                    if _should_click(label, want_plant):
                        print("正在点击第%d个格子" % cell)
                        tile.click()
                    else:
                        print("正在跳过第%d个格子" % cell)
                print("下一页")
                bro.find_element_by_xpath('/html/body/div/div[3]/div[3]').click()
                print("成功进入下一页")
                sleep(5)
    finally:
        # Always leave the iframe so later lookups run against the top page.
        bro.switch_to.default_content()
def exe_generate():
    """Wait ten seconds, then click a button inside ``#page-content``.

    The button is located by a fixed XPath on the module-level ``bro`` driver.
    NOTE(review): presumably this is the button that triggers or downloads the
    processed result -- confirm against the live page.
    """
    sleep(10)
    button_xpath = "//*[@id='page-content']/div[2]/div/div/div/div[2]/div[2]/div[2]/button"
    target_button = bro.find_element_by_xpath(button_xpath)
    target_button.click()
if __name__ == '__main__':
    # Script entry point: read one image URL per line and push each through
    # the page, solving the captcha when one is shown.
    with open('待处理的图片url.txt', 'r') as f:
        data = f.readlines()
    print('--------开始处理removebg---------')
    for line in data:
        image_url = line.strip()
        if not image_url:
            # Blank lines would otherwise be uploaded as empty URLs.
            continue
        sleep(1)

        # Fresh working directory per image; exe_human() reads this
        # module-level name to store screenshots and result.txt.
        base_path_deep = os.path.join(base_path, uuid.uuid4().hex)
        os.makedirs(base_path_deep, exist_ok=True)

        # Pass the URL as a script argument instead of splicing it into the
        # JavaScript source, so quotes in the URL cannot break the call.
        bro.execute_script("uploadUrl(arguments[0]);", image_url)
        sleep(20)

        try:
            iframe = bro.find_element_by_xpath('//*[@id="page"]/div/div[3]/div/div/iframe')
            bro.switch_to.frame(iframe)
            bro.find_element_by_xpath("//*[@id='checkbox']").click()
            bro.switch_to.default_content()
            print('人机验证开始')
            # Actually run the AI solver; previously it was never invoked.
            exe_human()
        except NoSuchElementException:
            # Either no captcha widget was shown, or no image challenge
            # appeared after ticking the checkbox.
            print('不存在人机验证')
        finally:
            # Make sure the next lookup happens on the top-level document.
            bro.switch_to.default_content()

        # Called exactly once, outside the try, so a missing button is not
        # misreported as "no captcha" and retried.
        exe_generate()
代码仅供参考,需要的自己进行优化。