文章目录
下载链接
1 背景
在该平台上接取任务时,经常由于僧多粥少,无法完成每日工作量,只好趴在电脑前一遍遍刷新,这种机械重复的过程理所应当可以用代码实现。
我没有选择使用连点器,因为连点器需要电脑界面一直停留在浏览器网页上,导致我没法干别的事情,其次,连点器比较死板,对于其他情况没法做出灵活应对。
经查阅资料,Selenium工具可以实现这个功能,于是便有了这个用于接取百川平台任务的项目。
最后经测试,每个任务语音播放阶段设置为100秒,5秒左右刷新一次网页,大概130秒左右可完成一次视频审改。
2 关键代码
2.1 启动Edge浏览器
# Configure and launch Microsoft Edge through Selenium, then open the login page.
options=webdriver.EdgeOptions()
# NOTE(review): 'detach' presumably keeps the browser window open after the script ends - confirm against the Edge/Chromium driver docs.
options.add_experimental_option('detach', True)
# Use a fixed user-data directory for the browser profile.
options.add_argument('--user-data-dir=D:\\EdgeData\\')
#options.add_argument('--headless') # uncomment to run without showing the browser window
driver = webdriver.Edge(options=options)
driver.get('https://www.baichuanweb.com/bmis/login') # URL of the Baichuan task platform
# Fixed one-second pause before interacting with the page.
time.sleep(1)
#driver.maximize_window() # maximise the browser window
2.2 输入账密,点击登录
# Fill in the login form and submit it. The account and password below are hard-coded literals.
# The first field goes through find_element_error; the remaining lookups call driver.find_element directly
# and therefore raise if an element is missing.
find_element_error(driver,'//*[@id="app"]/section/div/div/div[1]/form/div[2]/div/div/input').send_keys('15399999999')
driver.find_element(By.XPATH,'//*[@id="app"]/section/div/div/div[1]/form/div[3]/div/div/input').send_keys('110110110')
driver.find_element(By.XPATH,'//*[@id="app"]/section/div/div/div[1]/form/div[4]/label/span[1]/span').click() # find the "agree to terms" checkbox and tick it
driver.find_element(By.XPATH,'//*[@id="app"]/section/div/div/div[1]/form/div[5]/div/button').click() # find the login button and click it
time.sleep(2) # wait two seconds
2.3 开始循环
# Main polling loop: watch the task square, grab a task when one appears,
# then run the moderation flow on it.
# Relies on `driver`, `snatch_successfully_num` and `start` being defined
# by the surrounding script before the loop starts.
task_entry_xpath = '//*[@id="app"]/section/div/div/div[2]/div[2]/div/div[2]/div[2]'
moderation_title_xpath = '//*[@id="app"]/div/div[1]/div[1]/div[1]/span'

while True:
    # Text of the task-square entry: the "all tasks taken" notice, a price
    # label such as "0.8元/题", or False when the element is not found.
    judge_if_square_refresh_successfully = find_element_error(
        driver, task_entry_xpath, text=True)
    # Title text of the moderation page, or False when we are not on it.
    judge_if_snatch_successfully = find_element_error(
        driver, moderation_title_xpath, text=True,
        if_false_exit_immediately=True)
    print(judge_if_square_refresh_successfully, judge_if_snatch_successfully)

    # Already inside a task (e.g. left over from a previous run): process it.
    if (judge_if_square_refresh_successfully is False
            and judge_if_snatch_successfully is not False):
        for_Video_moderation_web_page(driver)

    if judge_if_square_refresh_successfully is not False:
        # A price label starts with '0' ("0.8元/题"); anything else means no
        # task is available. startswith() also copes with an empty string,
        # where indexing [0] would raise IndexError.
        if not judge_if_square_refresh_successfully.startswith('0'):
            driver.refresh()
            time.sleep(5)
        else:
            for _ in range(2):
                # Hover over the entry (twice), wait 0.3 s, then double-click
                # it to start the task.
                find_element_error(driver, task_entry_xpath,
                                   move_to_element=True, perform=True,
                                   if_false_exit_immediately=True)
                find_element_error(driver, task_entry_xpath,
                                   move_to_element=True, perform=True,
                                   if_false_exit_immediately=True)
                time.sleep(0.3)
                find_element_error(driver, task_entry_xpath,
                                   double_click=True, perform=True)
                judge_if_snatch_successfully = find_element_error(
                    driver, moderation_title_xpath, text=True)
                if judge_if_snatch_successfully:
                    snatch_successfully_num += 1
                    print("成功抢题,目前是第 {} 题.".format(snatch_successfully_num))
                    break
            # Not on the moderation page after the attempts: poll again.
            if not find_element_error(driver, moderation_title_xpath, text=True):
                continue
            # Give the moderation flow up to two tries.
            for _ in range(2):
                time.sleep(1)
                if not for_Video_moderation_web_page(driver):
                    continue
                break

    print(start)
    print("本次循环耗时:{:.2f}".format(time.perf_counter()-start))
    start = time.perf_counter()
    print("========================= End ===========================")
2.4 成功接取任务
def for_Video_moderation_web_page(driver, playback_seconds=100):
    """Run the moderation flow for a task that has already been accepted.

    Steps: double-click the speed button, click through the three
    continuous-play controls, wait for playback, wait for the submit button
    to become enabled, submit, page through the previews, confirm, and
    finally click the button that returns to the task square.

    Args:
        driver: Selenium WebDriver currently showing the moderation page.
        playback_seconds: seconds to sleep while the audio plays before the
            submit button is polled (default 100, the original fixed value).

    Returns:
        True when the final "submit and return" click succeeded, False as
        soon as any required step fails.
    """
    speed_button = '//*[@id="app"]/div/div[1]/div[3]/div[2]/button[1]'
    autoplay_cell = '//*[@id="selectMarks"]/div[3]/table/tbody/tr[1]/td[5]/div/div[3]'
    submit_button = '//*[@id="app"]/div/div[1]/div[3]/div[2]/button[4]'
    next_preview_button = '//*[@id="app"]/div/div[4]/div[2]/div/button[2]'
    confirm_button = '//*[@id="app"]/div/div[4]/div[3]/button[2]'
    return_button = '//*[@id="app"]/div/div[3]/div/div[3]/span/button[2]'

    print("检测到进入审核网页。")
    # Successful clicks return None, so failure must be detected with an
    # explicit comparison against False rather than plain truthiness.
    # Double-click the speed button (original note: sets playback to 1.5x).
    if find_element_error(driver, speed_button, double_click=True, perform=True,
                          if_false_exit_immediately=True) is False:
        return False
    print("成功倍速")

    # Three consecutive clicks that switch on continuous play.
    autoplay_steps = (
        (autoplay_cell, "成功连播1"),
        (autoplay_cell + '/i', "成功连播2"),
        (autoplay_cell + '/div', "成功连播3"),
    )
    for step_xpath, step_message in autoplay_steps:
        if find_element_error(driver, step_xpath, click=True,
                              if_false_exit_immediately=True) is False:
            return False
        print(step_message)

    time.sleep(playback_seconds)

    # Poll every 5 s until the submit button is enabled; the counter starts
    # at 20 and the loop gives up once it exceeds 120.
    wait_time = 20
    while find_element_error(driver, submit_button, is_enabled=True,
                             if_false_exit_immediately=True) is False:
        wait_time += 5
        time.sleep(5)
        if wait_time > 120:
            break
    time.sleep(5)

    # Submit (this call uses the retrying mode of find_element_error).
    if find_element_error(driver, submit_button, double_click=True,
                          perform=True) is False:
        return False
    print("正在提交")
    time.sleep(4)

    # Click "next preview page" at most twice, while the button exists.
    for _ in range(2):
        if find_element_error(driver, next_preview_button,
                              if_false_exit_immediately=True) is False:
            break
        find_element_error(driver, next_preview_button, click=True,
                           if_false_exit_immediately=True)
        print("成功翻页")
    time.sleep(3.3)

    # Confirm: the first double-click must succeed; the two follow-up
    # attempts are best-effort repeats kept from the original flow.
    if find_element_error(driver, confirm_button, double_click=True, perform=True,
                          if_false_exit_immediately=True) is False:
        return False
    find_element_error(driver, confirm_button, double_click=True, perform=True,
                       if_false_exit_immediately=True)
    find_element_error(driver, confirm_button, click=True,
                       if_false_exit_immediately=True)
    print("确认无误")
    time.sleep(2)

    # Submit and return to the task square.
    if find_element_error(driver, return_button, click=True,
                          if_false_exit_immediately=True) is False:
        return False
    print("本次任务已全部完成,正在返回任务广场")
    return True
2.5 大量try-except提高元素操作容错率
def _locate_and_act(driver, xpath, click, text, double_click,
                    move_to_element, perform, is_enabled):
    """Look the element up once and apply the single requested action."""
    element = driver.find_element(By.XPATH, xpath)
    if click:
        return element.click()
    if text:
        return element.text
    if is_enabled:
        return element.is_enabled()
    if double_click or move_to_element:
        chain = ActionChains(driver)
        if double_click:
            chain = chain.double_click(element)
        else:
            chain = chain.move_to_element(element)
        return chain.perform() if perform else chain
    return element


def find_element_error(driver, xpath, click=False, text=False, double_click=False,
                       move_to_element=False, perform=False, is_enabled=False,
                       if_false_exit_immediately=False,
                       max_retries=2, retry_delay=5):
    """Find an element by XPath and act on it, tolerating lookup failures.

    Exactly one action is applied, chosen in this priority order:
    ``click`` > ``text`` > ``is_enabled`` > ``double_click`` >
    ``move_to_element``; with no flag set the element itself is returned.

    Args:
        driver: Selenium WebDriver to search in.
        xpath: XPath expression locating the element.
        click: click the element.
        text: return the element's text.
        double_click: build a double-click ActionChains on the element.
        move_to_element: build a hover ActionChains on the element.
        perform: with ``double_click``/``move_to_element``, execute the chain
            instead of returning it unperformed.
        is_enabled: return the element's ``is_enabled()`` result.
        if_false_exit_immediately: on the first failure return False at once
            instead of refreshing the page and retrying.
        max_retries: number of refresh-and-retry rounds after the first
            failure (default 2, as in the original code).
        retry_delay: seconds to sleep before each page refresh (default 5).

    Returns:
        The result of the chosen action, or False when every attempt failed.
        Note that successful ``click``/``perform`` calls yield None, so
        callers must test for failure with an explicit comparison to False.
        With ``is_enabled`` a False result may mean either "disabled" or
        "not found".
    """
    action_flags = (click, text, double_click, move_to_element, perform, is_enabled)
    try:
        return _locate_and_act(driver, xpath, *action_flags)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop
        # the script instead of being swallowed by the retry logic.
        if if_false_exit_immediately:
            return False

    for _ in range(max_retries):
        try:
            time.sleep(retry_delay)
            driver.refresh()
            return _locate_and_act(driver, xpath, *action_flags)
        except Exception:
            continue
    return False
3 完整代码
贴上来就是了
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.wait import WebDriverWait
def _locate_and_act(driver, xpath, click, text, double_click,
                    move_to_element, perform, is_enabled):
    """Look the element up once and apply the single requested action."""
    element = driver.find_element(By.XPATH, xpath)
    if click:
        return element.click()
    if text:
        return element.text
    if is_enabled:
        return element.is_enabled()
    if double_click or move_to_element:
        chain = ActionChains(driver)
        if double_click:
            chain = chain.double_click(element)
        else:
            chain = chain.move_to_element(element)
        return chain.perform() if perform else chain
    return element


def find_element_error(driver, xpath, click=False, text=False, double_click=False,
                       move_to_element=False, perform=False, is_enabled=False,
                       if_false_exit_immediately=False,
                       max_retries=2, retry_delay=5):
    """Find an element by XPath and act on it, tolerating lookup failures.

    Exactly one action is applied, chosen in this priority order:
    ``click`` > ``text`` > ``is_enabled`` > ``double_click`` >
    ``move_to_element``; with no flag set the element itself is returned.

    Args:
        driver: Selenium WebDriver to search in.
        xpath: XPath expression locating the element.
        click: click the element.
        text: return the element's text.
        double_click: build a double-click ActionChains on the element.
        move_to_element: build a hover ActionChains on the element.
        perform: with ``double_click``/``move_to_element``, execute the chain
            instead of returning it unperformed.
        is_enabled: return the element's ``is_enabled()`` result.
        if_false_exit_immediately: on the first failure return False at once
            instead of refreshing the page and retrying.
        max_retries: number of refresh-and-retry rounds after the first
            failure (default 2, as in the original code).
        retry_delay: seconds to sleep before each page refresh (default 5).

    Returns:
        The result of the chosen action, or False when every attempt failed.
        Note that successful ``click``/``perform`` calls yield None, so
        callers must test for failure with an explicit comparison to False.
        With ``is_enabled`` a False result may mean either "disabled" or
        "not found".
    """
    action_flags = (click, text, double_click, move_to_element, perform, is_enabled)
    try:
        return _locate_and_act(driver, xpath, *action_flags)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop
        # the script instead of being swallowed by the retry logic.
        if if_false_exit_immediately:
            return False

    for _ in range(max_retries):
        try:
            time.sleep(retry_delay)
            driver.refresh()
            return _locate_and_act(driver, xpath, *action_flags)
        except Exception:
            continue
    return False
def for_Video_moderation_web_page(driver, playback_seconds=100):
    """Run the moderation flow for a task that has already been accepted.

    Steps: double-click the speed button, click through the three
    continuous-play controls, wait for playback, wait for the submit button
    to become enabled, submit, page through the previews, confirm, and
    finally click the button that returns to the task square.

    Args:
        driver: Selenium WebDriver currently showing the moderation page.
        playback_seconds: seconds to sleep while the audio plays before the
            submit button is polled (default 100, the original fixed value).

    Returns:
        True when the final "submit and return" click succeeded, False as
        soon as any required step fails.
    """
    speed_button = '//*[@id="app"]/div/div[1]/div[3]/div[2]/button[1]'
    autoplay_cell = '//*[@id="selectMarks"]/div[3]/table/tbody/tr[1]/td[5]/div/div[3]'
    submit_button = '//*[@id="app"]/div/div[1]/div[3]/div[2]/button[4]'
    next_preview_button = '//*[@id="app"]/div/div[4]/div[2]/div/button[2]'
    confirm_button = '//*[@id="app"]/div/div[4]/div[3]/button[2]'
    return_button = '//*[@id="app"]/div/div[3]/div/div[3]/span/button[2]'

    print("检测到进入审核网页。")
    # Successful clicks return None, so failure must be detected with an
    # explicit comparison against False rather than plain truthiness.
    # Double-click the speed button (original note: sets playback to 1.5x).
    if find_element_error(driver, speed_button, double_click=True, perform=True,
                          if_false_exit_immediately=True) is False:
        return False
    print("成功倍速")

    # Three consecutive clicks that switch on continuous play.
    autoplay_steps = (
        (autoplay_cell, "成功连播1"),
        (autoplay_cell + '/i', "成功连播2"),
        (autoplay_cell + '/div', "成功连播3"),
    )
    for step_xpath, step_message in autoplay_steps:
        if find_element_error(driver, step_xpath, click=True,
                              if_false_exit_immediately=True) is False:
            return False
        print(step_message)

    time.sleep(playback_seconds)

    # Poll every 5 s until the submit button is enabled; the counter starts
    # at 20 and the loop gives up once it exceeds 120.
    wait_time = 20
    while find_element_error(driver, submit_button, is_enabled=True,
                             if_false_exit_immediately=True) is False:
        wait_time += 5
        time.sleep(5)
        if wait_time > 120:
            break
    time.sleep(5)

    # Submit (this call uses the retrying mode of find_element_error).
    if find_element_error(driver, submit_button, double_click=True,
                          perform=True) is False:
        return False
    print("正在提交")
    time.sleep(4)

    # Click "next preview page" at most twice, while the button exists.
    for _ in range(2):
        if find_element_error(driver, next_preview_button,
                              if_false_exit_immediately=True) is False:
            break
        find_element_error(driver, next_preview_button, click=True,
                           if_false_exit_immediately=True)
        print("成功翻页")
    time.sleep(3.3)

    # Confirm: the first double-click must succeed; the two follow-up
    # attempts are best-effort repeats kept from the original flow.
    if find_element_error(driver, confirm_button, double_click=True, perform=True,
                          if_false_exit_immediately=True) is False:
        return False
    find_element_error(driver, confirm_button, double_click=True, perform=True,
                       if_false_exit_immediately=True)
    find_element_error(driver, confirm_button, click=True,
                       if_false_exit_immediately=True)
    print("确认无误")
    time.sleep(2)

    # Submit and return to the task square.
    if find_element_error(driver, return_button, click=True,
                          if_false_exit_immediately=True) is False:
        return False
    print("本次任务已全部完成,正在返回任务广场")
    return True
# --- Launch Edge and open the login page ---
options=webdriver.EdgeOptions()
# NOTE(review): 'detach' presumably keeps the browser window open after the script ends - confirm against the Edge/Chromium driver docs.
options.add_experimental_option('detach', True)
# Use a fixed user-data directory for the browser profile.
options.add_argument('--user-data-dir=D:\\EdgeData\\')
#options.add_argument('--headless')
driver = webdriver.Edge(options=options)
driver.get('https://www.baichuanweb.com/bmis/login')
time.sleep(1)
#driver.maximize_window()
# --- Log in: type account and password, tick the agreement box, click the login button ---
# The account and password are hard-coded literals. Only the first lookup goes through
# find_element_error; the direct driver.find_element calls raise if an element is missing.
find_element_error(driver,'//*[@id="app"]/section/div/div/div[1]/form/div[2]/div/div/input').send_keys('15399999999')
driver.find_element(By.XPATH,'//*[@id="app"]/section/div/div/div[1]/form/div[3]/div/div/input').send_keys('110110110')
driver.find_element(By.XPATH,'//*[@id="app"]/section/div/div/div[1]/form/div[4]/label/span[1]/span').click()
driver.find_element(By.XPATH,'//*[@id="app"]/section/div/div/div[1]/form/div[5]/div/button').click()
time.sleep(2)
# Counter of successfully grabbed tasks and timestamp used to time each loop iteration.
snatch_successfully_num=0
start=time.perf_counter()
# Main polling loop: watch the task square, grab a task when one appears,
# then run the moderation flow on it.
# Relies on `driver`, `snatch_successfully_num` and `start` defined above.
task_entry_xpath = '//*[@id="app"]/section/div/div/div[2]/div[2]/div/div[2]/div[2]'
moderation_title_xpath = '//*[@id="app"]/div/div[1]/div[1]/div[1]/span'

while True:
    # Text of the task-square entry: the "all tasks taken" notice, a price
    # label such as "0.8元/题", or False when the element is not found.
    judge_if_square_refresh_successfully = find_element_error(
        driver, task_entry_xpath, text=True)
    # Title text of the moderation page, or False when we are not on it.
    judge_if_snatch_successfully = find_element_error(
        driver, moderation_title_xpath, text=True,
        if_false_exit_immediately=True)
    print(judge_if_square_refresh_successfully, judge_if_snatch_successfully)

    # Already inside a task (e.g. left over from a previous run): process it.
    if (judge_if_square_refresh_successfully is False
            and judge_if_snatch_successfully is not False):
        for_Video_moderation_web_page(driver)

    if judge_if_square_refresh_successfully is not False:
        # A price label starts with '0' ("0.8元/题"); anything else means no
        # task is available. startswith() also copes with an empty string,
        # where indexing [0] would raise IndexError.
        if not judge_if_square_refresh_successfully.startswith('0'):
            driver.refresh()
            time.sleep(5)
        else:
            for _ in range(2):
                # Hover over the entry (twice), wait 0.3 s, then double-click
                # it to start the task.
                find_element_error(driver, task_entry_xpath,
                                   move_to_element=True, perform=True,
                                   if_false_exit_immediately=True)
                find_element_error(driver, task_entry_xpath,
                                   move_to_element=True, perform=True,
                                   if_false_exit_immediately=True)
                time.sleep(0.3)
                find_element_error(driver, task_entry_xpath,
                                   double_click=True, perform=True)
                judge_if_snatch_successfully = find_element_error(
                    driver, moderation_title_xpath, text=True)
                if judge_if_snatch_successfully:
                    snatch_successfully_num += 1
                    print("成功抢题,目前是第 {} 题.".format(snatch_successfully_num))
                    break
            # Not on the moderation page after the attempts: poll again.
            if not find_element_error(driver, moderation_title_xpath, text=True):
                continue
            # Give the moderation flow up to two tries.
            for _ in range(2):
                time.sleep(1)
                if not for_Video_moderation_web_page(driver):
                    continue
                break

    print(start)
    print("本次循环耗时:{:.2f}".format(time.perf_counter()-start))
    start = time.perf_counter()
    print("========================= End ===========================")
欢迎继续开发~
4 说明
其实这里面还包含了接取任务之后的一系列操作,例如连播语音、确认提交、翻页等操作,甚至晚上睡觉都可以挂在电脑上,兴许你早上起床的时候发现题已经刷完了,但是没有修改等操作,经本人测试,直接提交的话我130题里无学科错误,150题里有一个学科错误。当然,这有点不负责任~
关于在此框架下之后的进一步开发,可以读取脚本里的解析文本,同时读取原题答案里面的文本或图片(大多是图片形式),然后对图片执行OCR,最后利用模型得到两段文本中的答案信息,通过比对,若一样,则进行下一步比如直接提交,若不一样,则提醒用户进行修改,30min以内用户未作出修改(可能暂时不在),则自动放弃该任务。 可以进一步降低学科错误率。
至于模型怎么得到,我也没有,但是同学你可以自己找数据集或者自己标注数据集然后训练呀~
或者我想可能他们也是在默默收集兼职人员对脚本的更改内容,训练模型,待技术成熟,该任务就不存在了。或许老师是假,数据标注师才是真~
效果展示
demo
5 等等,你不知道怎么使用?你不是很熟悉Python?
来了来了哈,我对代码进行了一些修改,同时打包成了一个Windows可执行文件,更符合非专业宝宝体质。
import os
# Load the account settings from config.txt, creating the file interactively
# on first run. Afterwards the module-level names `id`, `pwd`, `task` and
# `mode` hold the four settings as strings.
# NOTE: `id` shadows the builtin; the name is kept because the rest of the
# script may refer to it.
# NOTE(review): the password is stored and echoed in plain text.
file_path = "./config.txt"
if not os.path.exists(file_path):
    print("未检测到配置文件config.txt")
    print("正在初始化……")
    # Collect every answer before touching the disk, so an aborted prompt
    # cannot leave a half-written config file behind.
    id = input("您的帐号:")
    pwd = input("您的密码:")
    task = input("任务类型(脚本审改or视频审改):")
    mode = input("模式(仅接取任务or抢题审题一条龙):")
    with open(file_path, "w", encoding='utf-8') as file:
        file.write("您的帐号:" + id + "\n")
        file.write("您的密码:" + pwd + "\n")
        file.write("任务类型:" + task + "\n")
        file.write("模式:" + mode + "\n")
    print("配置完毕!")
else:
    print("已检测到配置文件config.txt")
    print("正在读取配置信息……")
    with open(file_path, "r", encoding='utf-8') as file:
        # partition() splits on the FIRST ":" only, so values that contain
        # ":" themselves survive intact, and a missing or malformed line
        # yields "" instead of raising IndexError.
        id = file.readline().partition(":")[2].strip()
        print("您的帐号:" + id)
        pwd = file.readline().partition(":")[2].strip()
        print("您的密码:" + pwd)
        task = file.readline().partition(":")[2].strip()
        print("任务类型:" + task)
        mode = file.readline().partition(":")[2].strip()
        print("模式:" + mode)
    print("读取完毕!")
为方便使用,我把它打包成了一个百川脚本.exe文件,下载后点击即可运行~
欢迎使用。下载链接
6 总结
通过该project我初步学会了网页自动化操作,顺带也更加了解html了,有点收获~