本次更新增加了多次循环答题的功能,并解决了网络状况不佳时出现弹窗的问题。
import json
import random
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.remote.webelement import WebElement
# Browser-like User-Agent sent with the captcha image download.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/90.0.4430.93 Safari/537.36 Edg/90.0.818.56 "
}
browser = webdriver.Edge(executable_path="MicrosoftWebDriver.exe")
try:
    # Ask the browser to load the login page.
    browser.get("https://www.yooc.me/login")

    # Locate the account and password inputs and fill them in.
    account_input = browser.find_element_by_xpath('/html/body/div[2]/div/div/div[2]/div/div[1]/form/div[2]/input')
    account_input.send_keys('你的账号')
    password_input = browser.find_element_by_xpath('/html/body/div[2]/div/div/div[2]/div/div[1]/form/div[3]/input')
    password_input.send_keys('你的密码')

    # Read the current page source and pull the captcha image URL out of it.
    response = browser.page_source
    tree = etree.HTML(response)
    code_url = tree.xpath('/html/body/div[2]/div/div/div[2]/div/div[1]/form/div[4]/img/@src')[0]

    # Download the captcha so the user can look at it.  The timeout keeps a
    # stalled connection from hanging the script, and raise_for_status()
    # avoids saving an HTTP error page as if it were the image.
    # NOTE(review): this request does not share the browser's cookies; if the
    # site ties the captcha to the browser session, the saved image may not
    # match the one shown in the browser -- confirm.
    captcha_response = requests.get(url=code_url, headers=headers, timeout=10)
    captcha_response.raise_for_status()
    text_response = captcha_response.content
    with open("./code_text.jpg", "wb") as fp:
        fp.write(text_response)

    # The user types the captcha text by hand; it is then entered in the form.
    code_text = input("请查看验证码,并在30秒内输入:")
    code_text_input = browser.find_element_by_xpath('/html/body/div[2]/div/div/div[2]/div/div[1]/form/div[4]/input[1]')
    code_text_input.send_keys(code_text)
    login = browser.find_element_by_id('submit')
    login.click()
    time.sleep(5)

    # Follow the link stored in topic_url, then switch to the second window
    # handle (the click is expected to have opened a new window).
    topic_url = browser.find_element_by_xpath('/html/body/div[2]/div[2]/table/tbody/tr/td/div[1]/div[3]/div[1]/a[2]')
    topic_url.click()
    time.sleep(2)
    handles = browser.window_handles
    browser.switch_to.window(handles[1])

    # Click through to the exam page.
    exam_url = browser.find_element_by_xpath('/html/body/section/section/div[1]/div[4]/a')
    exam_url.click()
    time.sleep(2)
except BaseException:
    # Do not leave a stray browser/driver process behind when login or
    # navigation fails (or the user aborts at the captcha prompt).
    browser.quit()
    raise
def _dismiss_network_popup(driver, popup_button_xpath, retry_target):
    """Close the "network problem" popup for as long as it keeps appearing.

    Every time the popup button is found and clicked, ``retry_target`` is
    clicked again to repeat the action the popup interrupted.  The loop ends
    as soon as the popup button can no longer be found or clicked.

    Args:
        driver: The Selenium WebDriver controlling the browser.
        popup_button_xpath: XPath of the button that closes the popup.
        retry_target: Element to click again after the popup was closed.
    """
    while True:
        try:
            driver.find_element_by_xpath(popup_button_xpath).click()
        except WebDriverException:
            # Any Selenium failure is treated as "no popup present".  Unlike
            # a bare ``except``, Ctrl+C and SystemExit still propagate.
            return
        retry_target.click()


# NOTE(review): the original indentation was lost; the save step below is
# assumed to run once per attempt (the file is opened in append mode).
try:
    for _attempt in range(100):
        # Open the first exam entry and click the confirmation button.
        exam_detail = browser.find_element_by_xpath('/html/body/section/section/div[2]/div[3]/ul/li[1]/div[2]/a[2]')
        exam_detail.click()
        time.sleep(2)
        confirm_btn = browser.find_element_by_xpath('/html/body/div[12]/div[3]/div/div[1]')
        confirm_btn.click()
        time.sleep(5)
        # If the network-problem popup shows up, close it and confirm again
        # until it stops appearing.
        _dismiss_network_popup(browser, '/html/body/div[12]/div[3]/div/div/button', confirm_btn)

        bodylist = browser.find_elements_by_xpath('/html/body/section/section/div[5]/div[@class="question-board"]')
        print(bodylist)
        browser.maximize_window()
        for each in bodylist:
            print(each.text)
            # Pick one of the question's <label> options at random.
            templist = each.find_elements_by_tag_name('label')
            islist = random.choice(templist)
            # Same popup handling as above, retrying the option click.
            _dismiss_network_popup(browser, '/html/body/div[7]/div[3]/div/div/button', islist)
            time.sleep(0.5)
            islist.click()
            time.sleep(0.5)
            print("选项已勾选!")

        # Submit the paper, confirm, then open the result details.
        print("正在交卷!")
        submit = browser.find_element_by_xpath('/html/body/section/aside/div[2]/div[2]/div[3]/a[2]')
        submit.click()
        time.sleep(2)
        confirm_again = browser.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]')
        confirm_again.click()
        time.sleep(2)
        check_detail = browser.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]')
        check_detail.click()
        time.sleep(2)

        # Scrape every question board on the page: element id -> text nodes.
        print("正在读取题目!")
        exam_answers = browser.page_source
        exam_answers_tree = etree.HTML(exam_answers)
        answers_list = exam_answers_tree.xpath('/html/body/section/section/div[3]/div[@class="question-board"]')
        questions = {}
        for answers in answers_list:
            question_id = answers.xpath('./@id')[0]
            if question_id not in questions:
                questions[question_id] = answers.xpath('.//text()')
                print(answers, "读取成功!")

        again_btn = browser.find_element_by_xpath('/html/body/section/div/a')
        again_btn.click()

        # Append this attempt's questions as one JSON document per line.
        # The explicit UTF-8 encoding is required because ensure_ascii=False
        # writes non-ASCII text as-is; the trailing newline keeps successive
        # records separable instead of running together as "{...}{...}".
        with open("./yiban_questionbank.json", "a+", encoding="utf-8") as fp:
            json.dump(questions, fp, ensure_ascii=False)
            fp.write("\n")
        print("保存成功!")
finally:
    # Always shut the browser down, even when an attempt fails midway.
    browser.quit()
之后会考虑接入第三方验证码识别服务,实现完全自动化。如果有什么建议,欢迎提出!