本文主要记录 Selenium 的常见操作,例如定位元素的多种方法、在指定元素内部继续查找并循环处理、提取文本等。具体代码如下:
# -*- coding: utf-8 -*-
'''
Created on 2019年4月30日
@author: cvter
'''
from selenium import webdriver
import time
import pandas as pd
import numpy as np
import re
# Option letters, in the order of the option columns of the exported table.
OPTION_LETTERS = ("A", "B", "C", "D", "E")

# Column names of the exported table: subject, topic, question stem,
# five option slots and the answer/explanation text.
CSV_COLUMNS = ['subType', 'subCon', 'queCon',
               'queAnsA', 'queAnsB', 'queAnsC', 'queAnsD', 'queAnsE',
               'quePar']

# Marker from which the answer/explanation text is kept.
ANSWER_MARKER = "正确答案"


def parse_question_count(text):
    """Return the first integer that appears in *text*.

    Args:
        text: Text of the element that reports how many questions the page has.

    Returns:
        The first run of digits in *text*, converted to ``int``.

    Raises:
        ValueError: If *text* contains no digits.
    """
    match = re.search(r"\d+", text)
    if match is None:
        raise ValueError("no question count found in %r" % (text,))
    return int(match.group())


def collect_option_titles(pairs):
    """Place option texts into fixed A-E slots.

    Every row must have the same number of fields, otherwise the values no
    longer line up with ``CSV_COLUMNS``; a missing option therefore yields an
    empty string instead of being skipped.

    Args:
        pairs: Iterable of ``(value, title)`` tuples read from the ``<input>``
            elements of one question. ``title`` may be ``None`` or empty.

    Returns:
        A list of five strings ordered A, B, C, D, E. When several inputs
        share a letter, the first non-empty title is kept.
    """
    titles = dict.fromkeys(OPTION_LETTERS, "")
    for value, title in pairs:
        if value in titles and title and not titles[value]:
            titles[value] = title
    return [titles[letter] for letter in OPTION_LETTERS]


def extract_answer_section(text):
    """Return *text* without line breaks, starting at the answer marker.

    Args:
        text: Raw text of the answer/explanation element.

    Returns:
        The text with ``'\\n'`` removed, cut so that it starts at
        ``ANSWER_MARKER``. If the marker is absent the whole flattened text is
        returned (``str.find`` returns -1 there, and slicing from -1 would
        keep only the last character).
    """
    flat = text.replace('\n', '')
    start = flat.find(ANSWER_MARKER)
    return flat[start:] if start != -1 else flat


def scrape_question(driver, number, sub_type, sub_con):
    """Read one question from the page and return it as a 9-field row.

    Args:
        driver: The Selenium WebDriver showing the question page.
        number: 1-based question number; the question container is looked up
            by the id ``"layer-photos-demo" + str(number)``.
        sub_type: Subject text stored in the first field of the row.
        sub_con: Topic text stored in the second field of the row.

    Returns:
        ``[sub_type, sub_con, stem, A, B, C, D, E, answer_text]``.
    """
    container = driver.find_element_by_id("layer-photos-demo" + str(number))
    stem = container.find_element_by_class_name("form-label").text

    # Searching from the container (not the driver) limits the lookup to this
    # question. The option text is read from each input's "title" attribute.
    option_inputs = container.find_elements_by_tag_name("input")
    options = collect_option_titles(
        (element.get_attribute("value"), element.get_attribute("title"))
        for element in option_inputs
    )

    # Click the first <i> element of the question, then give the page time to
    # update before reading the answer/explanation element.
    container.find_elements_by_tag_name("i")[0].click()
    time.sleep(2)
    answer_text = container.find_element_by_class_name("dats").text

    return [sub_type, sub_con, stem] + options + [extract_answer_section(answer_text)]


def main():
    """Scrape every question on the current page and export them to CSV.

    The browser is opened, the user logs in by hand during a fixed wait, and
    each question is then read into one row of a '|'-separated CSV file.
    """
    # NOTE(review): the positional driver path and the find_element_by_*
    # helpers are the Selenium 3 API; they were removed in Selenium 4, so
    # migrate to Service(...) / find_element(By.X, ...) when upgrading.
    # NOTE(review): the raw string keeps the doubled backslashes literally;
    # left unchanged on purpose.
    driver = webdriver.Chrome(r"D:\\Program Files\\chromedriver.exe")
    try:
        driver.get('http://www.yoursite.com/')
        # Login is done by hand: the script waits while the user logs in and
        # navigates to the page that lists the questions. (It could be
        # automated by filling the "username"/"passwdInput" fields and
        # submitting "loginSubmit".)
        time.sleep(50)

        # Continue in the most recently opened window.
        driver.switch_to.window(driver.window_handles[-1])

        sub_type = driver.find_element_by_class_name("title").text
        sub_con = driver.find_element_by_xpath("//*[@class='title set-title']").text
        total = parse_question_count(driver.find_element_by_class_name("zts-yz").text)

        rows = []
        for number in range(1, total + 1):
            row = scrape_question(driver, number, sub_type, sub_con)
            print (row)
            rows.append(row)

        data = pd.DataFrame(rows, columns=CSV_COLUMNS)
        data.to_csv("D:\\tmp\\med\\6.csv", index=False, sep='|')
    finally:
        # quit() closes every window and ends the driver session, so a
        # separate close() is not needed; running it in "finally" avoids
        # leaving the browser open when scraping fails half-way.
        driver.quit()


if __name__ == "__main__":
    main()