登录界面是在iframe表单中,要获取iframe表单中的数据必须先进入iframe表单
模块selenium
在定位网页中的数据时,如果标签是在iframe表单中,直接定位是找不到的,必须先跳转到iframe表单中才能定位到。整个程序中最重要的就是用selenium进入iframe标签,代码如下:
# Locate the <iframe> element itself (replace "..." with its XPath), then
# switch the driver's context into it. `switch_to.frame` replaces the
# removed `switch_to_frame` helper, and `find_element(By.XPATH, ...)`
# replaces the removed `find_element_by_xpath` helper.
iframe_element = browser.find_element(By.XPATH, "...")
browser.switch_to.frame(iframe_element)
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Start a headless Chrome session and open the site's home page.
chrome_options = Options()
chrome_options.add_argument("--headless")
# `options=` is the supported keyword; `chrome_options=` is deprecated.
browser = webdriver.Chrome(options=chrome_options)
browser.get("https://study.163.com/")
browser.maximize_window()

# "Agree" button of the user-agreement dialog.
agree_button = browser.find_element(
    By.XPATH,
    ".//span[@class='ux-btn th-bk-main ux-btn- ux-btn- ux-modal-btn um-modal-btn_ok th-bk-main']",
)
agree_button.click()

# Close button of the hint form.
form_close = browser.find_element(By.XPATH, ".//i[@class='ux-icon ux-icon-close']")
form_close.click()

# Login button in the navigation bar.
login_button = browser.find_element(By.XPATH, ".//a[@class='f-fr j-nav-loginBtn loginBtn']")
login_button.click()
time.sleep(5)  # give the login dialog time to render

# The login form lives inside an iframe. The driver must switch into that
# iframe first, otherwise none of the lookups below can find their elements.
iframe_element = browser.find_element(
    By.XPATH,
    ".//div[@class='ux-modal mn-login-dialog ux-modal-fadeIn']//div[@id='j-ursContainer-1']/iframe",
)
browser.switch_to.frame(iframe_element)

tel = browser.find_element(By.XPATH, ".//input[@type='tel']")  # account input
password = browser.find_element(By.XPATH, ".//input[@class='j-inputtext dlemail'][1]")  # password input
submit = browser.find_element(By.XPATH, ".//a[@id='submitBtn'][1]")

# NOTE(review): credentials are hard-coded in source; move them to
# environment variables or a secrets store before sharing this file.
tel.send_keys("13839817517")  # type the account
password.send_keys("yhr104653")  # type the password
submit.click()  # submit the login form

# Leave the iframe explicitly so the lookups below run against the
# top-level document.
# NOTE(review): there is no wait between submitting the form and refreshing;
# confirm the login completes in time.
browser.switch_to.default_content()
browser.refresh()

# Search input box.
search = browser.find_element(
    By.XPATH,
    ".//div[@class='m-indextopwrap f-pr']//div[@class='box j-search f-cb']/input",
)
# Search submit button.
go_search = browser.find_element(
    By.XPATH,
    ".//div[@class='m-indextopwrap f-pr']//div[@class='submit j-submit f-pa']/span[text()='搜索']",
)
# Type the keyword and run the search.
search.send_keys("python")
go_search.click()
time.sleep(5)  # wait for the result page to load
if __name__ == "__main__":
    # Print the course titles of every result page, following the pagination
    # link until it is no longer present. The browser is always shut down at
    # the end so the headless Chrome process is not left running.
    try:
        while True:
            # find_elements returns an empty list when nothing matches, so no
            # exception handling is needed to detect a missing link.
            # NOTE(review): the script treats this anchor as the "next page"
            # control; confirm the class name against the live page.
            next_buttons = browser.find_elements(By.XPATH, ".//a[@class='th-bk-disable-gh']")
            next_button = next_buttons[0] if next_buttons else None

            lesson_names = browser.find_elements(
                By.XPATH,
                ".//ul[@class='uc-course-list_ul']/li//span[@class='uc-ykt-coursecard-wrap_tit_name']",
            )
            for lesson_name in lesson_names:
                print(lesson_name.text)

            if next_button is None:
                break
            next_button.click()
            time.sleep(5)  # wait for the next result page to load
    finally:
        browser.quit()