七、登录知网帐号
def login_pw(driver, username='xxxxxxxx', password='yyyyyyyy'):
    """Log in to CNKI with an account name and password.

    Opens the CNKI advanced-search page, brings up the login dialog, fills
    in the credentials, ticks the agreement checkbox and submits the form.

    Args:
        driver: Selenium WebDriver instance that drives the browser.
        username: CNKI account name. The default is only a placeholder and
            must be replaced with a real account.
        password: Password belonging to ``username``. The default is only a
            placeholder.

    Raises:
        selenium.common.exceptions.TimeoutException: If the login button or
            the account input box does not become available in time.
    """
    wait = WebDriverWait(driver, 30)

    # Open the search page; the login entry lives in its top bar.
    driver.get("https://kns.cnki.net/kns8/AdvSearch")

    # Click the login button to bring up the login dialog.
    wait.until(
        EC.element_to_be_clickable((By.ID, "Ecp_top_login_show"))).click()

    # Account/password login: wait for the dialog to actually be shown
    # instead of sleeping for a fixed, guessed amount of time.
    account_box = wait.until(
        EC.visibility_of_element_located((By.ID, 'Ecp_TextBoxUserName')))
    account_box.send_keys(username)
    driver.find_element(By.ID, 'Ecp_TextBoxPwd').send_keys(password)

    # Tick "agree", then submit the form.
    driver.find_element(By.CLASS_NAME, 'agreement-label').click()
    driver.find_element(By.CLASS_NAME, 'login-alert-btn2').click()

    # No reliable post-login DOM signal is known here, so keep the short
    # fixed pause to let the login request finish.
    time.sleep(2)
点击登录按钮:
弹出登录界面:
输入帐号密码:
勾选登录即同意:
点击登录按钮登录:
八、批量获取每页论文引用信息:
def quote(driver, res_num):
    """Export the citations of every result page of a CNKI search.

    Switches the result list to 50 entries per page and then, page by page,
    selects all entries, opens the export window through the batch-operation
    menu, triggers the export, closes that window and moves on to the next
    page. According to the original author's note, the exported files end
    up in the browser's download directory.

    Only explicit waits are used; the driver's implicit-wait timeout is not
    modified by this function.

    Args:
        driver: Selenium WebDriver instance currently showing the search
            result page.
        res_num: Total number of search results; used to derive how many
            50-entry pages have to be processed.

    Raises:
        selenium.common.exceptions.TimeoutException: If an expected page
            element or the export window does not show up in time.
    """
    per_page = 50
    # Ceiling division: exactly 50 results is one page, 51 results are two.
    total_pages = (int(res_num) + per_page - 1) // per_page
    if total_pages <= 0:
        return  # nothing to export

    wait = WebDriverWait(driver, 100)
    main_window = driver.current_window_handle

    # Change the page size from the default to 50 entries per page.
    driver.find_element(By.ID, 'perPageDiv').click()
    wait.until(EC.visibility_of_element_located(
        (By.CSS_SELECTOR, f'li[data-val="{per_page}"]'))).click()

    for page in range(1, total_pages + 1):
        # Give the result list time to reload after the page-size change or
        # the previous page turn.
        time.sleep(5)

        _export_current_page(driver, wait, main_window)
        print(f'第{page}页引用已经导出,请到"文件夹"中查看。')

        # Only turn the page while there is a next page; clicking "next" on
        # the last page would just run into a timeout.
        if page < total_pages:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located(
                (By.XPATH, "//a[@id='PageNext']"))).click()


def _export_current_page(driver, wait, main_window):
    """Select all entries on the current result page and export them."""
    # Tick "select all". To export only particular entries, their individual
    # row checkboxes could be ticked here instead.
    wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "checkAll"))).click()

    # Open the second batch-operation menu, then its "export" entry.
    driver.find_element(
        By.CSS_SELECTOR, '#batchOpsBox > li:nth-child(2) > a').click()
    wait.until(EC.visibility_of_element_located(
        (By.CSS_SELECTOR,
         '#batchOpsBox>li:nth-child(2)>ul>li.export>a'))).click()

    # Clicking the citation-format entry opens the export page in a new
    # window. Remember the existing handles so the new one can be identified
    # without relying on handle order.
    known_windows = driver.window_handles
    wait.until(EC.element_to_be_clickable((
        By.XPATH,
        '/html/body/div[2]/div[2]/div[2]/div[2]/div/div[1]/div/div[2]/ul[1]/li[2]/ul/li[1]/ul/li[1]/a'))).click()
    wait.until(EC.new_window_is_opened(known_windows))
    export_window = next(
        handle for handle in driver.window_handles
        if handle not in known_windows)

    driver.switch_to.window(export_window)
    try:
        # Compound class name: the space between the two classes is written
        # as a dot so the locator matches both.
        wait.until(EC.element_to_be_clickable(
            (By.CLASS_NAME, 'icon.icon-export'))).click()
        # Leave time for the download to start before closing the window.
        time.sleep(5)
    finally:
        # Always close the export window and return to the result list so a
        # failed export does not leave the driver on the wrong window.
        driver.close()
        driver.switch_to.window(main_window)
点击导出与分析:
点选“导出文献→GB/T 7714-2015 格式引文”弹出新窗口: