#!/usr/bin/python
#encoding=utf-8
__author__ = 'Administrator'
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
import selenium
import sys
import time
import re
import csv
# Script settings, gathered in one place so they are easy to adjust.
CHROMEDRIVER_PATH = "/home/henson/Documents/pycharm/webdriver/chromedriver"
SEARCH_URL = 'http://kns.cnki.net/kns/brief/result.aspx?dbprefix=CJFQ'
SEARCH_WORD = "水"
OUTPUT_CSV = "/home/henson/Downloads/cnki/test.csv"
RESULT_FRAME = 'iframeResult'
MAX_PAGES = 299  # number of result pages to walk through
COLUMN_COUNT = 7  # num, title, author, source, date, cited, down


def _search_and_filter(driver):
    """Submit the keyword search and narrow it with the subject navigator.

    On return the driver is focused on the result iframe.
    """
    driver.get(SEARCH_URL)
    search_box = driver.find_element_by_name("txt_1_value1")
    search_box.send_keys(SEARCH_WORD)
    driver.find_element_by_xpath("//*[@id='ddSubmit']/span").click()  # search
    driver.find_element_by_xpath("//*[@id='btnSearch']").click()
    time.sleep(2)

    # Subject navigator: clear the current choice, then open two levels.
    driver.find_element_by_xpath(
        "//*[@id='XuekeNavi_Div']/div[1]/input[1]").click()  # clear
    driver.find_element_by_xpath("//*[@id='B']/span/img[1]").click()  # 1st level
    time.sleep(2)
    driver.find_element_by_xpath("//*[@id='B027first']").click()  # 2nd level
    time.sleep(2)
    driver.find_element_by_xpath(
        "//*[@id='B027child']/dd[1]/a").click()  # environmental science

    # TODO: fixed sleeps are fragile; an explicit wait (WebDriverWait) on an
    # element of the result list would be more reliable.
    time.sleep(5)
    driver.switch_to.frame(RESULT_FRAME)
    driver.find_element_by_xpath(
        '//*[@id="id_grid_display_num"]/a[3]').click()  # batches of 50


def _export_selection(driver):
    """Select every result on the page and export it via the pop-up window.

    Afterwards the pop-up is closed, focus is back on the result iframe of
    the main window, and the select-all checkbox has been clicked again.
    """
    driver.find_element_by_xpath('//*[@id="selectCheckbox"]').click()  # select all
    driver.find_element_by_xpath(
        '//*[@id="J_ORDER"]/tbody/tr[2]/td/table/tbody/tr/td[1]/div/a[2]'
    ).click()  # export

    main_handle = driver.current_window_handle
    for handle in driver.window_handles:
        if handle == main_handle:
            continue
        driver.switch_to.window(handle)
        time.sleep(2)
        driver.find_element_by_xpath(
            '//*[@id="SaveTypeList"]/li[11]/span[1]/a').click()  # custom format
        driver.find_element_by_xpath(
            '//*[@id="selfDefine"]/table/tbody/tr[4]/td/input[1]'
        ).click()  # select all export fields
        driver.find_element_by_xpath(
            '//*[@id="selfDefine"]/table/tbody/tr[1]/td[1]/input'
        ).click()  # drop source database
        driver.find_element_by_xpath(
            '//*[@id="selfDefine"]/table/tbody/tr[3]/td[2]/input'
        ).click()  # drop issue
        driver.find_element_by_xpath(
            '//*[@id="selfDefine"]/table/tbody/tr[3]/td[3]/input'
        ).click()  # drop page numbers
        driver.find_element_by_xpath('//*[@id="exportExcel"]').click()  # Excel
        # Only the pop-up is closed here, never the main window.
        driver.close()

    # Return to the main window, then re-enter the result iframe.
    driver.switch_to.window(main_handle)
    time.sleep(5)
    driver.switch_to.frame(RESULT_FRAME)
    driver.find_element_by_xpath(
        '//*[@id="selectCheckbox"]').click()  # clear the selection


def _scrape_rows(driver, writer):
    """Write one CSV row per result-table row on the current page.

    Each row holds the text of the first seven cells: num, title, author,
    source, date, cited, down. Rows with fewer cells are skipped.

    Raises:
        NoSuchElementException: if the result table cannot be located.
    """
    current_handle = driver.current_window_handle
    for handle in driver.window_handles:
        # NOTE(review): no second window is expected at this point; if one
        # exists this leaves the result iframe — confirm this is intended.
        if handle != current_handle:
            driver.switch_to.window(handle)

    tbody = driver.find_element_by_xpath(
        '//*[@id="ctl00"]/table/tbody/tr[2]/td/table/tbody')
    for toggle in tbody.find_elements_by_class_name('showAll'):
        toggle.click()  # expand hidden entries

    table_rows = tbody.find_elements_by_tag_name('tr')
    for tr in table_rows[1:]:  # the first row is the header
        cells = tr.find_elements_by_tag_name('td')
        if len(cells) < COLUMN_COUNT:
            # Not a data row; indexing it would raise IndexError.
            continue
        row = tuple(cell.text for cell in cells[:COLUMN_COUNT])
        print([row])  # console echo, printed as a one-item list
        writer.writerow(row)


def main():
    """Search CNKI, export each result page and append its rows to a CSV."""
    import os
    from selenium import webdriver

    os.environ["webdriver.chrome.driver"] = CHROMEDRIVER_PATH
    driver = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        # NOTE(review): this runs before any page has been requested, so it
        # presumably has no visible effect — confirm whether it was meant to
        # follow the page load.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        _search_and_filter(driver)

        # newline='' is what the csv module expects of files it writes to;
        # the with-block guarantees the rows are flushed and the file closed.
        with open(OUTPUT_CSV, "a+", encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            for _ in range(MAX_PAGES):
                _export_selection(driver)
                try:
                    _scrape_rows(driver, writer)
                except NoSuchElementException:
                    print('element cannot be found!')

                # The last link in the pager cell is used to advance.
                pager_links = driver.find_element_by_css_selector(
                    'div.TitleLeftCell').find_elements_by_tag_name('a')
                if not pager_links:
                    break  # nothing to click: stop instead of IndexError
                pager_links[-1].click()
    finally:
        # Always shut the browser down, even when a step above fails.
        driver.quit()


if __name__ == "__main__":
    main()
关于同一条语句、作用于同一个 URL:
driver.find_element_by_css_selector('div.TitleLeftCell').find_elements_by_tag_name('a')[-1].click()
它分别出现在两个不同的 .py 脚本里,一个能正常执行,另一个却不能;把两个脚本合并之后,结果是都不能执行了,不知道为什么。
其实同样的代码在别人的电脑上是能跑的,为什么在我的电脑上就不行……