#国家科学自然基金自动输入爬虫
import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Chrome launch options for the automated browser session.
option=webdriver.ChromeOptions()
# Drop the "enable-automation" switch so Chrome does not show the
# "controlled by automated software" infobar (reduces bot detection).
option.add_experimental_option('excludeSwitches', ['enable-automation'])
browser = webdriver.Chrome(options=option)
# Implicitly wait up to 10 s for elements to appear before raising.
browser.implicitly_wait(10)
# NSFC fund-lookup site queried by start_spider().
url = 'http://www.izaiwen.cn/'
def start_spider():
    """Query the izaiwen.cn NSFC fund database for every (organisation, person)
    pair listed in ``sqhf.csv`` and append the scraped result rows to
    ``jijindata.csv``.

    Input CSV layout (GBK-encoded, first row is a header):
        column 0 -> organisation name, column 1 -> person name.

    Side effects: drives the module-level ``browser``, reads ``sqhf.csv``,
    appends to ``jijindata.csv``.  Returns nothing.
    """
    browser.get(url)
    time.sleep(2)

    # Load the query list, skipping the header row.
    data = []
    with open('sqhf.csv', encoding="gbk") as csvfile:
        csv_reader = csv.reader(csvfile)
        next(csv_reader)  # header row, discarded
        for row in csv_reader:
            data.append(row)
    print(data)

    # NOTE(review): the original started at index 1, skipping the first data
    # row as well as the header — preserved here for compatibility.
    for i in range(1, len(data)):
        # Some people return no results at all; treat any lookup failure for
        # one record as "no information" and move on to the next record.
        try:
            time.sleep(1)
            print(i)
            # Fill in the search form and submit the query.
            browser.find_element(By.ID, 'psnname').clear()
            browser.find_element(By.ID, 'orgname').clear()
            browser.find_element(By.ID, 'orgname').send_keys(data[i][0])
            browser.find_element(By.ID, 'psnname').send_keys(data[i][1])
            browser.find_element(By.CLASS_NAME, 'layui-btn').click()
            time.sleep(1)

            row_count = _scrape_result_page(data[i])

            try:
                # A full page holds 20 <tr> rows; if the first page is full
                # there may be a second page — click "next" and scrape it too.
                if row_count == 20:
                    browser.find_element(
                        By.XPATH, "//*[@id='layui-laypage-3']/a[3]").click()
                    time.sleep(1)
                    _scrape_result_page(data[i])
            except Exception:
                print("没有下一页")
        except Exception:
            print('查无信息')
            continue


def _scrape_result_page(record):
    """Scrape the currently displayed result table and append one CSV row.

    The result table alternates between a data row (<td> cells) and a
    descriptive row containing a <legend> element; only the first such pair
    is captured, matching the original scraper's behaviour.

    :param record: one input row — ``record[0]`` organisation, ``record[1]`` person.
    :returns: total number of <tr> rows in the table (used to detect a
        full 20-row page that may have a continuation page).
    """
    tbody = browser.find_element(By.XPATH, '//*[@id="data_view"]/tbody')
    tr_content = tbody.find_elements(By.TAG_NAME, "tr")
    data_zong = []
    x = 0
    for tr in tr_content:
        if x % 2 == 0:
            # Data row: record person, organisation and the two interesting
            # cells (indices 4 and 5) of the table row.
            print(record[1])
            data_zong.append(record[1])
            data_zong.append(record[0])
            data_td = [td.text for td in tr.find_elements(By.TAG_NAME, "td")]
            print(data_td)
            data_zong.append(data_td[4])
            data_zong.append(data_td[5])
            x += 1
        else:
            # Descriptive row: grab the fund name from the <legend>, then
            # stop — only the first result pair is kept.
            for ming in tr.find_elements(By.TAG_NAME, 'legend'):
                data_zong.append(ming.text)
            print(data_zong)
            break
    # Append the collected fields as one row of the output file.
    with open('jijindata.csv', 'a', encoding='utf-8', newline='') as csvfile:
        csv.writer(csvfile).writerow(data_zong)
    return len(tr_content)
if __name__ == '__main__':
    # Run the scraper end to end; the browser window is deliberately left
    # open afterwards (close() call kept disabled, as in the original).
    start_spider()
    # browser.close()
    print("爬取完成,请到相应文件夹查看!")
# Stray text from the blog page this script was copied from (kept as
# comments so the file remains valid Python):
# 国家科学自然基金selenium自动输入爬虫
# 最新推荐文章于 2023-05-27 13:55:44 发布