import re
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import Select
import csv
import time
# Start Chrome with a custom content-settings profile and pre-fill the parts
# of the search form that stay the same for every applicant.
# NOTE(review): the original comment here read "minimize window", but nothing
# below resizes the window; the prefs set the value 2 for 'images' and for a
# stylesheet key -- presumably to stop those resources from loading. Confirm.
options = webdriver.ChromeOptions()
content_settings = {
    'images': 2,
    'permissions.default.stylesheet': 2,
}
prefs = {'profile.default_content_setting_values': content_settings}
options.add_experimental_option('prefs', prefs)
browser = webdriver.Chrome(
    chrome_options=options,
    executable_path='chromedriver.exe',
)

# Open the search page and give it a fixed two seconds before using the form.
url = 'https://kns.cnki.net/kns/brief/result.aspx?dbprefix=SCOD'
browser.get(url)
time.sleep(2)

# The results of these two lookups are discarded; they only have an effect
# if the lookup itself fails.
browser.find_element_by_id('Form1')
browser.find_element_by_class_name('main_sh')

# Choose "applicant" as the field to search on.
search_field = Select(browser.find_element_by_id("txt_1_sel"))
search_field.select_by_value("SQR")

# Publication-date range: start date first, then end date.
for date_field_id, date_text in (
    ('date_gkr_from', '2019-01-01'),
    ('date_gkr_to', '2020-01-01'),
):
    browser.find_element_by_id(date_field_id).send_keys(date_text)
# Load the search terms: every non-blank CSV row becomes one list of cells in
# final_list.
# The path is a raw string so its backslashes are taken literally; the plain
# literal relied on "\p" and "\d" not being recognised escape sequences,
# which newer Python versions warn about.
final_list = []
lst = []  # module-level scratch list, created empty here
# newline='' is what the csv module asks for on files handed to csv.reader.
with open(r'D:\ptang\data2.csv', 'r', newline='') as f:
    for row in csv.reader(f):
        # A blank line (typically the file's trailing newline) used to become
        # the row [''], i.e. an empty search term; skip such rows.
        if any(cell.strip() for cell in row):
            final_list.append(row)
def _collect_page_numbers(driver, collected):
    """Append the number matches of every row on the current result page.

    Looks up the element with class ``GridTableContent``, its ``tbody`` and
    all ``tr`` rows. For each row the ``value`` attribute of the first
    ``input`` is printed, every substring running from "CN" up to the next
    "!" is extracted, and the list of matches is printed and appended to
    ``collected`` (one list per row, possibly empty).

    ``collected`` is extended in place so that rows gathered before a failed
    lookup are kept. Exceptions raised by the driver lookups propagate.
    """
    table = driver.find_element_by_class_name('GridTableContent')
    body = table.find_element_by_tag_name('tbody')
    for tr in body.find_elements_by_tag_name("tr"):
        raw_value = tr.find_element_by_tag_name('input').get_attribute('value')
        print(raw_value)
        matches = re.findall(r"CN.*?!", raw_value)
        print(matches)
        collected.append(matches)


# One search per entry of final_list: type the applicant, run the search,
# walk through all result pages, then append one row of tallies to the
# output CSV.
for applicant_row in final_list:
    print(applicant_row)
    # Fill in the company name. The whole row (a list of cells) is handed to
    # send_keys. NOTE(review): with more than one column the cells are
    # presumably typed back-to-back -- confirm against the input file.
    browser.find_element_by_xpath('//*[@id="txt_1_value1"]').send_keys(applicant_row)
    # Trigger the search through JavaScript rather than a native click.
    search_button = browser.find_element_by_xpath('//*[@id="btnSearch"]')
    browser.execute_script("arguments[0].click();", search_button)
    time.sleep(3)

    # Probe for the result iframe; if it cannot be found from the current
    # context, report it and step up to the parent frame before switching.
    try:
        browser.find_element_by_id('iframeResult')
    except NoSuchElementException:
        print('no')
        browser.switch_to.parent_frame()
    browser.switch_to.frame('iframeResult')

    # All digit runs found in the pager title text (kept as a list).
    pager_title = browser.find_element_by_class_name('pagerTitleCell').text
    total_digits = re.findall(r"\d+", pager_title)

    # First page, then keep clicking "next page" until that fails.
    collected = []
    _collect_page_numbers(browser, collected)
    while True:
        try:
            pager = browser.find_element_by_class_name('topTurnSpan')
            pager.find_element_by_id('Page_next').click()
            # Scroll far down (10000 is meant to be past the bottom) and
            # back to the top again.
            browser.execute_script('var action=document.documentElement.scrollTop=10000')
            browser.execute_script('var action=document.documentElement.scrollTop=0')
            time.sleep(3)
            _collect_page_numbers(browser, collected)
        except Exception:
            # Any failure in the block above ends pagination (presumably the
            # usual cause is that there is no further "next page" link).
            # Unlike the former bare ``except:``, KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            break
    print(collected)

    # Tally by letter over the text form of everything collected.
    collected_text = str(collected)
    x = collected_text.count("U")  # NOTE(review): presumably utility models -- confirm
    y = collected_text.count('S')  # designs (per the original note)
    z = collected_text.count('A')  # invention applications (per the original note)
    zz = collected_text.count('B')  # invention patents (per the original note)
    print(x)

    # Output row: the four tallies followed by the list of digit runs from
    # the pager title (written by csv as that list's text form).
    output_row = [x, y, z, zz, total_digits]
    print(output_row)
    # Raw string: same path as before, without relying on "\p" and "\d"
    # being left alone as unrecognised escapes.
    with open(r'D:\ptang\data3.csv', 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow(output_row)

    # Back to the top-level document and empty the applicant box for the
    # next search.
    browser.switch_to.default_content()
    browser.find_element_by_id('txt_1_value1').clear()
    print('有到这')
    time.sleep(1)

print(final_list)
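# Patent-website crawler: Selenium automatically enters the applicant and the date range.
# (Web-page residue: "latest recommended article published on 2023-01-06 13:21:04".)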