预期效果
该公司共有198页公告，每页包含若干条公告。通过代码爬取每条公告的标题、日期等信息，根据关键字判断其是否为所需信息，并对所需公告的位置作标记。
代码实现
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
import csv
import re
import time
# from bs4 import BeautifulSoup
# Start a Chrome browser session; `driver` is the module-level handle to it.
driver = webdriver.Chrome()
# Explicit wait bound to `driver`: up to 30 s per condition, polled every 1 s.
# The implicit-wait alternative, driver.implicitly_wait(10), is left disabled.
wait = WebDriverWait(driver, timeout=30, poll_frequency=1)
def search(i):
print('准备翻第{}页'.format(i))
try:
time.sleep(1.5)
# smart_wait(self,"#pg_noticelist > a.nextP