"""Scrape the paginated stock research-report list on data.eastmoney.com.

A Chrome session is opened at import time. ``index_page`` loads the list,
jumps to the requested page through the pager's "go to page" box, and only
then parses the rendered HTML with pandas. Reading ``page_source`` before
the jump always yields page 1, regardless of the page requested.
"""
from io import StringIO
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

browser = webdriver.Chrome()
browser.minimize_window()
wait = WebDriverWait(browser, 5)
url = 'https://data.eastmoney.com/report/stock.jshtml'

# NOTE(review): generic locator for the first data row of the first table.
# It assumes the report list is the first <table> with a <tbody> on the
# page -- confirm against the live DOM and tighten the selector if needed.
_FIRST_ROW = (By.CSS_SELECTOR, 'table tbody tr')


def index_page(page):
    """Open the report list, move to *page*, then print and return its tables.

    Args:
        page: 1-based page number to scrape. Page 1 is read directly after
            the initial load; any other page is reached via the pager input.

    Returns:
        The list of DataFrames that ``pandas.read_html`` extracts from the
        rendered page, or ``None`` when the browser interaction times out or
        fails, or when no table can be parsed.
    """
    print('正在爬取第: %s 页' % page)
    try:
        browser.get(url)
        # Do not touch the DOM until at least one table row is present.
        wait.until(EC.presence_of_element_located(_FIRST_ROW))

        if page > 1:
            # Locate the page-number input box and type the target page.
            page_input = wait.until(EC.presence_of_element_located(
                (By.XPATH, '//*[@id="gotopageindex"]')))
            page_input.click()
            page_input.clear()
            page_input.send_keys(str(page))

            submit = wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, '.gotopage .btn')))

            # Remember a row of the page that is currently displayed; once
            # the jump re-renders the table this element goes stale, which
            # is the signal that new data has replaced the old rows.
            # NOTE(review): assumes the site rebuilds the row elements on a
            # page change rather than rewriting their text in place.
            previous_row = browser.find_element(*_FIRST_ROW)
            submit.click()
            wait.until(EC.staleness_of(previous_row))
            wait.until(EC.presence_of_element_located(_FIRST_ROW))

        # Parse only now, so the HTML belongs to the requested page.
        # StringIO avoids pandas' deprecated literal-HTML-string input.
        tables = pd.read_html(StringIO(browser.page_source))
    except (WebDriverException, ValueError) as exc:
        # WebDriverException covers Selenium timeouts and stale or missing
        # elements; ValueError is what read_html raises when it finds no
        # table. Report the failure instead of hiding it, but keep going so
        # one bad page does not abort the whole run.
        print('第 %s 页爬取失败: %r' % (page, exc))
        return None

    print(tables)
    return tables


def main():
    """Scrape pages 1 through 3, then close the browser even on error."""
    try:
        for page in range(1, 4):  # pages 1, 2 and 3
            index_page(page)
    finally:
        browser.quit()


if __name__ == '__main__':
    main()
求解:使用selenium爬取东方财富网时,已经模拟分页但并没有爬取到第一页之后的分页数据,该如何解决?
最新推荐文章于 2024-06-24 14:37:35 发布