诵读历史经典,传承文化血脉。
网址:‘https://www.xuexi.cn/4b3be22f3d07308c2ce39e5ca84a499c/13a0b6125672c7157db254071de2c1f0.html’
1.模拟打开网址。
browse.get('https://www.xuexi.cn/4b3be22f3d07308c2ce39e5ca84a499c/13a0b6125672c7157db254071de2c1f0.html')
2.获取按历史顺序划分的各年代板块列表。
list0 = browse.find_elements(By.CSS_SELECTOR,'.tab-wrapper.horizontal-item')
3.选定板块,点击“更多”,得到该板块所有内容列表。
如,browse.find_element(By.CSS_SELECTOR,'#f44f div.hotItem').click() # 点击更多
4.得到文章标题列表。
list_1 = browse.find_elements(By.CSS_SELECTOR, 'div.text-link-item-title')
5.点击文章标题,浏览文章。
# Open the i-th entry of the title list and read it in its own window.
dingwei = list_1[i].find_element(By.CSS_SELECTOR, '.text-wrap .text')
title = dingwei.text
dingwei.click()
# The click opens the article in a new window; this excerpt assumes the
# newest window is the last handle reported by the driver.
all_window_handles = browse.window_handles
browse.switch_to.window(all_window_handles[-1])
time.sleep(2)
print("诗词内容:", title)
# Scroll down the page in 59 one-second steps (step/60 of the page height
# each time). The loop variable is deliberately not named `i`, so the
# title-list index used above is not overwritten.
for step in range(1, 60):
    browse.execute_script(f"window.scrollTo(0, document.body.scrollHeight / (60/{step}));")
    time.sleep(1)
6.上代码:
import datetime
import re
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Landing page of the section that is crawled.
START_URL = 'https://www.xuexi.cn/4b3be22f3d07308c2ce39e5ca84a499c/13a0b6125672c7157db254071de2c1f0.html'

# CSS selectors used by the script.
TAB_SELECTOR = '.tab-wrapper.horizontal-item'
MORE_SELECTOR = '#f44f div.hotItem'
PAGER_SELECTOR = 'div._123TewfUWxUbDqs5ecsq6k'
PAGER_BUTTON_SELECTOR = '.btn'
TITLE_SELECTOR = 'div.text-link-item-title'
TITLE_TEXT_SELECTOR = '.text-wrap .text'

# An article is scrolled in SCROLL_STEPS - 1 steps, one second apart.
SCROLL_STEPS = 60


def _switch_to_new_window(browse, known_handles):
    """Focus the window that was opened by the most recent click.

    ``known_handles`` are the handles that existed before the click. When the
    driver reports no unseen handle, the last reported handle is used.
    """
    handles = browse.window_handles
    fresh = [handle for handle in handles if handle not in known_handles]
    browse.switch_to.window(fresh[-1] if fresh else handles[-1])


def _pager_buttons(browse):
    """Return the buttons of the first pager on the page, or [] if none.

    ``find_elements`` is used on purpose: unlike ``find_element`` it returns
    an empty list instead of raising when nothing matches. The pager is
    located again on every call rather than cached across page changes.
    """
    pagers = browse.find_elements(By.CSS_SELECTOR, PAGER_SELECTOR)
    if not pagers:
        return []
    return pagers[0].find_elements(By.CSS_SELECTOR, PAGER_BUTTON_SELECTOR)


def _last_page_number(buttons):
    """Return the highest numeric pager label, or 1 when there is none."""
    numbers = []
    for button in buttons:
        label = button.text.strip()
        if label.isdecimal():
            numbers.append(int(label))
    return max(numbers, default=1)


def _read_article(browse, title_block):
    """Open one article, scroll through it, close it, return to the list."""
    link = title_block.find_element(By.CSS_SELECTOR, TITLE_TEXT_SELECTOR)
    title = link.text
    # Remember where to come back to instead of relying on handle order.
    list_handle = browse.current_window_handle
    known_handles = set(browse.window_handles)
    link.click()
    _switch_to_new_window(browse, known_handles)
    time.sleep(2)
    print("诗词内容:", title)
    for step in range(1, SCROLL_STEPS):
        browse.execute_script(
            f"window.scrollTo(0, document.body.scrollHeight / ({SCROLL_STEPS}/{step}));"
        )
        time.sleep(1)
    browse.close()
    browse.switch_to.window(list_handle)


def _read_section_list(browse):
    """Read every article on every page of the currently focused list."""
    page_end = _last_page_number(_pager_buttons(browse))
    page = 1
    while page <= page_end:
        for title_block in browse.find_elements(By.CSS_SELECTOR, TITLE_SELECTOR):
            _read_article(browse, title_block)
        page += 1
        if page > page_end:
            break
        next_button = next(
            (button for button in _pager_buttons(browse) if button.text == str(page)),
            None,
        )
        if next_button is None:
            # Without a button for the next page the list cannot advance;
            # stop rather than read the same page again.
            break
        next_button.click()
        time.sleep(1)


browse = webdriver.Edge()
try:
    browse.maximize_window()
    browse.implicitly_wait(30)

    print('版块:中华诗词经典')
    browse.get(START_URL)
    time.sleep(1)

    for tab in browse.find_elements(By.CSS_SELECTOR, TAB_SELECTOR):
        print(f'{tab.text}部分:')
        tab.click()
        time.sleep(1)

        main_handle = browse.current_window_handle
        known_handles = set(browse.window_handles)
        # "More" opens the full article list of the selected tab in a new window.
        browse.find_element(By.CSS_SELECTOR, MORE_SELECTOR).click()
        time.sleep(1)
        _switch_to_new_window(browse, known_handles)

        _read_section_list(browse)

        browse.close()
        browse.switch_to.window(main_handle)
finally:
    # Always shut the browser and driver down, even after an error.
    browse.quit()