"""Download chapters of a web novel with Selenium into a single text file.

The script opens the novel's chapter index in Chrome, asks how many index
entries to walk through, then visits each chapter link in turn and appends
the chapter title and body to one UTF-8 text file.
"""
import traceback
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Chapter index page of the novel.
INDEX_URL = 'http://quanxiaoshuo.org/243825/'
# Every chapter is appended to this one file.
# NOTE: to store one file per chapter instead, build the path from the
# chapter title (e.g. under D:\workspace\book) inside _save_chapter.
OUTPUT_PATH = r'D:\workspace\全球高武小说.txt'
# Index positions that are skipped without being fetched.
# NOTE(review): presumably div[17] on the index page is not a chapter
# link - confirm against the live page.
SKIPPED_POSITIONS = frozenset({17})
# Visual divider written after each chapter.
CHAPTER_SEPARATOR = '*' * 120


def _chapter_url(driver, position):
    """Return the href of the link at 1-based ``position`` on the index page.

    The index page must be the page currently loaded in ``driver``.
    Raises whatever ``find_element`` raises when no such link exists.
    """
    link = driver.find_element(
        By.XPATH, f'/html/body/div[9]/div[{position}]/a'
    )
    return link.get_attribute('href')


def _save_chapter(title, text):
    """Append one chapter (title, body, separator line) to OUTPUT_PATH."""
    # Opened per chapter so that everything fetched so far is on disk even
    # if a later chapter fails.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        f.write('\n' + title + '\n' + text + '\n' + CHAPTER_SEPARATOR + '\n')


def _download_chapters(driver, last_position):
    """Fetch index entries 1..``last_position`` and save each chapter."""
    for position in range(1, last_position + 1):
        if position in SKIPPED_POSITIONS:
            continue
        # Pause between requests, as the original script did.
        sleep(1)
        # The index is already loaded for the first entry; afterwards the
        # browser is sitting on the previous chapter, so go back to it.
        if position != 1:
            driver.get(INDEX_URL)
        driver.get(_chapter_url(driver, position))
        print(f'第{position}页')
        title = driver.find_element(By.TAG_NAME, 'h1').text
        text = driver.find_element(By.XPATH, '//*[@id="content"]').text
        print(title)
        _save_chapter(title, text)


def main():
    """Run the interactive download session.

    Errors are reported (with a traceback) rather than re-raised, so the
    script still exits normally after closing the browser.
    """
    # Bound before the try so the cleanup below never hits an undefined
    # name when starting the browser itself fails.
    driver = None
    try:
        service = Service('chromedriver.exe')
        driver = webdriver.Chrome(service=service)
        driver.get(INDEX_URL)
        last_position = int(input('请输入要下载到的章节'))
        _download_chapters(driver, last_position)
    except Exception:
        # Top-level boundary: keep the original message but also show what
        # actually went wrong. Ctrl-C / SystemExit are not swallowed.
        print('错误')
        traceback.print_exc()
    finally:
        if driver is not None:
            # Leave the last page visible briefly before closing.
            sleep(3)
            driver.quit()


if __name__ == '__main__':
    main()
使用 Selenium 爬取网络小说
最新推荐文章于 2024-05-08 21:32:09 发布