import requests
from bs4 import BeautifulSoup
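
# Scrapes every chapter of "The Legendary Mechanic" from boxnovel.com and
# appends them, in order, to a single text file.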


def get_novel_content(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Extract the chapter title from <h3> and the body text from <p> tags
        h3_tags = soup.find_all('h3')
        p_tags = soup.find_all('p')
        content = ""
        if h3_tags:
            content += h3_tags[0].get_text() + "\n"
        else:
            print(f"No <h3> chapter title found on {url}")
        for tag in p_tags:
            # Stop collecting paragraphs once the "All rights reserved" footer is reached
            if "All rights reserved" in tag.get_text():
                break
            content += tag.get_text() + "\n"
        return content
    else:
        print(f"Failed to retrieve content, status code: {response.status_code}")
        return None


def save_to_file(filename, content, chapter_title):
    with open(filename, 'a', encoding='utf-8') as file:
        # Write the title line only if the scraped content doesn't already include it
        if chapter_title not in content:
            file.write(f"{chapter_title}\n")
        file.write(content + '\n\n')


def main():
    base_url = 'https://boxnovel.com/novel/the-legendary-mechanic-boxnovel/'
    chapter_count = 1463  # Total number of chapters to download
    file_name = 'The_Legendary_Mechanic.txt'  # The file that collects all chapters
    for i in range(1, chapter_count + 1):
        url = f"{base_url}chapter-{i}"
        if i == chapter_count:
            # The final chapter's URL carries an '-end' suffix on the site
            url += '-end'
        content = get_novel_content(url)
        if content:
            chapter_title = f"Chapter {i}: "  # Assumes chapter titles follow the "Chapter 1:" format
            save_to_file(file_name, content, chapter_title)
            print(f"Saved: Chapter {i}")
        else:
            print(f"Failed to retrieve chapter {i}")
    print(f"All chapters have been saved to {file_name}")


if __name__ == "__main__":
    main()