from bs4 import BeautifulSoup
import requests
import os
# Fetch a Sina Finance article once and cache it in a local text file; on
# later runs, read the article back from the cache instead of the network.
#
# Cache layout: line 1 is the title, every following line is one paragraph.
CACHE_FILE = "chinews.txt"
url = "https://finance.sina.com.cn/china/2023-04-14/doc-imyqivpp0171641.shtml"


def _fetch_article(page_url):
    """Download the page and return ``(title, paragraphs)``.

    Raises:
        requests.RequestException: on network failure, timeout or a
            non-2xx HTTP status.
        RuntimeError: if the page lacks the expected <h1> title or the
            ``div#artibody`` article container.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    html = requests.get(page_url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing (and caching) an error page.
    html.raise_for_status()
    # Hand BeautifulSoup the raw bytes so it detects the encoding from the
    # document itself rather than relying on the HTTP header's charset.
    soup = BeautifulSoup(html.content, "lxml")
    title = soup.find("h1")
    article = soup.find("div", {"id": "artibody"})
    if title is None or article is None:
        raise RuntimeError(
            "unexpected page layout: missing <h1> or div#artibody at " + page_url
        )
    paragraphs = [p.get_text() for p in article.find_all("p")]
    # Strip the title so it always occupies exactly the first cache line.
    return title.get_text().strip(), paragraphs


def _save_article(path, title, paragraphs):
    """Write the title and paragraphs to *path*, one item per line."""
    with open(path, "w", encoding="utf-8") as f:  # create the file
        f.write(title + "\n")
        f.writelines(paragraph + "\n" for paragraph in paragraphs)


def _load_article(path):
    """Read ``(title, paragraphs)`` from *path*; return None if it is empty."""
    with open(path, "r", encoding="utf-8") as f:  # read the file
        lines = [line.rstrip("\n") for line in f]
    if not lines:
        return None
    return lines[0], lines[1:]


cached = _load_article(CACHE_FILE) if os.path.exists(CACHE_FILE) else None
if cached is None:
    # No usable cache: scrape first, and only touch the cache file once the
    # whole article has been extracted, so a failed scrape never leaves a
    # partial file behind that later runs would mistake for a valid cache.
    news_title, news_content = _fetch_article(url)
    _save_article(CACHE_FILE, news_title, news_content)
else:
    news_title, news_content = cached

# Both paths print the same thing: the title, then one paragraph per line.
print(news_title)
for paragraph in news_content:
    print(paragraph)
# 爬取新闻内容
# 最新推荐文章于 2024-09-22 17:12:26 发布