爬取新闻内容

最新推荐文章于 2024-09-22 17:12:26 发布

渊博自习室

最新推荐文章于 2024-09-22 17:12:26 发布

阅读量37

点赞数 1

文章标签： python

本文链接：https://blog.csdn.net/m0_61382108/article/details/130176914

版权

from bs4 import BeautifulSoup
import requests
import os 


# Fetch one Sina Finance article and cache it in a local text file.
#
# Cache layout: line 1 is the headline, every following line is one <p>
# paragraph of the article body.  When the cache file already exists and is
# non-empty it is read back instead of hitting the network again.
CACHE_PATH = "chinews.txt"
NEWS_URL = "https://finance.sina.com.cn/china/2023-04-14/doc-imyqivpp0171641.shtml"
REQUEST_TIMEOUT = 10  # seconds; requests waits indefinitely without one


def _fetch_news(url):
    """Download the article at *url* and return ``(title, paragraphs)``.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        ValueError: if the page has no ``<h1>`` or no ``div#artibody``.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail here rather than parsing (and caching) an error page.
    response.raise_for_status()
    # Hand BeautifulSoup the raw bytes so it detects the charset from the
    # document itself; response.text depends on the charset declared in the
    # HTTP headers, which may be missing or wrong for this page.
    soup = BeautifulSoup(response.content, "lxml")
    title = soup.find("h1")
    article = soup.find("div", {"id": "artibody"})
    if title is None or article is None:
        raise ValueError(f"unexpected page layout, cannot parse article: {url}")
    paragraphs = [p.get_text() for p in article.find_all("p")]
    return title.get_text().strip(), paragraphs


def _save_news(path, title, paragraphs):
    """Write the headline and paragraphs to *path*, one item per line."""
    with open(path, "w", encoding="utf-8") as f:  # create the file
        f.write(title + "\n")
        # A newline after every paragraph keeps the file line-structured so
        # that _load_news can split it back into the same pieces.
        f.writelines(paragraph + "\n" for paragraph in paragraphs)


def _load_news(path):
    """Return ``(title, paragraphs)`` read from *path*, or None if it is empty."""
    with open(path, "r", encoding="utf-8") as f:  # read the file
        lines = [line.rstrip("\n") for line in f]
    if not lines:
        return None
    return lines[0], lines[1:]


cached = _load_news(CACHE_PATH) if os.path.exists(CACHE_PATH) else None
if cached is None:
    # Everything is parsed before the cache file is opened, so a failed
    # download or parse never leaves a partial cache behind.
    news_title, news_content = _fetch_news(NEWS_URL)
    _save_news(CACHE_PATH, news_title, news_content)
else:
    news_title, news_content = cached

# Both paths print the same thing: the headline, then one paragraph per line.
print(news_title)
for paragraph in news_content:
    print(paragraph)