import requests
from bs4 import BeautifulSoup
import re
# Print the headlines on the realtime news page that were not seen on the
# previous run, then overwrite the cache file with the current headlines.

TITLES_PATH = 'titles.txt'  # cache of the headlines saved by the previous run
REALTIME_URL = 'https://tw.appledaily.com/new/realtime'
PREFIX_LENGTH = 6  # number of leading characters used as a headline fingerprint
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get may block forever
NOISE_PATTERN = re.compile(r'[\d.()]')  # digits, dots and parentheses


def normalize_title(text):
    """Return *text* with digits, dots and parentheses removed.

    This is the form in which headlines are written to the cache file, so
    it is also the form that must be used when comparing against the cache.
    """
    return NOISE_PATTERN.sub('', text)


def select_new_titles(titles, saved_text, prefix_length=PREFIX_LENGTH):
    """Return the headlines in *titles* that do not appear in *saved_text*.

    A headline counts as already known when the first *prefix_length*
    characters of its normalized form occur anywhere in *saved_text*.
    The comparison is a plain substring test, so characters that are
    special in regular expressions (``(``, ``?``, ``[`` ...) are harmless.
    """
    return [
        title
        for title in titles
        if normalize_title(title)[:prefix_length] not in saved_text
    ]


def load_saved_titles(path=TITLES_PATH):
    """Return the contents of the cache file, or '' if it does not exist yet."""
    try:
        with open(path, 'r', encoding='utf8') as saved:
            return saved.read()
    except FileNotFoundError:
        # First run: nothing has been cached, so every headline is new.
        return ''


def fetch_titles(url=REALTIME_URL):
    """Download *url* and return the text of every headline tag found.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the headline list is missing from the page.
    """
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'lxml')
    # The <ul> with this class is the block that holds the headlines.
    headline_block = soup.find('ul', attrs={'class': 'rtddd slvl'})
    if headline_block is None:
        # Fail before the cache is rewritten so the old headlines survive.
        raise RuntimeError('headline list not found in page: ' + url)
    return [tag.text for tag in headline_block.find_all('h1')]


def save_titles(titles, path=TITLES_PATH):
    """Overwrite the cache file with the normalized *titles*, one per line."""
    with open(path, 'w', encoding='utf8') as out:
        out.writelines(normalize_title(title) + '\n' for title in titles)


def main():
    """Print headlines not seen on the previous run and refresh the cache."""
    saved_text = load_saved_titles()
    titles = fetch_titles()
    for title in select_new_titles(titles, saved_text):
        print(title)
    save_titles(titles)


if __name__ == '__main__':
    main()
Python 即时更新新闻标题
最新推荐文章于 2021-02-10 17:42:48 发布