import feedparser
import re
import requests
from bs4 import BeautifulSoup
import jieba
def gettitle(url, timeout=10):
    """Fetch a web page and return the text of its ``<title>`` element.

    On success the title is also printed. When the request fails or the
    server answers with a status other than 200, a failure message is
    printed and an empty string is returned. A page without a ``<title>``
    element also yields an empty string.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page title as a string, or ``''`` if it could not be obtained.
    """
    try:
        # Without a timeout an unresponsive server would block forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        print('Failed to retrieve the webpage')
        return ''

    # Only a 200 response is treated as a successful fetch.
    if response.status_code != 200:
        print('Failed to retrieve the webpage')
        return ''

    soup = BeautifulSoup(response.text, 'html.parser')
    # soup.title is None when the document has no <title> tag; guard so
    # that case returns '' instead of raising AttributeError.
    if soup.title is None:
        return ''

    title = soup.title.text
    print(title)
    return title
def getwordcounts(url):
    """Return a title and a dictionary of word counts for the page at *url*.

    The text that gets segmented is the string returned by
    ``gettitle(url)``. It is split into words with ``jieba.cut`` and only
    words for which ``removesymbols`` returns a truthy value are counted.

    Args:
        url: Address of the page to analyse.

    Returns:
        A ``(title, wc)`` tuple. ``title`` is the first counted word
        (``""`` if nothing was counted) and ``wc`` maps each counted word
        to its number of occurrences.
    """
    text = gettitle(url)
    wc = {}
    title = ""
    for word in jieba.cut(text):
        # removesymbols is defined outside this block; a truthy result is
        # taken to mean "keep this word" -- NOTE(review): confirm.
        if not removesymbols(word):
            continue
        wc[word] = wc.get(word, 0) + 1
        # wc has exactly one key only while the first counted word is its
        # sole entry, so this records that first word as the title.
        # NOTE(review): assumes this check belongs inside the filter branch.
        if len(wc) == 1:
            title = word
    # Return both values; a bare "return title," would drop the counts.
    return title, wc
Python集体智慧编程之发现群组--爬取URL
最新推荐文章于 2025-06-01 21:09:33 发布

最低0.47元/天 解锁文章
3240

被折叠的 条评论
为什么被折叠?



