# Python news crawler: scraping trending news topics with Python (originally published on a CSDN blog)

import urllib.request

# Output row layout: rank centred in 4 columns, title centred in 15 columns
# using a caller-supplied fill character (argument 3), heat centred in 8.
ROW_TEMPLATE = "排名：{0:^4}\t标题：{1:{3}^15}\t热度：{2:^8}"

# U+3000 IDEOGRAPHIC SPACE: a full-width blank, so CJK titles padded with it
# stay visually aligned in a terminal.
CJK_FILL = chr(12288)


def get_html(url, headers):
    """Download *url* and return the response body as text.

    NOTE(review): the original body of this function was lost in the source
    text; only its call site ``get_html(url, headers)`` survived. This is a
    standard-library re-implementation of that contract.

    Args:
        url: Address of the page to fetch.
        headers: Mapping of HTTP request headers (may be ``None``).

    Returns:
        The decoded page body. The charset declared in the ``Content-Type``
        header is used when present, otherwise UTF-8; undecodable bytes are
        replaced rather than raising.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    request = urllib.request.Request(url, headers=headers or {})
    with urllib.request.urlopen(request, timeout=10) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


def clean_cell_text(text):
    """Return *text* with every space and newline character removed."""
    return text.replace(" ", "").replace("\n", "")


def format_topic_row(topic_rank, topic_name, topic_times):
    """Format one ranking entry as an aligned, tab-separated line."""
    return ROW_TEMPLATE.format(topic_rank, topic_name, topic_times, CJK_FILL)


def get_pages(html):
    """Parse the ranking table in *html* and print one line per topic.

    Every ``<tr>`` after the first is inspected (the first is skipped,
    presumably because it is the table header -- confirm against the page).
    A row is printed only when it contains all three cells: ``td.first``
    (rank), ``td.keyword`` (title) and ``td.last`` (search index).

    Args:
        html: Page markup as a string.
    """
    # Imported here so the rest of the module can be imported (and the pure
    # helpers used) on systems where Beautiful Soup is not installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, "html.parser")
    for each_topic in soup.find_all("tr")[1:]:
        rank_cell = each_topic.find("td", class_="first")
        name_cell = each_topic.find("td", class_="keyword")
        times_cell = each_topic.find("td", class_="last")
        # Rows missing any of the three cells are not ranking entries.
        if rank_cell is None or name_cell is None or times_cell is None:
            continue
        print(
            format_topic_row(
                clean_cell_text(rank_cell.get_text()),
                clean_cell_text(name_cell.get_text()),
                clean_cell_text(times_cell.get_text()),
            )
        )


def main():
    """Fetch the Baidu hot-topics ranking page and print its entries."""
    # Baidu hot-topics ranking list.
    url = "http://top.baidu.com/buzz?b=1&fr=20811"
    headers = {"User-Agent": "Mozilla/5.0"}
    html = get_html(url, headers)
    get_pages(html)


if __name__ == "__main__":
    main()