"""Print the front-page headlines of cntour.cn together with each article's text.

For every headline link matched on the home page the script prints the
headline, downloads the linked article, and prints each paragraph of the
article body that consists of a single text node.
"""

import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = "http://www.cntour.cn/"

# CSS paths into the site's markup; they must track the page layout.
HEADLINE_SELECTOR = (
    "#main > div > div.mtop.firstMod.clearfix > div.centerBox "
    "> ul.newsList > li > a"
)
ARTICLE_BODY_SELECTOR = (
    "#main > div > div.newListBox.clearfix > div.leftBox "
    "> div.newShow > div.content.reset"
)

# Naming the parser keeps results identical across machines; without it
# BeautifulSoup picks whichever parser happens to be installed and warns.
HTML_PARSER = "html.parser"

# Seconds to wait for the server; requests waits indefinitely by default.
REQUEST_TIMEOUT = 10


def fetch_soup(url: str) -> BeautifulSoup:
    """Download *url* and return its parsed HTML.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, HTML_PARSER)


def print_article(url: str) -> None:
    """Print the plain-text paragraphs of the article at *url*.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    containers = fetch_soup(url).select(ARTICLE_BODY_SELECTOR)
    if not containers:
        # The page layout differs from what the selector expects; report it
        # instead of failing with IndexError on containers[0].
        print(f"no article body found at {url}", file=sys.stderr)
        return

    for paragraph in containers[0].find_all("p"):
        # .string is None when the <p> holds more than one child node
        # (e.g. mixed text and tags); such paragraphs are skipped.
        if paragraph.string is None:
            continue
        print(paragraph.string)


def main() -> None:
    """Walk the home-page headline links and print every article."""
    for link in fetch_soup(BASE_URL).select(HEADLINE_SELECTOR):
        # get_text()'s first positional argument is a separator, so no
        # argument is passed here: only the link text itself is wanted.
        print("hot topic:" + link.get_text())

        href = link.get("href")
        if href:
            # Links may be relative to the site root; resolve them first.
            article_url = urljoin(BASE_URL, href)
            try:
                print_article(article_url)
            except requests.RequestException as exc:
                # One unreachable article should not abort the whole run.
                print(f"failed to fetch {article_url}: {exc}", file=sys.stderr)
        else:
            print("headline link has no href; skipping", file=sys.stderr)

        # Visual gap between consecutive articles.
        print(5 * "\n")


if __name__ == "__main__":
    main()
关于旅游新闻网站的简单爬虫
最新推荐文章于 2024-04-10 17:39:57 发布