# Given an HTML page, find its body text and the links it contains.
import requests
from bs4 import BeautifulSoup


def search_body_urls(path, timeout=10):
    """Fetch the page at ``path`` and print its links, then its body text.

    Every ``<a>`` element that carries an ``href`` attribute has that
    attribute printed on its own line. Afterwards the text of the first
    element matching the CSS selector ``.content`` is printed; if no element
    matches, an empty string is printed instead.

    Args:
        path: URL of the HTML page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Passed straight through to ``requests.get``.

    Returns:
        None. All results are written to standard output.

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    page = requests.get(path, timeout=timeout)
    # Decode the body as UTF-8 regardless of the encoding requests inferred
    # from the response headers.
    page.encoding = 'utf-8'
    soup = BeautifulSoup(page.text, 'html.parser')

    # Guard against pages without a ``.content`` element rather than
    # failing with IndexError on ``[0]``.
    matches = soup.select('.content')
    article = matches[0].text if matches else ''

    # ``href=True`` skips anchors that have no href attribute (e.g. named
    # anchors), which would otherwise raise KeyError on lookup.
    for anchor in soup.find_all('a', href=True):
        print(anchor['href'])
    print(article)


if __name__ == '__main__':
    search_body_urls(path='http://mil.news.sina.com.cn/china/2017-04-05/doc-ifycwymx3854291.shtml')
截图如下: