import requests
from bs4 import BeautifulSoup
def getHtmlTree(url, timeout=10):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would block the caller
            indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``'lxml'`` parser.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    htmlTree = BeautifulSoup(response.text, 'lxml')
    return htmlTree
def _pageLinks(url):
    """Return every anchor href on the page at ``url`` that contains ``http``.

    ``http`` is the module-level marker string; only hrefs containing it are
    kept.
    """
    htmlTree = getHtmlTree(url)
    # href=True skips anchors that have no href attribute; indexing those
    # with a['href'] would raise KeyError and lose the whole page's links.
    return [a['href'] for a in htmlTree.find_all('a', href=True)
            if http in a['href']]


def getUrl(url):
    """Crawl outward from ``url`` and record every link found in ``execurl``.

    Starting at ``url``, each page's links are collected and every link not
    yet in the module-level ``execurl`` list is appended to it and then
    crawled in turn, depth-first. The current ``execurl`` list is printed
    before each page is fetched.

    The traversal uses an explicit stack instead of recursion, so a long
    chain of pages cannot exceed the interpreter's recursion limit.

    Args:
        url: Page to start crawling from. A failure to fetch this first page
            propagates to the caller; failures on pages discovered later are
            reported and skipped.

    Returns:
        None. Results accumulate in the module-level ``execurl`` list.
    """
    print(execurl)
    # Each stack entry is an iterator over one page's links. Resuming the
    # iterator on top of the stack continues that page where it left off,
    # which keeps the same visiting order as a recursive depth-first walk.
    stack = [iter(_pageLinks(url))]
    while stack:
        for link in stack[-1]:
            if link in execurl:
                continue
            execurl.append(link)
            print(execurl)
            try:
                children = _pageLinks(link)
            except Exception as err:
                # Best-effort crawl: one bad page must not stop the rest,
                # but say what was skipped instead of hiding it.
                print('skipping %s: %s' % (link, err))
                continue
            # Descend into the newly fetched page before finishing this one.
            stack.append(iter(children))
            break
        else:
            # Every link on the current page has been handled.
            stack.pop()
if __name__ == '__main__':
    # Page the crawl starts from.
    url = 'http://www.jxjsxy.edu.cn/news-list-zhaojshengfzhuanflany-1.html'
    # Substring a link must contain to be followed; getUrl reads this global.
    http = 'http'
    # Visited-URL list shared with getUrl. Seed it with the start URL itself
    # (not the literal string 'url') so the start page is not fetched again
    # when another page links back to it.
    execurl = [url]
    getUrl(url)
获取网站所有链接
最新推荐文章于 2022-03-18 15:08:51 发布