"""Download every page linked from a start page into the ``xkcd`` directory.

The start page is fetched and parsed, every ``<a href>`` target is resolved
against the start URL, and each response body is written to
``xkcd/<index>.html`` where ``<index>`` is the anchor's position in the list
of anchors found on the start page.
"""
import os
from urllib.parse import urljoin, urlsplit

import bs4
import requests

url = "http://www.baidu.com"
# Seconds to wait on any single request, so one stalled host cannot hang
# the whole run.
REQUEST_TIMEOUT = 10

os.makedirs('xkcd', exist_ok=True)

res = requests.get(url, timeout=REQUEST_TIMEOUT)
res.raise_for_status()
resHtml = bs4.BeautifulSoup(res.text, 'html.parser')
linkUrl = resHtml.select('a[href]')

# The index counts every anchor (including skipped ones) so output file names
# stay tied to the anchor's position on the page.
for i, anchor in enumerate(linkUrl):
    href = anchor.get('href')
    # Hrefs that are only a fragment are skipped rather than downloaded.
    if href.startswith("#"):
        continue

    # urljoin resolves absolute, root-relative, path-relative and
    # protocol-relative ("//host/path") hrefs; plain string concatenation
    # produced malformed URLs for the latter two.
    sonUrl = urljoin(url, href)
    if urlsplit(sonUrl).scheme not in ("http", "https"):
        # e.g. "javascript:" or "mailto:" targets cannot be fetched over HTTP.
        continue

    try:
        temp = requests.get(sonUrl, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # One unreachable link should not abort the remaining downloads.
        print("%s第%d个任务失败: %s" % (sonUrl, i, err))
        continue

    # The context manager closes the file even if a write fails part-way.
    with open(os.path.join('xkcd', str(i) + '.html'), 'wb') as sonFile:
        for chunk in temp.iter_content(100000):
            sonFile.write(chunk)
    print("%s第%d个任务已完成" % (sonUrl, i))

print("任务已完成!")
第11章 链接验证
最新推荐文章于 2023-02-09 15:26:20 发布