# http://www.aoshu.com/e/20190719/5d3130b205314.shtml hosts some primary-school practice worksheets; with a kid at home I ended up writing this script to download them.
#!C:\Python37
# -*- coding:utf-8 -*-
import requests
import os
from bs4 import BeautifulSoup


def downpic(url, unit):
    """Download every <p><img> image from a paginated article.

    The article's pages follow the pattern base.shtml, base_2.shtml, ...,
    base_10.shtml.  Each image is saved under ./math/ with the filename
    '<unit>_<page><original-name>'.

    url  -- URL of the first page of the article
    unit -- integer prefix used to namespace the saved filenames
    """
    base, ext = os.path.splitext(url)
    # Page 1 has no numeric suffix; pages 2..10 are base_<i><ext>.
    pageurls = [url] + [base + '_' + str(i) + ext for i in range(2, 11)]

    # Bug fix: the output directory may not exist yet; the original
    # open(..., 'wb') would raise FileNotFoundError on a fresh checkout.
    savedir = os.path.join(os.getcwd(), 'math')
    os.makedirs(savedir, exist_ok=True)

    page = 1
    for pageurl in pageurls:
        try:
            res = requests.get(pageurl)
        except Exception as e:
            # Bug fix: original fell through after printing, leaving
            # 'res' unbound and raising NameError on the next line.
            print(e)
            continue
        if res.status_code != 200:
            continue
        soup = BeautifulSoup(res.text, 'html.parser')
        for p in soup.find_all(name='p'):
            imglist = p.find_all(name='img')
            if not imglist:
                continue
            imgurl = imglist[0]['src']
            _, filename = os.path.split(imgurl)
            filename = str(unit) + '_' + str(page) + filename
            savename = os.path.join(savedir, filename)
            try:
                imgres = requests.get(imgurl)
            except Exception as e:
                print(e)
                continue
            with open(savename, 'wb') as fw:
                fw.write(imgres.content)
            print('[+]download ' + filename)
        page += 1
    return


def findUnitUrl(url):
    """Return the hrefs of all <a> tags inside <table> elements at url.

    Returns an empty list on failure (bug fix: the original returned
    None, which made the caller crash when iterating the result).
    """
    urllists = []
    try:
        res = requests.get(url)
        soup = BeautifulSoup(res.text, 'html.parser')
        for table in soup.find_all(name='table'):
            # Renamed loop variable: the original used 'list', which
            # shadowed the builtin.
            for link in table.find_all(name='a'):
                urllists.append(link['href'])
    except Exception as e:
        print(e)
    return urllists


def main():
    """Fetch the index page, then download the images of each unit."""
    index_url = 'http://www.aoshu.com/e/20190719/5d3130b205314.shtml'
    unit_urls = findUnitUrl(index_url)
    # Bug fix: the original wrote 'for uniturl in uniturl', reusing one
    # name for both the list and its elements.
    for unit, unit_url in enumerate(unit_urls, start=1):
        downpic(unit_url, unit)
    return


if __name__ == '__main__':
    main()