具体代码如下:
import requests
from lxml import etree
import os
# Scraper for the template listing on 51pptmoban.com.
#
# Flow: fetch the listing page -> for every listed entry open its detail page
# -> follow each link in the "ppt_xz" box to the download page -> fetch every
# link in the "down" box and store the bytes as ./pptss/<title>.rar.

SITE_ROOT = 'http://www.51pptmoban.com'
LISTING_URL = SITE_ROOT + '/ppt/'
OUTPUT_DIR = './pptss'
# Seconds to wait for the server; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 30
HEADERS = {
    # The header name must be spelled 'User-Agent'; a misspelled key is sent
    # as an unrelated header and requests keeps its default agent string.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0'
}


def repair_gbk_text(text):
    """Undo an ISO-8859-1 mis-decoding of GBK bytes.

    The listing page text is decoded by requests before parsing; when it was
    decoded as ISO-8859-1, re-encoding with that codec recovers the raw bytes,
    which are then decoded as GBK.

    Args:
        text: String taken from the parsed listing page.

    Returns:
        The repaired string, or ``text`` unchanged when the round trip is not
        possible (for example because the text was already decoded correctly).
    """
    try:
        return text.encode('iso-8859-1').decode('gbk')
    except UnicodeError:
        return text


def safe_file_name(name):
    """Return ``name`` with path separators replaced by underscores.

    A title containing '/' or '\\' would otherwise be treated as a
    sub-directory path and make ``open`` fail or write outside OUTPUT_DIR.
    """
    return name.replace('/', '_').replace('\\', '_')


def build_download_url(href):
    """Build the file URL from a link found on the download page.

    Only the last two '/'-separated components of ``href`` are kept and
    appended to the site's ``/e/DownSys`` path.
    """
    tail = href.split('/')[-2:]
    return SITE_ROOT + '/e/DownSys/' + '/'.join(tail)


def fetch_tree(url, encoding=None):
    """Download ``url`` and return it parsed as an lxml HTML tree.

    Args:
        url: Absolute URL of the page.
        encoding: Optional encoding to force on the response before its text
            is read; ``None`` keeps whatever requests selected.
    """
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    if encoding is not None:
        response.encoding = encoding
    return etree.HTML(response.text)


def main():
    """Download the archive of every entry on the first listing page."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    listing_tree = fetch_tree(LISTING_URL)
    for entry in listing_tree.xpath('//div[@class="pptlist"]/dl/dd'):
        detail_hrefs = entry.xpath('./div/a/@href')
        titles = entry.xpath('./div/a/img/@alt')
        if not detail_hrefs or not titles:
            # Entry without the expected link/thumbnail: nothing to download.
            continue

        file_name = safe_file_name(repair_gbk_text(titles[0] + '.rar'))
        target_path = OUTPUT_DIR + '/' + file_name

        detail_tree = fetch_tree(SITE_ROOT + detail_hrefs[0], encoding='utf-8')
        for page_href in detail_tree.xpath('//div[@class="ppt_xz"]/a/@href'):
            download_tree = fetch_tree(SITE_ROOT + page_href)
            for file_href in download_tree.xpath('//div[@class="down"]/a/@href'):
                payload = requests.get(
                    url=build_download_url(file_href),
                    headers=HEADERS,
                    timeout=REQUEST_TIMEOUT,
                ).content
                # If a page exposes several links, each download overwrites
                # the same target file.
                with open(target_path, 'wb') as fp:
                    fp.write(payload)
                print(file_name, '下载成功!!!')


if __name__ == "__main__":
    main()
运行结果为: