ppt下载
import time
from pathlib import PurePosixPath
from threading import Thread
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
# Crawl https://www.1ppt.com/ and download the archive behind every template
# page that the category listings link to.
#
# Flow: home page -> category links in the navigation bar -> template detail
# pages in each category listing -> the detail page's download page -> the
# archive file itself, saved in the current working directory.

url = "https://www.1ppt.com/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37",
    "Referer": "https://www.1ppt.com/",
}

# Seconds to wait for any single HTTP request before giving up on it.
REQUEST_TIMEOUT = 15

# File extensions accepted from a download URL; anything else falls back to
# ".zip", which is what the script has always written.
ARCHIVE_SUFFIXES = (".zip", ".rar", ".7z", ".ppt", ".pptx")


def fetch_soup(page_url):
    """Download *page_url* and return it parsed with ``html.parser``.

    The shared ``headers`` are sent with every request.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url=page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # The attribute is ``encoding``; assigning to any other name is a no-op.
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "html.parser")


def links_in(soup, css_class, base):
    """Return absolute URLs of every ``<a href>`` inside one container.

    The container is the first element whose ``class`` matches *css_class*.
    Relative hrefs are resolved against *base*; anchors without an ``href``
    are skipped. Returns an empty list when the container is missing.
    """
    container = soup.find(class_=css_class)
    if container is None:
        return []
    return [
        urljoin(base, anchor.get("href"))
        for anchor in container.find_all("a")
        if anchor.get("href")
    ]


def archive_name(page_url, file_url):
    """Build a local file name for the archive offered by *page_url*.

    The stem comes from the last path segment of the detail page URL and the
    extension from *file_url* when it is a known archive type, so the result
    never contains path separators.
    """
    stem = PurePosixPath(urlparse(page_url).path).stem or "ppt"
    suffix = PurePosixPath(urlparse(file_url).path).suffix.lower()
    if suffix not in ARCHIVE_SUFFIXES:
        suffix = ".zip"
    return stem + suffix


def collect_detail_pages():
    """Visit every category in the navigation bar and gather detail page URLs.

    Returns the URLs in first-seen order without duplicates. A category whose
    listing cannot be fetched is reported and skipped.
    """
    home = fetch_soup(url)
    pages = []
    seen = set()  # O(1) duplicate check; ``pages`` keeps the order
    for category_url in links_in(home, "col_nav i_nav clearfix", url):
        time.sleep(1)  # pause between listing requests to go easy on the site
        try:
            listing = fetch_soup(category_url)
        except requests.RequestException as error:
            print(f"跳过{category_url}:{error}")
            continue
        for page_url in links_in(listing, "tplist", url):
            if page_url in seen:
                continue
            seen.add(page_url)
            pages.append(page_url)
            print(f"获取第{len(pages)}个网页")
    return pages


def download_archive(page_url):
    """Download the archive linked from the detail page *page_url*.

    Returns True once a file has been written, False when no usable download
    link was found.

    Raises:
        requests.RequestException: if the detail page or its download page
            cannot be fetched.
    """
    detail = fetch_soup(page_url)
    entry_links = links_in(detail, "downurllist", page_url)
    if not entry_links:
        return False
    download_page_url = entry_links[0]
    download_page = fetch_soup(download_page_url)
    # NOTE(review): the links under class "c1" are presumably mirrors of the
    # same archive, so the first one that downloads is kept - confirm.
    for file_url in links_in(download_page, "c1", download_page_url):
        try:
            response = requests.get(url=file_url, headers=headers, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException:
            continue  # try the next link
        with open(archive_name(page_url, file_url), mode="wb") as f:
            f.write(response.content)
        return True
    return False


def main():
    """Run the whole crawl: collect detail pages, then download each archive."""
    print("正在下载ppt")
    pages = collect_detail_pages()
    for index, page_url in enumerate(pages, start=1):
        print(f"下载第{index}个网页")
        try:
            saved = download_archive(page_url)
        except requests.RequestException as error:
            print(f"跳过{page_url}:{error}")
            continue
        if not saved:
            print(f"跳过{page_url}:未找到下载链接")


if __name__ == "__main__":
    main()
- 其中 User-Agent 和 Referer 的获取方法:进入 https://www.1ppt.com/ 后打开开发者工具
- 点击网络
- 点击 Fetch/XHR 中唯一的一个请求,复制其中 User-Agent 和 Referer 两项的值即可