import os
import time
import requests
# URL/request conversion site (important):
# https://curlconverter.com/
# Name of the output folder; the save step also uses it as the prefix of every
# saved file name.
name = "GCNA第六天03"
# Total number of video segments.
# NOTE(review): the download loop iterates range(0, n + 1), i.e. indices 0..n
# inclusive, so this value acts as the last segment index — confirm.
n = 245
# Browser-style HTTP headers sent with every segment request.
# Key order is kept exactly as captured.
headers = {
    # Content negotiation and connection handling.
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    # The page the request claims to originate from.
    "Origin": "https://klkat.duanshu.com",
    "Referer": "https://klkat.duanshu.com/",
    # Fetch-metadata headers.
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
    # Desktop Chrome 124 on Windows identification.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
    ),
    # Client-hint headers; the values contain double quotes, hence the
    # single-quoted literals.
    "sec-ch-ua": '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
}
# Make sure the output folder exists under the current working directory.
# os.path.join picks the right separator for the platform (the previous
# hard-coded backslash only produced a sub-folder on Windows), and
# exist_ok=True makes the call a no-op when the folder is already there.
str_path = os.getcwd()
os.makedirs(os.path.join(str_path, name), exist_ok=True)
# Download every segment in turn and save each one into the output folder.
# NOTE(review): range(n + 1) covers indices 0..n inclusive (n + 1 requests),
# exactly as before — confirm the segment numbering really ends at n.
# NOTE(review): the URL requests ".ts" segments but the files are saved with an
# ".mp4" extension; kept unchanged so existing file names stay the same.
failed = []
# A single session lets the connection be reused across segments instead of
# opening a new one for every request.
with requests.Session() as session:
    for i in range(n + 1):
        url = f'https://vod.duanshu.com/e4a62924vodtransgzp1253562005/adcf19473270835015596048637/v.f230_{i}.ts?sign=4b338001a51b71cd1422e5812eecc56c&t=66f51184&us=1370&whref=*.duanshu.com'
        # Alternative source, kept for reference:
        # url = f'https://vod.duanshu.com/e4a62924vodtransgzp1253562005/99464ce83270835014714179777/v.f230_{i}.ts?sign=785a53c202813acec71fb77ab348a43b&t=66f4f49b&us=9523&whref=*.duanshu.com'
        try:
            # The timeout keeps one stalled segment from hanging the whole run.
            response = session.get(url, headers=headers, timeout=30)
            # Reject HTTP error responses so an error page is never written
            # to disk and reported as a successful download.
            response.raise_for_status()
        except requests.RequestException as exc:
            failed.append(i)
            print(f"{name}_{i}\t>>>>>>>>>>下载失败: {exc}")
            continue
        # response.content holds the raw bytes of the segment; write them
        # to a local file in binary mode.
        with open(f"{name}/{name}_{i}.mp4", "wb") as file:
            file.write(response.content)
        print(f"{name}_{i}" + f"\t>>>>>>>>>>下载成功!剩余{n-i}个待下载")
        # Uncomment to slow the crawl down if the server starts throttling:
        # time.sleep(0.1)
# List the segments that could not be fetched so they can be retried.
if failed:
    print(f"下载失败的序号: {failed}")
print('完成')
# Python爬虫
# 最新推荐文章于 2024-09-27 10:11:28 发布