import requests,sys,json,os,argparse
import time
requests.packages.urllib3.disable_warnings()
'''
###########################################################################
v1.1:
1、增加课件下载功能(需安装配置aria2c)
2、增加校验文件完整性功能
3、参数化
v1.0:
初步完成!
###########################################################################
'''
def getCoursesJson(coursesID):
    """Load a course's metadata and prepare its download directory.

    Args:
        coursesID: Course id used in the wanmen API URL. Ignored in resume
            mode (``args.cour_lect`` set), where the metadata is read from a
            JSON file cached by an earlier run instead.

    Returns:
        Tuple ``(savePath, result)``: the course directory
        (``args.savePath`` joined with the course name) and the decoded
        course JSON.

    Side effects:
        Sets ``args.coursesName`` (and ``args.lectureID`` in resume mode),
        creates the course directory and (re)writes ``<course name>.json``
        inside it. Exits with status 1 when the API reply is not JSON.
    """
    if args.cour_lect:
        # Format is "<course name>_<lecture id>"; split on the LAST
        # underscore so course names containing '_' still parse.
        args.coursesName, args.lectureID = args.cour_lect.rsplit('_', 1)
        # The cache is written to <savePath>/<course>/<course>.json (see the
        # end of this function). The legacy location directly under
        # args.savePath is still preferred when it exists.
        legacyPath = f'{args.savePath}/{args.coursesName}.json'
        cachedPath = os.path.join(
            args.savePath, args.coursesName, f'{args.coursesName}.json')
        jsonPath = legacyPath
        if not os.path.isfile(legacyPath) and os.path.isfile(cachedPath):
            jsonPath = cachedPath
        with open(jsonPath, 'r', encoding='UTF-8') as f:
            result = json.load(f)
    else:
        resp = requests.get(
            f'https://api.wanmen.org/4.0/content/courses/{coursesID}',
            headers=args.head, verify=False)
        try:
            result = resp.json()
        except ValueError:
            # Response.json() signals an undecodable body with a ValueError
            # subclass; anything else should surface as a real traceback.
            print('getCoursesJson 获取CoursesJson失败!,返回的不是json数据:', resp.text)
            sys.exit(1)
    args.coursesName = result["name"]
    savePath = os.path.join(args.savePath, args.coursesName)
    myMkdir(savePath)
    with open(f'{savePath}/{result["name"]}.json', 'w', encoding='UTF-8') as f:
        json.dump(result, f, ensure_ascii=False)
    return savePath, result
def myMkdir(name):
    """Create directory ``name``, including any missing parent directories.

    An already existing directory is left untouched. Failures are reported
    on stdout instead of being raised, so callers continue best-effort.

    Args:
        name: Path of the directory to create.
    """
    try:
        # makedirs + exist_ok handles nested paths and avoids the race
        # between a separate existence check and the creation call.
        os.makedirs(name, exist_ok=True)
    except OSError as e:
        print(f'创建目录 {name} 失败:', e)
def getM3u8UrlDic(lectureID):
    """Fetch a lecture's name and HLS stream info from the wanmen API.

    Args:
        lectureID: The ``id`` of an entry in a lecture's ``children`` list.

    Returns:
        Tuple ``(name, hls)`` taken from the reply's ``name`` and
        ``video.hls`` fields. Per the original author's note, ``hls`` offers
        five qualities: mobileLow, pcLow, pcHigh, pcMid, mobileMid.

    Exits with status 1 when the reply is not JSON or lacks those fields.
    """
    resp = requests.get(
        f'https://api.wanmen.org/4.0/content/lectures/{lectureID}',
        headers=args.head, verify=False)
    try:
        result = resp.json()
    except ValueError:
        print('getM3u8UrlDic 获取M3u8Url失败!,返回的不是json数据:', resp.text)
        sys.exit(1)
    try:
        return result['name'], result['video']['hls']
    except (KeyError, TypeError):
        # TypeError covers a null/non-dict "video" (or a non-dict reply),
        # which is just another shape of "unexpected JSON".
        print('getM3u8UrlDic 返回json结果有误:', result)
        sys.exit(1)
def check(savePath,coursesJson=0):
    """Report which expected videos and documents are missing on disk.

    Args:
        savePath: Course directory; only files inside its subdirectories
            are considered (files directly in ``savePath`` are ignored).
        coursesJson: Decoded course JSON. When falsy it is loaded from
            ``<savePath>/<args.coursesName>.json``.

    Prints one line per missing file together with its source URL.
    """
    from itertools import islice
    print('正在校验文件完整性……')
    # islice(..., 1, None) skips the first os.walk entry (savePath itself).
    # A set gives O(1) membership tests below.
    existing = {
        file
        for _path, _folders, files in islice(os.walk(savePath), 1, None)
        for file in files
    }
    if not coursesJson:
        with open(f'{savePath}/{args.coursesName}.json', 'r', encoding='UTF-8') as f:
            coursesJson = json.load(f)
    for i, lecture in enumerate(coursesJson['lectures'], 1):
        for j, children in enumerate(lecture['children'], 1):
            videoName = f"{i}.{j} {children['name']}.mp4"
            if videoName not in existing:
                # getM3u8UrlDic returns (name, hls); indexing that tuple
                # with args.type directly would raise TypeError.
                _name, hls = getM3u8UrlDic(children['id'])
                # hls is expected to map quality -> URL; any other shape is
                # printed as-is so the report never aborts here.
                url = hls.get(args.type) if isinstance(hls, dict) else hls
                print('没有:', videoName, url)
    for document in coursesJson['documents']:
        fileName = f"{document['name']}.{document['ext']}"
        if fileName not in existing:
            print('没有:', fileName, document['url'])
def _runCommand(cmd):
    """Run an external downloader given as an argument list.

    Passing a list (no shell) keeps paths with spaces or shell
    metacharacters intact. A tool that cannot be started is reported on
    stdout rather than raised, so the remaining downloads still run.

    Returns the process exit code, or None when it could not be started.
    """
    import subprocess
    try:
        return subprocess.run(cmd).returncode
    except OSError as e:
        print(f'无法执行 {cmd[0]}:', e)
        return None


def main(coursesID):
    """Download every lecture video and document of a course, then verify.

    Videos are fetched with ffmpeg into ``<savePath>/第<i>讲 <lecture>/`` and
    documents with aria2c into ``<savePath>/课件``. In resume mode
    (``args.cour_lect`` = ``<course name>_<lecture id>``) videos before the
    given lecture id are skipped and downloading continues from it.

    Args:
        coursesID: Course id passed on to getCoursesJson.
    """
    savePath, coursesJson = getCoursesJson(coursesID)
    # Resume target: the part after the last underscore of -c, if given.
    resumeID = args.cour_lect.rsplit('_', 1)[-1] if args.cour_lect else None
    skipping = resumeID is not None
    for i, lecture in enumerate(coursesJson['lectures'], 1):
        print(i, lecture['name'])
        videoPath = f"{savePath}/第{i}讲 {lecture['name']}"
        myMkdir(videoPath)
        for j, children in enumerate(lecture['children'], 1):
            if skipping:
                if children['id'] != resumeID:
                    continue
                # Found the resume point: download it and everything after.
                skipping = False
            videoName, urlDic = getM3u8UrlDic(children['id'])
            # urlDic is expected to map quality -> m3u8 URL; a plain string
            # is used as the URL directly.
            url = urlDic[args.type] if isinstance(urlDic, dict) else urlDic
            print('\n', '-'*20, f'{i}-{j}', children['name'], '-'*20)
            _runCommand([
                'ffmpeg', '-threads', str(args.threads), '-i', url,
                '-c', 'copy', '-y', '-bsf:a', 'aac_adtstoasc',
                f'{videoPath}/{i}.{j} {videoName}.mp4',
                '-v', 'repeat+level+info',
            ])
    for document in coursesJson['documents']:
        fileName = f"{document['name']}.{document['ext']}"
        print('-'*20, '下载第', document['order'], '个课件:', fileName, '-'*20)
        _runCommand([
            'aria2c', '-s', str(args.threads), document['url'],
            '-d', f'{savePath}/课件', '-o', fileName,
        ])
    check(savePath, coursesJson)
def getParserWM():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace with ``coursesID``, ``type``, ``threads``,
        ``savePath`` and ``cour_lect``. Only ``-i`` is required; options
        that were not supplied are ``None`` (no defaults are set here).
    """
    cli = argparse.ArgumentParser(
        description='程序功能:\n 1、爬取万门大学课程;',
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # One row per option: (flag, destination attribute, help text, required).
    optionTable = (
        ('-i', 'coursesID', "课程id,从课程详情中的URL中获取", True),
        ('-t', 'type', "选择清晰度(默认 pcHigh ):\n mobileLow : 手机版低清\n mobileMid : 手机版中清\n pcLow : 电脑版低清\n pcMid : 电脑版中清\n pcHigh : 电脑版高清", False),
        ('-r', 'threads', "指定线程(默认 16)", False),
        ('-s', 'savePath', "指定保存路径(默认保存在脚本所在文件夹)", False),
        ('-c', 'cour_lect', "从指定课程的小节开始,小节ID为json文件中children中的ID,格式:课程名称_小节id", False),
    )
    for flag, target, helpText, isRequired in optionTable:
        cli.add_argument(flag, dest=target, help=helpText, required=isRequired)
    return cli.parse_args()
# ffmpeg -threads 10 -i "https://media.wanmen.org/a81d11d0-f1bb-4469-b630-f1dd674081bb_pc_high.m3u8?sign=bd93952471b6413f19874fc805532bff&t=5cb14ffb&r=e8d52086889c1d207f2ce8e000f913ea" -c copy -y -bsf:a aac_adtstoasc "人工智能、大数据与复杂系统一月特训班/第1讲 复杂系统/1.4 生活实例与本章答疑.mp4" -v repeat+info
# aria2c https://docs.wanmen.org/1ce13214b7c54dc4a7e07b7f15205de0.pdf -d 教育心理学特训班/课件 -o 教育心理学14讲.pdf
if __name__ == '__main__':
    # The parsed options are kept in the module-level ``args`` object that
    # the functions above read directly.
    args = getParserWM()

    # HTTP headers sent with every API request.
    args.head = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'Connection':'close',
    }

    # Fill in defaults for options that were omitted (or given as empty).
    args.type = args.type or 'pcHigh'
    if args.threads:
        args.threads = int(args.threads)
    else:
        args.threads = 16
    args.savePath = args.savePath or os.getcwd()
    args.cour_lect = args.cour_lect or False

    # Example course ids from the original author:
    #   5c7742fc7f59616cea0ec672  (教育心理学特训班)
    #   593e086f206e46163b6dd5c8  (人工智能、大数据与复杂系统一月特训班)
    main(args.coursesID)
# 效果图参考 (reference screenshots)