某教育网站视频抓取

最新推荐文章于 2024-07-12 16:16:27 发布

°纸鸢栀年°

最新推荐文章于 2024-07-12 16:16:27 发布

阅读量391

点赞数

文章标签： python

本文链接：https://blog.csdn.net/qq_40849557/article/details/121289197

版权

m3u8视频抓取并解密

某教育网站视频抓取
运行中
运行结果

某教育网站视频抓取

直接上代码

import requests
import json
import time
import urllib3
from Crypto.Cipher import AES
import os
import re
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import binascii
import threadpool

# HTTP headers sent with every request made by this script.
headers = {
    # Session cookie; must be filled in by hand before running.
    "Cookie": "",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/95.0.4638.69 Safari/537.36"
    ),
}

# Root directory under which downloaded videos are written.
path = "./source"

def videoList(course_id="2d92cb7a25264928b1f1408dc3842663"):
    """Fetch the video listing of a course.

    Sends a POST request to the ``findVideoList`` endpoint with the
    module-level ``headers`` (TLS verification disabled) and returns the
    decoded JSON body.

    Args:
        course_id: Value sent as the ``courseId`` form field. Defaults to
            the course ID this script was originally written for, so
            existing zero-argument calls behave as before.

    Returns:
        The parsed JSON response.
    """
    url = "https://uc.tmooc.cn/video/findVideoList"
    data = {
        "courseId": course_id,
    }
    response = requests.post(url, data=data, headers=headers, verify=False)
    return json.loads(response.text)

def checkVideo(item, course_id="2d92cb7a25264928b1f1408dc3842663"):
    """Query the ``checkVideo`` endpoint for one video entry.

    Args:
        item: Mapping that provides ``"stageId"`` and ``"id"``; both are
            forwarded as query parameters.
        course_id: Value sent as the ``courseId`` query parameter. Defaults
            to the course ID this script was originally written for, so
            existing one-argument calls behave as before.

    Returns:
        The parsed JSON response.
    """
    url = "https://uc.tmooc.cn/video/checkVideo"
    params = {
        "courseId": course_id,
        "stageId": item["stageId"],
        "videoId": item["id"],
        # NOTE(review): presumably a cache-busting value; such parameters
        # are usually millisecond timestamps (time * 1000), so the factor 3
        # looks unusual -- left unchanged, confirm against the site.
        "_": str(int(time.time()*3)),
    }
    response = requests.get(url, params=params, headers=headers, verify=False)
    return json.loads(response.text)

def getvideofile(item):
    """Request playback information for a video from the bokecc servlet.

    Args:
        item: Mapping with an ``"obj"`` mapping (providing ``"guid"`` and
            optionally ``"lookBackGuid"``) and a ``"name"`` entry.

    Returns:
        The JSON object carried inside the JSONP response.

    Raises:
        ValueError: If the response is not of the form ``callback(...)``
            or the wrapped payload is not valid JSON.
    """
    url = "https://p.bokecc.com/servlet/getvideofile"
    obj = item["obj"]
    # The original looked up the empty key "", which made this branch
    # unreachable; the body it guarded reads "lookBackGuid", so that is the
    # key being tested here. Fall back to "guid" when it is absent or None.
    guid = obj.get("lookBackGuid")
    if guid is None:
        guid = obj["guid"]
    callback = "cc_jsonp_callback_453816"
    params = {
        "vid": guid,
        "siteid": "0DD1F081022C163E",
        "width": "100%",
        "useragent": "other",
        "version": "20140214",
        "hlssupport": "1",
        "vc": item["name"],
        "mediatype": "undefined",
        "divid": "cc_video_" + guid + "_3692846",
        "callback": callback,
        "r": "7263265.342129019",
    }
    response = requests.get(url, params=params, headers=headers, verify=False)
    text = response.text
    # Unwrap the JSONP envelope by locating the outermost parentheses rather
    # than relying on exact character offsets, so a trailing ";" or newline
    # after the closing parenthesis does not break parsing.
    payload = text[text.index("(") + 1:text.rindex(")")]
    return json.loads(payload)

def get_key(uri):
    """Download *uri* and return the raw response body as bytes.

    Used by ``merge`` to fetch the key referenced by an ``#EXT-X-KEY`` tag.
    The request uses the module-level ``headers`` with TLS verification
    disabled.
    """
    return requests.get(uri, headers=headers, verify=False).content

def get_mp4(uri):
    """Download *uri* and return the raw response body as bytes.

    Used by ``merge`` to fetch one media segment. The request uses the
    module-level ``headers`` with TLS verification disabled.
    """
    return requests.get(uri, headers=headers, verify=False).content

def _parse_key_tag(tag, playurl):
    """Return ``(key, iv)`` for an ``#EXT-X-KEY`` line.

    ``key`` is the downloaded key as bytes, or ``None`` when the tag has no
    URI attribute (e.g. ``METHOD=NONE``). ``iv`` is the explicit IV as
    bytes, or ``None`` when the tag has no ``IV=0x...`` attribute.
    """
    from urllib.parse import urljoin  # local import: not among the file's imports

    uri_match = re.search(r'URI="(.*?)"', tag)
    if uri_match is None:
        return None, None
    # Match the IV attribute by name instead of assuming it is the last
    # "="-separated field of the tag.
    iv_match = re.search(r'IV=0[xX]([0-9A-Fa-f]+)', tag)
    iv = None
    if iv_match is not None:
        iv = binascii.unhexlify(iv_match.group(1))
    # urljoin leaves absolute key URIs alone and resolves relative ones
    # against the playlist URL.
    key = get_key(urljoin(playurl, uri_match.group(1)))
    return key, iv


def _strip_pkcs7(data):
    """Remove trailing PKCS7 padding from *data* when it is well-formed."""
    if not data:
        return data
    pad = data[-1]
    if 1 <= pad <= 16 and data.endswith(bytes([pad]) * pad):
        return data[:-pad]
    # Not valid padding: return the bytes untouched rather than corrupt them.
    return data


def merge(file,playurl,des):
    """Download every segment of an m3u8 playlist into one output file.

    Args:
        file: Text of the m3u8 playlist.
        playurl: URL the playlist was fetched from; relative segment and
            key URIs are resolved against it.
        des: Output path relative to the module-level ``path`` directory.

    Segments that follow an ``#EXT-X-KEY`` tag carrying a URI are decrypted
    with AES-CBC; segments with no key in effect are written as downloaded.
    Per the HLS specification (RFC 8216), CBC is restarted for every
    segment, the IV is the tag's IV attribute or, if absent, the segment's
    media sequence number, and segments carry PKCS7 padding.
    """
    from urllib.parse import urljoin  # local import: not among the file's imports

    des_dir = os.path.join(path, des)
    base_dir = os.path.dirname(des_dir)
    if base_dir:
        # exist_ok avoids the check-then-create race when several worker
        # threads create the same directory at once.
        os.makedirs(base_dir, exist_ok=True)

    key = None
    iv = None
    sequence = 0  # media sequence number of the next segment
    # The context manager closes the output file even if a download or
    # decryption step raises.
    with open(des_dir, "wb") as f:
        # splitlines() + strip() also copes with CRLF playlists, which
        # would otherwise leave a "\r" at the end of every segment URL.
        for raw in file.splitlines():
            item = raw.strip()
            if not item:
                continue
            if item.startswith("#EXT-X-KEY"):
                key, iv = _parse_key_tag(item, playurl)
                continue
            seq_match = re.match(r"#EXT-X-MEDIA-SEQUENCE:(\d+)", item)
            if seq_match is not None:
                sequence = int(seq_match.group(1))
                continue
            if item.startswith("#"):
                # Any other tag or comment line carries no media.
                continue

            uri = urljoin(playurl, item)
            print(uri)
            content = get_mp4(uri)
            if key is not None:
                segment_iv = iv
                if segment_iv is None:
                    segment_iv = sequence.to_bytes(16, "big")
                # A fresh cipher per segment: reusing one object would chain
                # CBC state across segments and garble the first block of
                # every segment after the first.
                cipher = AES.new(key, AES.MODE_CBC, segment_iv)
                content = _strip_pkcs7(cipher.decrypt(content))
            f.write(content)
            sequence += 1

def get_m3u8(item,des):
    """Fetch the playlist of a video and hand it to ``merge``.

    Args:
        item: Mapping whose ``"copies"`` list has a first element with a
            ``"playurl"`` entry; that URL is downloaded as the playlist.
        des: Output path forwarded unchanged to ``merge``.
    """
    first_copy = item["copies"][0]
    playurl = first_copy["playurl"]
    playlist = requests.get(playurl, headers=headers, verify=False)
    merge(playlist.text, playurl, des)

def downloads():
    """Resolve every video of the course into a download job.

    Walks the two-level listing returned by ``videoList``; for each inner
    entry it calls ``checkVideo`` and then ``getvideofile``, and pairs the
    result with an output path of the form
    ``<n>.<outer name>/<m>.<inner name>.mp4`` (both counters start at 1).

    Returns:
        A list of ``(None, {"item": ..., "des": ...})`` tuples, the shape
        passed to ``threadpool.makeRequests`` by ``run``.
    """
    listing = videoList()
    jobs = []
    for group_no, group in enumerate(listing["list"], start=1):
        group_dir = str(group_no) + "." + group["name"]
        for entry_no, entry in enumerate(group["list"], start=1):
            file_name = str(entry_no) + "." + entry["name"] + ".mp4"
            des = os.path.join(group_dir, file_name)
            print(des)
            videofile = getvideofile(checkVideo(entry))
            jobs.append((None, {"item": videofile, "des": des}))
    return jobs

def run(target_list):
    """Run ``get_m3u8`` for every job on a pool of 10 worker threads.

    Args:
        target_list: Job descriptions as produced by ``downloads``.

    Prints ``start:`` before queueing and ``end!`` once the pool has
    finished waiting.
    """
    print("start:")
    workers = threadpool.ThreadPool(10)
    for job in threadpool.makeRequests(get_m3u8, target_list):
        workers.putRequest(job)
    workers.wait()
    print("end!")
    
# Script entry: build the list of download jobs, then process them on the
# thread pool. This runs at import time because there is no __main__ guard.
items = downloads()
run(items)

运行中

在这里插入图片描述

运行结果

在这里插入图片描述

有问题+v zp953362984

°纸鸢栀年°

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
打赏
0
评论
某教育网站视频抓取

快手信息抓取 | 某教育网站视频抓取 | 运行中 | 运行结果 | 某教育网站视频抓取 | 直接上代码：import requests; import json; import time; import urllib3; from Crypto.Cipher import AES; import os; import re; urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning); import binascii; import threadpool; headers = …
复制链接

扫一扫