某教育网站视频抓取

某教育网站视频抓取

直接上代码(运行前需安装 requests、pycryptodome(提供 Crypto 模块)和 threadpool,并在 headers 的 Cookie 中手动填入登录后的 Cookie):

import requests
import json
import time
import urllib3
from Crypto.Cipher import AES
import os
import re
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import binascii
import threadpool

# HTTP headers passed to every requests call in this script.
headers = {
    "Cookie": "",  # must be filled in manually before running
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
}

# Root output directory; merge() joins each video's relative path onto it.
path = "./source"

def videoList():
    """Fetch the video listing for the hard-coded course.

    POSTs the course id to the findVideoList endpoint using the module-level
    ``headers`` (TLS verification disabled) and returns the response body
    decoded from JSON.
    """
    payload = {"courseId": "2d92cb7a25264928b1f1408dc3842663"}
    resp = requests.post(
        "https://uc.tmooc.cn/video/findVideoList",
        data=payload,
        headers=headers,
        verify=False,
    )
    return json.loads(resp.text)

def checkVideo(item):
    """Query the checkVideo endpoint for one video entry.

    ``item`` must provide the ``"stageId"`` and ``"id"`` keys. Returns the
    response body decoded from JSON.
    """
    query = {
        "courseId": "2d92cb7a25264928b1f1408dc3842663",
        "stageId": item["stageId"],
        "videoId": item["id"],
        # Time-derived value sent as the "_" query parameter.
        "_": str(int(time.time() * 3)),
    }
    resp = requests.get(
        "https://uc.tmooc.cn/video/checkVideo",
        params=query,
        headers=headers,
        verify=False,
    )
    return json.loads(resp.text)

def getvideofile(item):
    """Look up the playable video description for a checkVideo result.

    Parameters:
        item: dict with an ``"obj"`` mapping (holding ``"guid"`` and
            optionally ``"lookBackGuid"``) and a ``"name"`` entry.

    Returns:
        The JSON object carried inside the JSONP response of the
        getvideofile servlet.

    Raises:
        KeyError: if neither ``"lookBackGuid"`` nor ``"guid"`` is present.
    """
    url = "https://p.bokecc.com/servlet/getvideofile"
    callback = "cc_jsonp_callback_453816"

    obj = item["obj"]
    # Prefer the look-back guid when one is supplied. The previous code
    # looked up the empty-string key, so this branch could never be taken.
    guid = obj.get("lookBackGuid") or obj["guid"]

    params = {
        "vid": guid,
        "siteid": "0DD1F081022C163E",
        "width": "100%",
        "useragent": "other",
        "version": "20140214",
        "hlssupport": "1",
        "vc": item["name"],
        "mediatype": "undefined",
        "divid": "cc_video_" + guid + "_3692846",
        "callback": callback,
        "r": "7263265.342129019",
    }
    response = requests.get(url, params=params, headers=headers, verify=False)
    # The body is JSONP of the form ``<callback>(<json>)``: drop the callback
    # name plus "(" at the front and the closing ")" at the end.
    body = response.text[len(callback) + 1:-1]
    return json.loads(body)

def get_key(uri):
    """Download ``uri`` and return the raw response bytes (used as the AES key)."""
    return requests.get(uri, headers=headers, verify=False).content

def get_mp4(uri):
    """Download one media segment from ``uri`` and return its raw bytes."""
    return requests.get(uri, headers=headers, verify=False).content

def merge(file,playurl,des):
    """Download every segment listed in an HLS playlist into a single file.

    Parameters:
        file: text of the .m3u8 playlist.
        playurl: URL the playlist was fetched from; segment and key URIs are
            resolved relative to it.
        des: output file path, relative to the module-level ``path``.

    Raises:
        ValueError: if an ``#EXT-X-KEY`` tag that enables encryption has no
            ``URI`` attribute.
    """
    from urllib.parse import urljoin

    des_dir = os.path.join(path, des)
    base_dir = os.path.dirname(des_dir)
    if base_dir:
        # exist_ok avoids the check-then-create race between worker threads
        # that write into the same directory.
        os.makedirs(base_dir, exist_ok=True)

    key = None          # current AES key, or None while segments are clear
    explicit_iv = None  # IV given by the key tag, if any
    sequence = 0        # media sequence number of the next segment

    # The context manager closes the file even if a download or decrypt fails.
    with open(des_dir, "wb") as f:
        # splitlines()+strip() also removes the "\r" of CRLF playlists, which
        # would otherwise end up inside segment URLs and the IV.
        for raw in file.splitlines():
            item = raw.strip()
            if not item:
                continue

            if item.startswith("#EXT-X-MEDIA-SEQUENCE"):
                sequence = int(item.split(":", 1)[1])
            elif item.startswith("#EXT-X-KEY"):
                if re.search(r"METHOD=NONE\b", item):
                    key = None
                    explicit_iv = None
                    continue
                uri_match = re.search(r'URI="(.*?)"', item)
                if uri_match is None:
                    raise ValueError("EXT-X-KEY tag without URI: " + item)
                # Match the IV attribute by name instead of assuming it is
                # the last attribute on the line.
                iv_match = re.search(r"IV=0[xX]([0-9A-Fa-f]+)", item)
                explicit_iv = (
                    binascii.unhexlify(iv_match.group(1)) if iv_match else None
                )
                key = get_key(urljoin(playurl, uri_match.group(1)))
            elif item.startswith("#"):
                # Any other tag or comment line carries no media.
                continue
            else:
                # urljoin handles both relative and absolute segment URIs.
                uri = urljoin(playurl, item)
                print(uri)
                content = get_mp4(uri)
                if key is not None:
                    # Without an explicit IV, HLS uses the segment's media
                    # sequence number as a 16-byte big-endian value.
                    iv = explicit_iv
                    if iv is None:
                        iv = sequence.to_bytes(16, "big")
                    # Each segment is encrypted independently, so a fresh
                    # cipher is needed per segment: reusing one CBC object
                    # carries chaining state over and corrupts the first
                    # block of every later segment.
                    content = AES.new(key, AES.MODE_CBC, iv).decrypt(content)
                    # NOTE(review): block padding is not stripped from the
                    # decrypted bytes, same as before - confirm whether the
                    # target player needs it removed.
                f.write(content)
                sequence += 1

def get_m3u8(item,des):
    """Fetch the playlist of the first entry in ``item["copies"]`` and merge it.

    The playlist text is downloaded from that entry's ``"playurl"`` and handed
    to ``merge`` together with the URL and the destination path ``des``.
    """
    first_copy = item["copies"][0]
    playurl = first_copy["playurl"]
    playlist_text = requests.get(playurl, headers=headers, verify=False).text
    merge(playlist_text, playurl, des)

def downloads():
    """Build the list of download jobs for every video in the course.

    Walks the two-level listing returned by ``videoList``; for each video it
    prints the destination path, resolves the video through ``checkVideo``
    and ``getvideofile``, and records a ``(None, {"item": ..., "des": ...})``
    tuple. Returns the list of those tuples.
    """
    # NOTE(review): the (None, dict) shape presumably makes
    # threadpool.makeRequests pass the dict as keyword arguments - confirm
    # against the threadpool documentation.
    jobs = []
    listing = videoList()
    for stage_no, stage in enumerate(listing["list"], start=1):
        stage_dir = str(stage_no) + "." + stage["name"]
        for video_no, entry in enumerate(stage["list"], start=1):
            file_name = str(video_no) + "." + entry["name"] + ".mp4"
            des = os.path.join(stage_dir, file_name)
            print(des)
            checked = checkVideo(entry)
            job_kwargs = {"item": getvideofile(checked), "des": des}
            jobs.append((None, job_kwargs))
    return jobs

def run(target_list):
    """Run ``get_m3u8`` for every entry of ``target_list`` on a 10-thread pool.

    Prints "start:" before queueing the work and "end!" once the pool has
    finished waiting on all queued requests.
    """
    print("start:")
    workers = threadpool.ThreadPool(10)
    for job in threadpool.makeRequests(get_m3u8, target_list):
        workers.putRequest(job)
    workers.wait()
    print("end!")
    
if __name__ == "__main__":
    # Only start downloading when executed as a script, so importing this
    # module does not trigger network traffic.
    items = downloads()
    run(items)

运行中

在这里插入图片描述

运行结果

在这里插入图片描述
在这里插入图片描述
有问题+v zp953362984

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

°纸鸢栀年°

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值