Python 爬虫:逆向解析央视频

import asyncio
import binascii
import ctypes
import json
import random
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from datetime import datetime
from urllib.parse import urlparse, parse_qs, urlencode

import aiohttp
import execjs
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad

# JavaScript helper compiled once through execjs.
# createGUID() returns "<t>_<r>", where <t> is the current time in
# milliseconds rendered in base 36 and <r> is Math.random() rendered in
# base 36 with its first two characters removed when it starts with "0"
# (the /^0./ pattern, i.e. the leading "0." of the fraction).
# handle() calls it to produce the guid and pid values.
javascript_file = execjs.compile('''
function createGUID() {
    var e = (new Date).getTime().toString(36)
      , t = Math.random().toString(36).replace(/^0./, "");
    return "".concat(e, "_").concat(t)
}
''')

# Fixed 16-byte AES key and initialisation vector, decoded from hex.
# aes_encrypt() passes them to AES.new() for CBC mode.
KEY = bytes.fromhex("4E2918885FD98109869D14E0231A0BF4")
IV = bytes.fromhex("16B17E519DDD0CE5B79D7A63A4DD801C")


def aes_encrypt(data_string):
    """Encrypt *data_string* with AES-CBC and return the ciphertext as hex.

    The text is UTF-8 encoded, padded to a multiple of the 16-byte AES
    block size with ``pad``'s default scheme, and encrypted using the
    module-level ``KEY`` and ``IV``.

    Args:
        data_string: Text to encrypt.

    Returns:
        str: Hexadecimal representation of the ciphertext.
    """
    cipher = AES.new(KEY, AES.MODE_CBC, iv=IV)
    padded_plaintext = pad(data_string.encode('utf-8'), AES.block_size)
    ciphertext = cipher.encrypt(padded_plaintext)
    return ciphertext.hex()


def create_qa(data_string):
    """Return a signed 32-bit rolling hash of *data_string*.

    Every character updates the accumulator as ``a = (a << 5) - a + ord(ch)``
    (that is, ``a * 31 + ord(ch)``) and the result is wrapped to a signed
    32-bit integer.

    Args:
        data_string: Text to hash; it is iterated one Python character
            (Unicode code point) at a time.

    Returns:
        int: The hash, in the range ``[-2**31, 2**31 - 1]``. An empty
        string hashes to 0.
    """
    acc = 0
    for ch in data_string:
        # Python ints never overflow, so wrap explicitly on every step.
        # This keeps the accumulator at 32 bits instead of letting it grow
        # with the input length; the final value is unchanged because the
        # update is plain modular arithmetic.
        acc = ((acc << 5) - acc + ord(ch)) & 0xFFFFFFFF
    # Reinterpret the unsigned 32-bit value as two's-complement signed.
    if acc & 0x80000000:
        return acc - 0x100000000
    return acc


def create_ckey(vid, _rnd, app_ver, guid, platform):
    """Build the ``cKey`` value sent with the playvinfo request.

    The inputs and two fixed strings are joined with "|", the result is
    prefixed with "|" followed by its ``create_qa`` hash, encrypted with
    ``aes_encrypt``, upper-cased and prefixed with "--01".

    Args:
        vid: Video id.
        _rnd: Timestamp string used for the request.
        app_ver: Application version string.
        guid: Client GUID.
        platform: Platform id string.

    Returns:
        str: The assembled ckey.
    """
    # Step 1: assemble the "|"-separated plaintext. The empty first field
    # makes the joined string start with "|".
    fixed_token = "mg3c3b04ba"
    tail = "https://w.yangshipin.cn/|mozilla/5.0 (macintosh; ||Mozilla|Netscape|MacIntel|"
    plaintext = "|".join(("", vid, _rnd, fixed_token, app_ver, guid, platform, tail))

    # Step 2: prepend "|" and the hash of the plaintext.
    checksum = create_qa(plaintext)
    to_encrypt = f"|{checksum}{plaintext}"

    # Step 3: AES-encrypt, upper-case the hex and add the "--01" prefix.
    encrypted_hex = aes_encrypt(to_encrypt)
    return "--01" + encrypted_hex.upper()


async def get_vkey(session, guid, flowid, platform, app_ver, _rnd, vid, ckey):
    """Request playback info for *vid* and return its file name and vkey.

    Sends a GET request to the ``playvinfo`` endpoint, unwraps the JSONP
    reply and hands the decoded payload to
    ``txplayerJsonpCallBack_getinfo_389028``.

    Args:
        session: Client session object providing an async ``get`` context
            manager (``handle`` passes an ``aiohttp.ClientSession``).
        guid: Client GUID, sent as ``guid``.
        flowid: Flow id, sent as ``flowid``.
        platform: Platform id, sent as ``platform``.
        app_ver: Application version, sent as ``appVer``.
        _rnd: Timestamp string, sent as ``_rnd``.
        vid: Video id, sent as ``vid``.
        ckey: Signed key, sent as ``cKey``.

    Returns:
        tuple: ``(fn, vkey)`` as returned by
        ``txplayerJsonpCallBack_getinfo_389028``.

    Raises:
        ValueError: If the response is not of the form ``callback(<json>)``
            or the JSON cannot be decoded.
    """
    params = {
        "callback": "txplayerJsonpCallBack_getinfo_389028",
        "charge": "0",
        "defaultfmt": "auto",
        "otype": "json",
        "guid": guid,
        "flowid": flowid,
        "platform": platform,
        "sdtfrom": "v7007",
        "defnpayver": "0",
        "appVer": app_ver,
        "host": "m.yangshipin.cn",
        "ehost": "https://m.yangshipin.cn/video",
        "refer": "m.yangshipin.cn",
        "sphttps": "1",
        "sphls": "1",
        "_rnd": _rnd,
        "spwm": "4",
        "vid": vid,
        "defn": "auto",
        "fhdswitch": "",
        "show1080p": "false",
        "dtype": "3",
        "clip": "",
        "defnsrc": "",
        "fmt": "auto",
        "defsrc": "1",
        "encryptVer": "8.1",
        "cKey": ckey
    }

    headers = {
        "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36",
        "referer": "https://m.yangshipin.cn/"
    }
    playvinfo_url = "https://playvv.yangshipin.cn/playvinfo"
    async with session.get(url=playvinfo_url, params=params, headers=headers) as res:
        text = await res.text()

    # The reply is JSONP: "<callback>(<json>)". Decode the JSON argument
    # explicitly rather than eval()-ing text received from the network,
    # which would execute arbitrary code and also fails on the JSON
    # literals true/false/null.
    open_paren = text.find("(")
    close_paren = text.rfind(")")
    if open_paren == -1 or close_paren < open_paren:
        raise ValueError("unexpected playvinfo response: {!r}".format(text[:200]))
    payload = json.loads(text[open_paren + 1:close_paren])
    return txplayerJsonpCallBack_getinfo_389028(payload)


def txplayerJsonpCallBack_getinfo_389028(dict):
    """Extract the file name and vkey from a decoded playvinfo payload.

    Looks at the first entry of the ``vi`` list inside ``vl`` and returns
    its ``fn`` and ``fvkey`` values. The function name equals the
    ``callback`` query value sent by ``get_vkey``.

    Args:
        dict: Mapping with a ``vl`` mapping that holds a ``vi`` list.
            (The parameter name shadows the builtin; it is kept so the
            signature stays unchanged.)

    Returns:
        tuple: ``(fn, vkey)``; either item is ``None`` when the
        corresponding key is absent from the first ``vi`` entry.
    """
    first_entry = dict.get("vl").get("vi")[0]
    return first_entry.get("fn"), first_entry.get("fvkey")


async def play_video(session, guid, vkey, fn, vid, pid, app_ver, platform):
    """Send a playback report for *vid* to the ``kvcollect`` endpoint.

    Builds the CDN URL for file *fn* (query fields ``sdtfrom``, ``guid``,
    ``vkey`` and ``platform``), places it in the ``vurl`` field of the
    report form, POSTs the form and prints the response body.

    The CDN URL has the shape:
    https://mp4playcloud-cdn.ysp.cctv.cn/<fn>?sdtfrom=v7007&guid=<guid>&vkey=<vkey>&platform=2

    Args:
        session: Client session object providing an async ``post`` context
            manager (``handle`` passes an ``aiohttp.ClientSession``).
        guid: Client GUID.
        vkey: Key returned by the playvinfo request.
        fn: Video file name returned by the playvinfo request.
        vid: Video id.
        pid: Play id, sent as ``pid``.
        app_ver: Application version, sent as ``version``.
        platform: Platform id, sent as ``platform`` in the report form.
    """
    cdn_query = urlencode({
        "sdtfrom": "v7007",
        "guid": guid,
        "vkey": vkey,
        "platform": "2"
    })
    video_download_url = f"https://mp4playcloud-cdn.ysp.cctv.cn/{fn}?{cdn_query}"

    query = {"BossId": "2865", "Pwd": "1698957057", "_dc": random.random()}

    # Values that appear more than once in the report form.
    page_url = f"https://m.yangshipin.cn/video?type=0&vid={vid}"
    reported_ua = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML  like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36"

    form = {
        "uin": "",
        "vid": vid,
        "coverid": "",
        "pid": pid,
        "guid": guid,
        "unid": "",
        "vt": "0",
        "type": "3",
        "url": page_url,
        "bi": "0",
        "bt": "0",
        "version": app_ver,
        "platform": platform,
        "defn": "0",
        "ctime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "ptag": "w_yangshipin_cn",
        "isvip": "-1",
        "tpid": "18",
        "pversion": "h5",
        "hc_uin": "",
        "hc_vuserid": "",
        "hc_openid": "",
        "hc_appid": "",
        "hc_pvid": "0",
        "hc_ssid": "",
        "hc_qq": "",
        "hh_ua": reported_ua,
        "ua": reported_ua,
        "ckey": "",
        "iformat": "0",
        "hh_ref": page_url,
        "vuid": "",
        "vsession": "",
        "format_ua": "other",
        "common_rcd_info": "",
        "common_ext_info": "",
        "v_idx": "0",
        "rcd_info": "",
        "extrainfo": "",
        "c_channel": "",
        "vurl": video_download_url,
        "step": "6",
        "val": "640",
        "val1": "1",
        "val2": "1",
        "idx": "0",
        "c_info": "",
        "isfocustab": "0",
        "isvisible": "0",
        "fact1": "",
        "fact2": "",
        "fact3": "",
        "fact4": "",
        "fact5": "",
        "cpay": "0",
        "tpay": "0",
        "dltype": "1",
    }

    request_headers = {
        "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36",
        "referer": "https://m.yangshipin.cn/",
    }
    async with session.post(
        url="https://btrace.yangshipin.cn/kvcollect",
        params=query,
        data=form,
        headers=request_headers,
    ) as res:
        body = await res.text()
        print(body)


async def handle(url):
    """Perform one full request sequence for the video referenced by *url*.

    Reads ``vid`` from the URL query string, generates guid/pid values,
    builds the ckey, fetches ``fn`` and ``vkey`` via ``get_vkey`` and then
    sends the playback report via ``play_video``. All requests share one
    ``aiohttp.ClientSession`` that is closed when the sequence ends.

    Args:
        url: Video page URL whose query string contains ``vid``.
    """
    async with aiohttp.ClientSession() as session:
        query = parse_qs(urlparse(url).query)
        vid = query['vid'][0]
        app_ver, platform = "1.3.5", "4330701"
        guid = javascript_file.call('createGUID')
        # Separate createGUID call; expected to yield a value different from guid.
        pid = javascript_file.call('createGUID')
        flowid = "_".join((pid, platform))
        _rnd = str(int(time.time()))

        # 1. Build the ckey.
        ckey = create_ckey(vid, _rnd, app_ver, guid, platform)

        # 2. Request playvinfo with the ckey to obtain fn and vkey.
        fn, vkey = await get_vkey(
            session, guid, flowid, platform, app_ver, _rnd, vid, ckey
        )

        # 3. Send the playback report.
        await play_video(session, guid, vkey, fn, vid, pid, app_ver, platform)


async def engine(url, per_request_count):
    """Run ``handle(url)`` concurrently *per_request_count* times.

    Each call is scheduled as its own task and the coroutine returns once
    all of them have finished. ``asyncio.wait`` does not re-raise task
    exceptions, so a failing ``handle`` call does not abort the others.

    Args:
        url: Video page URL passed to every ``handle`` call.
        per_request_count: Number of concurrent ``handle`` calls; zero or a
            negative number schedules nothing.
    """
    task_list = [
        asyncio.create_task(handle(url)) for _ in range(per_request_count)
    ]
    if not task_list:
        # asyncio.wait() raises ValueError when given an empty collection,
        # so there is nothing to wait for here.
        return
    await asyncio.wait(task_list)


def task(url, per_request_count):
    """Run one ``engine`` batch to completion via ``asyncio.run``.

    ``run`` submits this function to its thread pool.

    Args:
        url: Video page URL forwarded to ``engine``.
        per_request_count: Number of concurrent requests in the batch.
    """
    batch = engine(url, per_request_count)
    asyncio.run(batch)


def run(url="https://w.yangshipin.cn/video?type=0&vid=n000094fgki",
        total_count=100, per_request_count=3, max_workers=5):
    """Distribute *total_count* request sequences over a thread pool.

    The work is split into batches of *per_request_count*; each batch is
    submitted to the pool as one ``task`` call. A final smaller batch
    covers the remainder, if there is one. The function returns after all
    submitted batches have finished.

    Args:
        url: Video page URL to process.
        total_count: Total number of request sequences to perform.
        per_request_count: Number of concurrent sequences per batch.
        max_workers: Number of threads in the pool.

    Raises:
        ValueError: If *per_request_count* is not positive or
            *total_count* is negative.
    """
    if per_request_count <= 0:
        raise ValueError("per_request_count must be positive")
    if total_count < 0:
        raise ValueError("total_count must not be negative")

    full_batches, remainder = divmod(total_count, per_request_count)

    # The context manager shuts the pool down and waits for the submitted
    # batches even if a submit call raises.
    with ThreadPoolExecutor(max_workers) as pool:
        for _ in range(full_batches):
            pool.submit(task, url, per_request_count)

        # Skip the trailing batch when the division is exact: an empty
        # batch would do no work.
        if remainder:
            pool.submit(task, url, remainder)


# Script entry point: start the run only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    run()

  • 2
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值