# pearvideo

import os
import re
import time

import requests


def pear_videos(url_html, id, save_dir='F:\\pythonProject\\video', timeout=30):
    """Download one Pear Video clip and save it as ``<cont_id>.mp4``.

    The ``videoStatus.jsp`` endpoint returns a ``srcUrl`` whose file name
    contains the response's ``systemTime`` value; replacing that value with
    ``cont-<cont_id>`` gives the URL that is actually downloaded, e.g.::

        srcUrl : .../adshort/20220325/1648302899162-15850227_adpkg-ad_hd.mp4
        real   : .../adshort/20220325/cont-1756320-15850227_adpkg-ad_hd.mp4

    Args:
        url_html: Video page URL such as
            ``https://www.pearvideo.com/video_1756320``. The segment after the
            first underscore is used as the content id.
        id: Content id used to build the ``Referer`` header. (The name
            shadows the builtin but is kept for backward compatibility.)
        save_dir: Directory the file is written to; created when missing.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        IndexError: If ``url_html`` contains no underscore.
        requests.HTTPError: If either request returns an error status.
        KeyError: If the status response carries no video information.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/99.0.4844.51 Safari/537.36 ',
        # Anti-hotlinking: the Referer tells the server which page this
        # request originated from.
        'Referer': f'https://www.pearvideo.com/video_{id}'
    }
    cont_id = url_html.split('_')[1]

    # NOTE(review): the mrd value was copied from a captured request; it is
    # presumably just a random cache-buster -- confirm before changing it.
    video_status = f'https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}&mrd=0.21677247232554842'

    resp = requests.get(url=video_status, headers=headers, timeout=timeout)
    resp.raise_for_status()
    dic = resp.json()

    try:
        url_src = dic['videoInfo']['videos']['srcUrl']
        system_time = dic['systemTime']
    except (KeyError, TypeError) as err:
        # Surface the server's payload so the failure reason is visible
        # instead of a bare missing-key error.
        raise KeyError(f'no video info for contId={cont_id}: {dic!r}') from err

    url_real = url_src.replace(system_time, f'cont-{cont_id}')

    os.makedirs(save_dir, exist_ok=True)
    video_path = os.path.join(save_dir, f'{cont_id}.mp4')

    # Stream the body so the whole video is never held in memory, and only
    # create the file once the server has answered with a success status.
    with requests.get(url_real, stream=True, timeout=timeout) as video_resp:
        video_resp.raise_for_status()
        with open(video_path, mode='wb') as v:
            for chunk in video_resp.iter_content(chunk_size=64 * 1024):
                v.write(chunk)

    print(cont_id, "ok")


if __name__ == '__main__':
    # Fetch the Pear Video home page, extract the video ids listed on it and
    # download the first few videos.
    url = 'https://www.pearvideo.com/'
    headers1 = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/99.0.4844.51 Safari/537.36 ',
    }
    # A timeout keeps the script from hanging on a stalled connection, and
    # the status check avoids parsing an error page as if it were the listing.
    resp1 = requests.get(url=url, headers=headers1, timeout=30)
    resp1.raise_for_status()

    page_content = resp1.text

    # Each video card links to "video_<id>"; capture the id part.
    obj = re.compile(r'<div class="vervideo-bd">.*?'
                     r'<a href="video_(?P<id>.*?)" class', re.S)

    # Don't crawl too much: stop after this many videos.
    max_videos = 3
    for c, i in enumerate(obj.finditer(page_content), start=1):
        video_id = i.group('id')
        url_html = f'https://www.pearvideo.com/video_{video_id}'
        print(url_html, "url_html")
        pear_videos(url_html, video_id)
        if c == max_videos:
            break
        # Pause between downloads to go easy on the server.
        time.sleep(3)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值