Python 爬取视频

 

# -*- coding: utf-8 -*-
import json
import pprint
import random
import string
from urllib.parse import quote
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from io import BytesIO
import gzip
import os
import re
import json
import requests

# Browser User-Agent strings; each one becomes a single-key header dict below.
_USER_AGENT_POOL = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11",
    "Opera/9.25 (Windows NT 5.1; U; en)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12",
    "Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9",
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0",
)

# List of request-header dicts, each holding exactly one 'user-agent' key.
my_headers = [{'user-agent': agent} for agent in _USER_AGENT_POOL]


def _download(url, path, headers):
    """Stream the body of *url* into the file *path*, failing on HTTP errors."""
    with requests.get(url, headers=headers, stream=True, timeout=30) as resp:
        resp.raise_for_status()
        with open(path, 'wb') as f:
            # Write chunk by chunk so the whole media file is never held in memory.
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                f.write(chunk)


def get_video(title, arcurl):
    """Download one bilibili video and mux it into ``<title>.mp4``.

    The video page is fetched, the JSON assigned to ``window.__playinfo__`` is
    extracted, and the first DASH video and audio streams are saved to
    ``FramesResponse.mp4`` and ``VoiceResponse.mp3`` in the current directory.
    ``ffmpeg`` (which must be on PATH) then copies both streams into the
    output file without re-encoding.

    Args:
        title: Base name of the output file; path separators are replaced
            by ``_`` so the result always lands in the current directory.
        arcurl: URL of the video page.

    Raises:
        requests.HTTPError: If the page or a media stream answers with an
            error status.
        ValueError: If the page contains no ``window.__playinfo__`` script.
        KeyError, IndexError: If the playinfo JSON lacks the expected
            ``data.dash.audio/video[0].baseUrl`` entries.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import subprocess  # local import: only the ffmpeg step needs it

    url = arcurl
    print(url)
    # NOTE(review): the cookie below embeds a real login session (SESSDATA,
    # bili_jct). Credentials should not live in source code; presumably it is
    # here to unlock higher-quality streams -- confirm and move it to
    # configuration.
    # NOTE(review): the referer points at a fixed video rather than `arcurl`;
    # presumably the server only requires a bilibili referer -- confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1',
        'cookie': "PVID=1; _uuid=FA5AFE1E-3E22-ADFE-9226-095428A97F3345229infoc; buvid3=398554D6-6C1F-43DB-8895-C824AA004AF1148822infoc; LIVE_BUVID=AUTO7716288257439557; blackside_state=1; rpdid=|(k|k)uYu|lY0J'uYk))kl~~R; CURRENT_QUALITY=80; fingerprint=1c362c351c6ff64e9b65b1205605e8c7; buvid_fp=398554D6-6C1F-43DB-8895-C824AA004AF1148822infoc; buvid_fp_plain=398554D6-6C1F-43DB-8895-C824AA004AF1148822infoc; SESSDATA=a9f32a53%2C1646718341%2Cf9fe6%2A91; bili_jct=71f1aded9db7273a9f5c5e758de2dc63; DedeUserID=593705704; DedeUserID__ckMd5=0c57b9d95a2d6cde; sid=5ammui66; CURRENT_FNVAL=976; bsource=search_baidu; innersign=1",
        'referer': 'https://www.bilibili.com/video/av672827214'
    }
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    data = response.text

    # Extract the JSON assigned to window.__playinfo__ (raw string so that
    # `\.` is a regex escape, not an invalid string escape).
    matches = re.findall(r'<script>window\.__playinfo__=(.*?)</script>', data)
    if not matches:
        raise ValueError('no window.__playinfo__ script found at ' + url)
    data_json = json.loads(matches[0])

    dash = data_json['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']

    # Keep each stream in the file whose name matches its content
    # (the picture track in FramesResponse, the sound track in VoiceResponse).
    _download(video_url, "FramesResponse.mp4", headers)
    _download(audio_url, "VoiceResponse.mp3", headers)

    # The title comes from scraped page data: strip path separators and anchor
    # the name to the current directory so it can neither escape it nor be
    # mistaken for an ffmpeg option when it starts with '-'.
    safe_title = re.sub(r'[\\/]', '_', title)
    output_path = os.path.join(os.curdir, safe_title + '.mp4')

    # Mux with the third-party tool ffmpeg. An argument list (no shell) keeps
    # quotes or shell metacharacters in the title from altering the command.
    subprocess.run(
        ['ffmpeg', '-i', 'FramesResponse.mp4', '-i', 'VoiceResponse.mp3',
         '-c', 'copy', output_path],
        check=True,
    )

if __name__ == '__main__':
    # Work from a previously saved copy of the bilibili search results page
    # (https://search.bilibili.com/all?keyword=%E9%99%88%E6%98%9F%E6%97%AD),
    # stored as ./loadHTML/temp.html.
    with open("./loadHTML/temp.html", "r", encoding='utf-8') as f:
        page = f.read()

    # The JSON of interest sits between the `window.__INITIAL_STATE__=`
    # assignment and the inline `;(function(){...` snippet that follows it.
    after_marker = page.split('''window.__INITIAL_STATE__=''')[1]
    json_chunks = after_marker.split(''';(function(){var s;(s=document.currentScript''')[:-1]
    state = json.loads(json_chunks[0])

    # Take the last entry of the first flow field's result list and iterate
    # over its 'data' items.
    field = state['flow']['fields'][0]
    last_result = state['flow'][field]['result'][-1]

    for item in last_result['data']:
        title = str(item['title'])
        # Drop the highlighted search keyword markup and the CJK brackets
        # before the title is used as a file name.
        title = title.replace('''<em class="keyword">陈星旭</em>''', '').replace('【', '').replace('】', '')
        get_video(title, item['arcurl'])
        break  # only the first item is downloaded

感谢 wangaolong0427的博客_会百度的皮卡丘_CSDN博客 的鼎力相助!!!

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值