之前通过B站客户端把视频缓存到电脑后,用Python提取本地ts文件并合并成视频,但总觉得不方便,于是想通过抓取视频播放页面的信息来直接下载视频。经过试验,这种方法是可行的,但是有几次报HTTPS访问出错,后来不知道怎么又可以了,以后再慢慢完善,反正这个脚本是可以把视频拉下来的。
#bilibili_url.py
import json
import os
import sys
import subprocess
import requests
from lxml import etree
import re
def get_html(url):
    """Fetch ``url`` with browser-like headers and return the body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        'Referer': 'https://www.bilibili.com/'
    }
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail here with a clear HTTP error instead of handing an error page to
    # the parsers further down the pipeline.
    response.raise_for_status()
    return response.text
def get_playinfo(html):
    """Extract the text assigned to ``window.__playinfo__`` in ``html``.

    Args:
        html: Page source to search.

    Returns:
        The text between ``<script>window.__playinfo__=`` and the closing
        ``</script>`` of the first matching inline script, or an empty
        string when no such script is present.
    """
    match = re.search(r'\<script\>window\.__playinfo__=(.*?)\</script\>', html)
    if match is None:
        return ""
    return match.group(1)
def get_video(playinfo):
    """Download a video stream listed in ``playinfo`` to ``video.m4s``.

    The URL is taken from ``data.dash.video[0].baseUrl`` of the decoded
    play-info JSON. The request uses the module-level ``headers`` dict,
    which must be defined before this function is called.

    Args:
        playinfo: JSON text of the page's ``window.__playinfo__`` object.

    Raises:
        json.JSONDecodeError: If ``playinfo`` is not valid JSON.
        KeyError, IndexError: If the expected DASH video entry is missing.
        requests.HTTPError: If the stream URL answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    playjson = json.loads(playinfo)
    video_url = playjson["data"]["dash"]["video"][0]['baseUrl']
    # Stream the body in chunks so a large video is never held in memory as a
    # single bytes object; the timeout keeps a stalled transfer from hanging.
    with requests.get(url=video_url, headers=headers, stream=True, timeout=30) as response:
        # Check the status before opening the file so an error body is never
        # written out as if it were media data.
        response.raise_for_status()
        with open('video.m4s', 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
def get_audio(playinfo):
    """Download an audio stream listed in ``playinfo`` to ``audio.m4s``.

    The URL is taken from ``data.dash.audio[0].baseUrl`` of the decoded
    play-info JSON. The request uses the module-level ``headers`` dict,
    which must be defined before this function is called.

    Args:
        playinfo: JSON text of the page's ``window.__playinfo__`` object.

    Raises:
        json.JSONDecodeError: If ``playinfo`` is not valid JSON.
        KeyError, IndexError: If the expected DASH audio entry is missing.
        requests.HTTPError: If the stream URL answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    playjson = json.loads(playinfo)
    audio_url = playjson["data"]["dash"]["audio"][0]['baseUrl']
    # Stream the body in chunks so the whole track is never held in memory as
    # a single bytes object; the timeout keeps a stalled transfer from hanging.
    with requests.get(url=audio_url, headers=headers, stream=True, timeout=30) as response:
        # Check the status before opening the file so an error body is never
        # written out as if it were media data.
        response.raise_for_status()
        with open('audio.m4s', 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
def get_title(html):
    """Return the text of the first ``<h1>`` text node with spaces removed.

    Args:
        html: Page source to parse.

    Returns:
        The first ``//h1/text()`` result with every ``' '`` character
        stripped out.

    Raises:
        IndexError: If the page has no ``<h1>`` text node.
    """
    heading_texts = etree.HTML(html).xpath('//h1/text()')
    return heading_texts[0].replace(' ', '')
# Script body: download the video and audio streams of the page whose URL is
# given as the first command-line argument, then mux them into
# "<title>.mp4" with ffmpeg.
if len(sys.argv) <= 1:
    print('请提供URL参数。')
    # sys.exit is always available (the bare exit() helper is injected by the
    # site module); a non-zero status reports the usage error to the caller.
    sys.exit(1)
url = sys.argv[1]
# Kept at module level on purpose: get_video() and get_audio() read this
# dict as a global.
headers = {
    "referer": "https://www.bilibili.com",
    "origin": "https://www.bilibili.com",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36",
}
html = get_html(url)
play_json = get_playinfo(html)
if not play_json:
    # get_playinfo() returns "" when the page has no play-info script; stop
    # with a readable message instead of a JSON decoding traceback.
    print('未能在页面中找到播放信息(window.__playinfo__)。')
    sys.exit(1)
title = get_title(html)
print(title)
get_video(play_json)
get_audio(play_json)

# Merge the downloaded streams.
v_path = 'video.m4s'
a_path = 'audio.m4s'
# The title comes from the web page, so replace characters that are not
# allowed in file names (Windows is the strictest) and drop trailing
# dots/spaces; fall back to a fixed name if nothing usable remains.
safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title).strip().rstrip('. ')
output = (safe_title or 'output') + '.mp4'
if os.path.exists(output):
    os.remove(output)
ffmpeg_path = r"C:\ffmpeg\ffmpeg.exe"
# Pass the arguments as a list so the title is never re-parsed as part of a
# command line, whatever characters it contains.
cmd = [
    ffmpeg_path,
    "-i", v_path,
    "-i", a_path,
    "-c:v", "copy",
    "-strict", "experimental",
    output,
]
print(' '.join(cmd))
returncode = subprocess.run(cmd).returncode
if returncode == 0:
    print("合并ts文件成功")
    os.remove(v_path)
    os.remove(a_path)
    print('删除临时文件成功')
else:
    # The temporary .m4s files are left in place when ffmpeg fails.
    print("子进程出错,返回码:", returncode)
将B站播放页面的URL作为命令行参数来运行脚本,形如:
python bilibili_url.py https://www.bilibili.com/video/BV1by411H7
代码中的ffmpeg_path请根据自己的环境自行设置。输出文件名为视频的标题,如果标题中含有文件名不允许的特殊字符,可能会出错,可自行修改输出文件名。