使用requests、lxml(etree)、re和ffmpeg
- 先根据网址获取页面数据
# Ask for the video page URL (the prompt asks for the URL from the address bar).
url_ = input('请输入网址栏的url:')
# Headers sent with the page request.
headers_ = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62",
    # Placeholder (literally "find it yourself"): replace it with your own
    # Cookie string, presumably copied from your browser session.
    "Cookie": "自己找",
    "Referer": "https://www.bilibili.com/",
}
# A timeout keeps the script from hanging forever on a stalled connection.
response_ = requests.get(url_, headers=headers_, timeout=30)
# Fail early on an HTTP error instead of parsing an error page later on.
response_.raise_for_status()
- 使用Etree转换数据类型
# Parse the decoded response body into an lxml tree that can be queried with XPath.
page_source = response_.text
html_body = etree.HTML(page_source)
- 使用Xpath获取页面数据
- 分别提取包含 视频名,视频播放地址的字符串
# Text of the page's <title> element; the video name is extracted from it later.
title_nodes = html_body.xpath('//title/text()')
title_namef = title_nodes[0]
# Text of the first <script> whose content mentions window.__playinfo__;
# the stream URLs are extracted from it later.
playinfo_nodes = html_body.xpath('//script[contains(text(),"window.__playinfo__")]/text()')
url_str = playinfo_nodes[0]
- 使用正则提取正确的内容
# Video name: the text captured in front of "_哔哩哔哩" in the page title.
title_pattern = re.compile(r'(.*?)_哔哩哔哩')
title_name = title_pattern.findall(title_namef)[0]
# Link to the picture stream: the first "baseUrl" of the "video" array.
video_pattern = re.compile(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"')
video_url = video_pattern.findall(url_str)[0]
# Link to the sound stream: the first "baseUrl" of the "audio" array.
audio_pattern = re.compile(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"')
audio_url = audio_pattern.findall(url_str)[0]
- 通过链接获取数据
# Use different request headers (Referer is the video page itself) so the
# media requests are not blocked.
headers2_ = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62",
    # Placeholder (literally "find it yourself"): replace it with your own Cookie string.
    "Cookie": "自己找",
    "Referer": url_,
}
# A timeout keeps the script from hanging forever on a stalled connection.
response_video = requests.get(video_url, headers=headers2_, timeout=30)
response_audio = requests.get(audio_url, headers=headers2_, timeout=30)
# Stop on an HTTP error rather than saving an error body as media data.
response_video.raise_for_status()
response_audio.raise_for_status()
data_video = response_video.content
data_audio = response_audio.content
- 数据导入临时文件中,优化视频名称
# Remove characters that are not allowed in Windows file names. The pattern is
# a raw string and includes the backslash, which the previous pattern missed.
title_name = re.sub(r'[\\/:*?"<>|]', '', title_name).strip()
# Temporary files carry a "01" suffix so they do not clash with the final output name.
title_new = title_name + "01"
# Write-only binary mode is enough; the files are never read back here.
with open(f"{title_new}.mp4", 'wb') as f:
    f.write(data_video)
with open(f"{title_new}.mp3", 'wb') as f:
    f.write(data_audio)
- 使用开源ffmpeg合并音视频,并删除临时文件
import subprocess

# Pass the arguments as a list (no shell) so characters in the page title are
# never interpreted by a shell. check=True raises CalledProcessError when
# ffmpeg exits with a non-zero status.
subprocess.run(
    ['ffmpeg', '-i', f"{title_new}.mp4", '-i', f"{title_new}.mp3", '-c', 'copy', f"{title_name}.mp4"],
    check=True,
)
# Reached only when ffmpeg succeeded, so the downloads are kept if the merge fails.
os.remove(f"{title_new}.mp4")
os.remove(f"{title_new}.mp3")
完整代码
# -*- coding: utf-8 -*-
# @Time : 2021/5/21 05:21
# @Author : wangzhengxiang
# @File : bilibili.py
# @Software : PyCharm
import os
import re
import subprocess

import requests
from lxml import etree
# Seconds passed as the `timeout` argument of every HTTP request.
REQUEST_TIMEOUT = 30
# Number of bytes requested per chunk while streaming a download to disk.
CHUNK_SIZE = 1024 * 1024


def build_headers(referer):
    """Return the request headers used by this script with the given Referer."""
    return {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62",
        # Placeholder (literally "find it yourself"): replace it with your own Cookie string.
        "Cookie": "自己找",
        "Referer": referer,
    }


def extract_title(page_title):
    """Return the part of the page title in front of "_哔哩哔哩".

    Falls back to the whole title when the suffix is absent, so an unexpected
    title no longer aborts the script with an IndexError.
    """
    match = re.search(r'(.*?)_哔哩哔哩', page_title)
    return match.group(1) if match else page_title


def sanitize_filename(title):
    """Strip characters that are not allowed in Windows file names.

    The character class includes the backslash, which the earlier non-raw
    pattern did not remove.
    """
    return re.sub(r'[\\/:*?"<>|]', '', title).strip()


def extract_stream_urls(playinfo_text):
    """Return ``(video_url, audio_url)`` found in the playinfo script text.

    Each URL is the first "baseUrl" of the "video" / "audio" array.

    Raises:
        ValueError: if either URL cannot be found.
    """
    video_urls = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', playinfo_text)
    audio_urls = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', playinfo_text)
    if not video_urls or not audio_urls:
        raise ValueError('video/audio "baseUrl" not found in window.__playinfo__')
    return video_urls[0], audio_urls[0]


def download_to_file(url, headers, path):
    """Stream the response body of *url* into the file at *path*.

    Writing chunk by chunk avoids holding a whole media file in memory.
    Raises ``requests.HTTPError`` on an HTTP error status, so an error body is
    never saved as media data.
    """
    with requests.get(url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        with open(path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)


def merge_streams(video_path, audio_path, output_path):
    """Merge the two downloaded streams into *output_path* with ffmpeg (stream copy).

    Arguments are passed as a list (no shell), so characters in the file names
    are never interpreted by a shell. Raises ``subprocess.CalledProcessError``
    when ffmpeg exits with a non-zero status.
    """
    subprocess.run(
        ['ffmpeg', '-i', video_path, '-i', audio_path, '-c', 'copy', output_path],
        check=True,
    )


def main():
    """Ask for a video page URL, download both streams and merge them into one mp4."""
    url_ = input('请输入网址栏的url:')
    response_ = requests.get(
        url_,
        headers=build_headers("https://www.bilibili.com/"),
        timeout=REQUEST_TIMEOUT,
    )
    # Fail early on an HTTP error instead of parsing an error page.
    response_.raise_for_status()

    # Parse the page so it can be queried with XPath.
    html_body = etree.HTML(response_.text)
    title_nodes = html_body.xpath('//title/text()')
    playinfo_nodes = html_body.xpath('//script[contains(text(),"window.__playinfo__")]/text()')
    if not title_nodes or not playinfo_nodes:
        raise ValueError('page has no <title> or no window.__playinfo__ script')

    title_name = sanitize_filename(extract_title(title_nodes[0]))
    video_url, audio_url = extract_stream_urls(playinfo_nodes[0])

    # Temporary files carry a "01" suffix so they do not clash with the final output name.
    title_new = title_name + "01"
    video_path = f"{title_new}.mp4"
    audio_path = f"{title_new}.mp3"

    # The media requests use the video page itself as Referer.
    media_headers = build_headers(url_)
    download_to_file(video_url, media_headers, video_path)
    download_to_file(audio_url, media_headers, audio_path)

    merge_streams(video_path, audio_path, f"{title_name}.mp4")
    # Reached only when ffmpeg succeeded, so the downloads are kept if the merge fails.
    os.remove(video_path)
    os.remove(audio_path)


if __name__ == '__main__':
    main()
FFmpeg
FFmpeg是一套可以用来记录、转换数字音频、视频,并能将其转化为流的开源计算机程序。采用LGPL或GPL许可证。它提供了录制、转换以及流化音视频的完整解决方案。它包含了非常先进的音频/视频编解码库libavcodec,为了保证高可移植性和编解码质量,libavcodec里很多code都是从头开发的。
下载和配置
下载链接
下载后解压
参考解压后的文件目录,新建环境变量FFMPEG_HOME,值为ffmpeg的解压路径
在Path中添加%FFMPEG_HOME%\bin
确定后按Win+R,输入cmd打开命令提示符,然后输入ffmpeg进行测试
如图则可行