免责声明
文章所涉及内容,仅供安全研究与教学之用,由于传播、利用本文所提供的信息而造成的任何直接或者间接的后果及损失,均由使用者本人负责,作者不为此承担任何法律及连带责任。
爬取单个视频
需要视频的bvid号
最终代码
需要更换为自己的cookie和header
import time
from tqdm import tqdm
import requests
import json
import re,os
from moviepy.editor import AudioFileClip, VideoFileClip, CompositeVideoClip
# Cookies sent with the video-page request. Left empty here; fill in your own.
cookies = dict()
# HTTP headers sent with both the page request and the media downloads.
# Left empty here; fill in your own.
headers = dict()
# Query-string parameters appended to the video-page URL.
params = dict(
    spm_id_from='333.1007.tianma.1-2-2.click',
    vd_source='367145f6e11228ae3f9f416af6f5cd2e',
)
def download_vedio_radio(url, output_path, title, chunk_size=64 * 1024, timeout=30):
    """Stream ``url`` into a file below the current working directory.

    A tqdm progress bar is shown while the body is being written.

    Args:
        url: Direct URL of the media stream to download.
        output_path: Path suffix appended verbatim to ``os.getcwd()``,
            e.g. ``'/result/name.mp4'``.
        title: Label shown in the progress-bar description.
        chunk_size: Number of bytes requested per read from the response.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The full path of the file that was written.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    save_path = os.getcwd() + output_path
    # Create the directory the file really goes into, rather than assuming
    # it is always "result" relative to the working directory.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # The context managers guarantee the connection, the file and the
    # progress bar are all released even if the transfer fails midway.
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
        # Without this check an error page would be saved as the media file.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        with open(save_path, 'wb') as file:
            with tqdm(total=total_size, unit='iB', unit_scale=True,
                      desc=f"正在下载{title}") as progress_bar:
                for data in response.iter_content(chunk_size):
                    file.write(data)
                    progress_bar.update(len(data))
    return save_path
def get_video_url(vb):
    """Fetch a Bilibili video page and extract its stream URLs and title.

    Args:
        vb: The BV id of the video, appended to the video-page URL.

    Returns:
        A ``(video_url, audio_url, title)`` tuple. The URLs come from the
        first entry of the ``video`` and ``audio`` lists in the page's
        embedded ``window.__playinfo__`` JSON.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        IndexError: If the title or the play-info JSON is not in the page.
        KeyError: If the play-info JSON lacks the expected fields.
    """
    response = requests.get(
        url="https://www.bilibili.com/video/" + vb,
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    text_data = response.text

    # Match any attributes on <title> and stop at the first closing tag; the
    # previous pattern needed one exact attribute and a trailing space.
    title_match = re.search(r'<title[^>]*>(.*?)</title>', text_data)
    if title_match is None:
        # IndexError is kept so callers see the same exception type as before.
        raise IndexError(f"no <title> element found in the page for {vb}")
    title = title_match.group(1).replace('_哔哩哔哩_bilibili', '').strip()

    playinfo_match = re.search(r'window\.__playinfo__=(.*?)</script>', text_data)
    if playinfo_match is None:
        raise IndexError(f"window.__playinfo__ not found in the page for {vb}")
    dict_data = json.loads(playinfo_match.group(1))

    dash = dict_data['data']['dash']
    video_stream = dash['video'][0]
    audio_stream = dash['audio'][0]
    # Try the key the code used originally first, then the other spelling.
    video_url = video_stream.get('base_url') or video_stream['baseUrl']
    audio_url = audio_stream.get('baseUrl') or audio_stream['base_url']
    return video_url, audio_url, title
import subprocess
def merge_audio_video(video_file, audio_file):
    """Mux a video file and an audio file into one MP4 using ffmpeg.

    Both streams are copied without re-encoding. The output is written next
    to ``video_file`` as ``<video_file without extension>-.mp4`` and any
    existing file with that name is overwritten.

    Args:
        video_file: Path of the video-only input file.
        audio_file: Path of the audio-only input file.

    Returns:
        The path of the merged output file.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    start_time = time.time()
    # splitext removes only the final extension, so dots elsewhere in the
    # path (directory names, titles) no longer truncate the output name.
    outfile_name = os.path.splitext(video_file)[0] + '-.mp4'
    # An argument list with no shell keeps spaces and shell metacharacters in
    # the file names from breaking or hijacking the command.
    cmd = [
        'ffmpeg', '-y',
        '-i', video_file,
        '-i', audio_file,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        outfile_name,
    ]
    # check=True turns a failed merge into an exception instead of a false
    # success message.
    subprocess.run(cmd, check=True)
    end_time = time.time()
    print(f'{outfile_name} 合并成功,耗时{round(end_time - start_time, 1)}秒')
    return outfile_name
def delete_file(directory, file_name):
    """Remove ``<file_name>.mp4`` and ``<file_name>.mp3`` from ``directory``.

    One status line is printed per extension, reporting whether the file
    was deleted or was not present.

    Args:
        directory: Directory that holds the files.
        file_name: Base file name without extension.
    """
    base = os.path.join(directory, file_name)
    for suffix in (".mp4", ".mp3"):
        target = base + suffix
        if not os.path.exists(target):
            print(f"{file_name}不存在。")
            continue
        os.remove(target)
        print(f"{file_name}已成功删除。")
def get_one_video_url(vb):
    """Download one video by BV id, merge its streams, and clean up.

    The steps are: resolve the stream URLs and title, download the video
    and audio streams into ``result/`` under the working directory, merge
    them, then delete the two intermediate files. Any exception is printed
    and not re-raised.

    Args:
        vb: The BV id of the video to download.
    """
    try:
        video_url, audio_url, title = get_video_url(vb)
        # Page titles are free text and may contain path separators, spaces
        # or other characters that are unsafe in file names, so every run of
        # such characters becomes "_". Fall back to the BV id if nothing
        # usable is left.
        safe_title = re.sub(r'[^\w\-]+', '_', title).strip('_') or vb
        # Download the video and audio streams; the original title is kept
        # for the progress-bar label only.
        video_path = download_vedio_radio(video_url, '/result/' + safe_title + '.mp4', title)
        audio_path = download_vedio_radio(audio_url, '/result/' + safe_title + '.mp3', title)
        # Merge using the paths the downloader actually wrote to.
        merge_audio_video(video_path, audio_path)
        # Remove the intermediate video-only and audio-only files.
        delete_file(os.path.join(os.getcwd(), "result"), safe_title)
    except Exception as e:
        print(e)
if __name__ == '__main__':
    # Download a single hard-coded video and report the wall-clock time taken.
    began = time.time()
    get_one_video_url("BV1VG411k77r")
    elapsed = round(time.time() - began, 1)
    print("耗时:", elapsed, "秒")
步骤讲解
获取视频和音频url链接
需要自定义headers,params和cookies,如果觉得麻烦,有快捷方式
快捷方式:使用在线工具 "Convert curl commands to code" 自动生成请求代码。先在浏览器开发者工具中找到发送 XHR 数据包的请求,右击将其保存(复制)为 curl 命令,再粘贴到该网站即可得到对应的 headers、params 和 cookies。
自行更改
def get_video_url(vb):
    """Fetch a Bilibili video page and extract its stream URLs and title.

    Args:
        vb: The BV id of the video, appended to the video-page URL.

    Returns:
        A ``(video_url, audio_url, title)`` tuple. The URLs come from the
        first entry of the ``video`` and ``audio`` lists in the page's
        embedded ``window.__playinfo__`` JSON.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        IndexError: If the title or the play-info JSON is not in the page.
        KeyError: If the play-info JSON lacks the expected fields.
    """
    response = requests.get(
        url="https://www.bilibili.com/video/" + vb,
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    text_data = response.text

    # Match any attributes on <title> and stop at the first closing tag; the
    # previous pattern needed one exact attribute and a trailing space.
    title_match = re.search(r'<title[^>]*>(.*?)</title>', text_data)
    if title_match is None:
        # IndexError is kept so callers see the same exception type as before.
        raise IndexError(f"no <title> element found in the page for {vb}")
    title = title_match.group(1).replace('_哔哩哔哩_bilibili', '').strip()

    playinfo_match = re.search(r'window\.__playinfo__=(.*?)</script>', text_data)
    if playinfo_match is None:
        raise IndexError(f"window.__playinfo__ not found in the page for {vb}")
    dict_data = json.loads(playinfo_match.group(1))

    dash = dict_data['data']['dash']
    video_stream = dash['video'][0]
    audio_stream = dash['audio'][0]
    # Try the key the code used originally first, then the other spelling.
    video_url = video_stream.get('base_url') or video_stream['baseUrl']
    audio_url = audio_stream.get('baseUrl') or audio_stream['base_url']
    return video_url, audio_url, title
下载视频和音频
def download_vedio_radio(url, output_path, title, chunk_size=64 * 1024, timeout=30):
    """Stream ``url`` into a file below the current working directory.

    A tqdm progress bar is shown while the body is being written.

    Args:
        url: Direct URL of the media stream to download.
        output_path: Path suffix appended verbatim to ``os.getcwd()``,
            e.g. ``'/result/name.mp4'``.
        title: Label shown in the progress-bar description.
        chunk_size: Number of bytes requested per read from the response.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The full path of the file that was written.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    save_path = os.getcwd() + output_path
    # Create the directory the file really goes into, rather than assuming
    # it is always "result" relative to the working directory.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # The context managers guarantee the connection, the file and the
    # progress bar are all released even if the transfer fails midway.
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
        # Without this check an error page would be saved as the media file.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        with open(save_path, 'wb') as file:
            with tqdm(total=total_size, unit='iB', unit_scale=True,
                      desc=f"正在下载{title}") as progress_bar:
                for data in response.iter_content(chunk_size):
                    file.write(data)
                    progress_bar.update(len(data))
    return save_path
合并视频和音频
这里使用 ffmpeg 合并视频和音频,速度非常快。使用前需要先到 ffmpeg 官网下载,并将其 bin 目录添加到系统环境变量 PATH 中。
def merge_audio_video(video_file, audio_file):
    """Mux a video file and an audio file into one MP4 using ffmpeg.

    Both streams are copied without re-encoding. The output is written next
    to ``video_file`` as ``<video_file without extension>-.mp4`` and any
    existing file with that name is overwritten.

    Args:
        video_file: Path of the video-only input file.
        audio_file: Path of the audio-only input file.

    Returns:
        The path of the merged output file.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    start_time = time.time()
    # splitext removes only the final extension, so dots elsewhere in the
    # path (directory names, titles) no longer truncate the output name.
    outfile_name = os.path.splitext(video_file)[0] + '-.mp4'
    # An argument list with no shell keeps spaces and shell metacharacters in
    # the file names from breaking or hijacking the command.
    cmd = [
        'ffmpeg', '-y',
        '-i', video_file,
        '-i', audio_file,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        outfile_name,
    ]
    # check=True turns a failed merge into an exception instead of a false
    # success message.
    subprocess.run(cmd, check=True)
    end_time = time.time()
    print(f'{outfile_name} 合并成功,耗时{round(end_time - start_time, 1)}秒')
    return outfile_name
删除已完成合并的音频和视频
def delete_file(directory, file_name):
    """Remove ``<file_name>.mp4`` and ``<file_name>.mp3`` from ``directory``.

    One status line is printed per extension, reporting whether the file
    was deleted or was not present.

    Args:
        directory: Directory that holds the files.
        file_name: Base file name without extension.
    """
    base = os.path.join(directory, file_name)
    for suffix in (".mp4", ".mp3"):
        target = base + suffix
        if not os.path.exists(target):
            print(f"{file_name}不存在。")
            continue
        os.remove(target)
        print(f"{file_name}已成功删除。")
本人新学的 Python,希望各位大佬多多指教,不喜勿喷。