import requests
import re
import json
import io
# HTTP headers attached to every request made by get_url_html() and download():
# a desktop-Chrome User-Agent string plus a bilibili Referer.
# NOTE(review): presumably the site needs these to serve the page/media — confirm.
headers={
"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
"referer": "https://message.bilibili.com/",
}
def extract_content(text, symbol):
    """Return every substring of ``text`` enclosed by the given delimiters.

    @param text string to search
    @param symbol either one delimiter string used on both sides (e.g. "#"),
                  or a two-item list/tuple ``(opening, closing)`` such as
                  ``["【", "】"]``
    @return list of the shortest substrings found between the delimiters, in
            order of appearance; empty when nothing matches
    @raises ValueError if ``symbol`` is a list/tuple that does not hold
            exactly two items
    """
    if isinstance(symbol, (list, tuple)):
        opening, closing = symbol
    else:
        opening = closing = symbol
    # The delimiters are literal text, so they must be escaped. Interpolating
    # a list directly would insert its repr and create an accidental regex
    # character class that also matches quotes, commas and spaces.
    pattern = r'%s(.*?)%s' % (re.escape(opening), re.escape(closing))
    return re.findall(pattern, text)
def get_url_html(url):
    """Fetch the page at ``url`` and pass its HTML text to ``get_json``.

    @param url address of the video page to download
    @raises requests.RequestException on network failure, timeout, or a
            4xx/5xx response
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=30)
    # Stop on HTTP errors here instead of feeding an error page to the parser.
    response.raise_for_status()
    get_json(response.text)
def get_json(htmltxt):
    """Find the audio stream URL embedded in a page's HTML and download it.

    The HTML must contain a ``<script>window.__playinfo__=...</script>`` tag
    whose payload is JSON; the first entry of ``data.dash.audio`` supplies the
    ``base_url`` that is handed to ``download``.

    @param htmltxt HTML text of the video page
    @raises ValueError if the page has no ``window.__playinfo__`` script
    @raises KeyError / IndexError if the JSON lacks the expected audio entry
    """
    # The dots are escaped so they match literally rather than any character.
    found = re.search(r'<script>window\.__playinfo__=(.*?)</script>', htmltxt)
    if found is None:
        raise ValueError("window.__playinfo__ script not found in page HTML")
    playinfo = json.loads(found.group(1))
    audiourl = playinfo["data"]["dash"]["audio"][0]["base_url"]
    download(audiourl)
def download(audiourl, output_path=None):
    """Download the audio stream at ``audiourl`` and write it to a file.

    @param audiourl direct URL of the audio stream
    @param output_path destination file path; when omitted, the module-level
                       ``mp3`` path assigned by the script entry point is used
                       (that name must exist in that case)
    @raises requests.RequestException on network failure, timeout, or a
            4xx/5xx response
    """
    # Local import: the file's module-level ``import os`` sits below the
    # script entry point that triggers this call, so it has not run yet.
    import os

    target = mp3 if output_path is None else output_path
    # Announce before the request so the message is visible while waiting.
    print('爬取中,等待....')
    res = requests.get(url=audiourl, headers=headers, timeout=60)
    # Do not save an HTTP error body as if it were audio data.
    res.raise_for_status()
    parent = os.path.dirname(target)
    if parent:
        # Create the output directory on first use instead of failing in open().
        os.makedirs(parent, exist_ok=True)
    with open(target, "wb") as f:
        f.write(res.content)
    print('爬取完毕!')
if __name__ == "__main__":
    # Scrape audio: take a video page address and save that page's audio
    # track to a local file.
    url_regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    symbol = ["【", "】"]
    for idx in range(1):
        # Each entry looks like "【<number>】<url>"; the bracketed number
        # becomes the output file name.
        number = str(idx + 1)
        entry = "".join((
            "【",
            number,
            "】",
            "https://www.bilibili.com/video/BV1Aq421A7Cb/?spm_id_from=333.337.search-card.all.click&vd_source=42e8b3d0f4cd4b9549473a905d860f79",
        ))
        file_stem = extract_content(entry, symbol)[0]
        print('文件名为:' + file_stem)
        page_url = re.findall(url_regex, entry)[0]
        print('网址:' + page_url)
        # download() reads this module-level name to decide where to write.
        mp3 = "./output_path/" + file_stem + '.wav'
        get_url_html(page_url)
from ffmpy import FFmpeg
import os
def audio_transfor(audio_path: str, output_dir: str) -> str:
    """Convert one MP3 file to a WAV file by running ffmpeg through ffmpy.

    The output is written to ``output_dir`` under the source file's base name
    with a ``.wav`` extension, using the options ``-f wav -ac 1 -ar 16000``.

    @param audio_path path of the source ``.mp3`` file
    @param output_dir directory that receives the ``.wav`` file
    @return path of the WAV file that was produced
    @raises ValueError if ``audio_path`` does not end in ``.mp3``
            (compared case-insensitively)
    """
    # splitext removes only the final extension, so a dotted name such as
    # "talk.part1.mp3" becomes "talk.part1.wav" rather than "talk.wav".
    stem, ext = os.path.splitext(os.path.basename(audio_path).strip())
    if ext.lower() != '.mp3':
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError('format is not mp3')
    result = os.path.join(output_dir, stem + '.wav')
    # ffmpeg options: WAV container, one audio channel, 16000 Hz sample rate.
    filter_cmd = '-f wav -ac 1 -ar 16000'
    ff = FFmpeg(inputs={audio_path: None}, outputs={result: filter_cmd})
    print(ff.cmd)
    ff.run()
    return result
def handle(mp3_audio_folder: str, mav_output_folder: str):
    """
    Convert every entry of a folder from MP3 to WAV, deleting each source
    file once its conversion call has returned.
    @param mp3_audio_folder folder containing the MP3 files
    @param mav_output_folder folder the WAV files are saved to
    """
    for name in os.listdir(mp3_audio_folder):
        source = os.path.join(mp3_audio_folder, name)
        audio_transfor(source, mav_output_folder)
        # Only reached when audio_transfor did not raise.
        os.remove(source)
import shutil
import os
def move_folder(movabs_path,rawabs_path):
    """
    Copy the entries of one directory into another, printing a path for each.
    Entries named ".vscode" or "Anaconda" are skipped.
    @param movabs_path directory whose entries are copied
    @param rawabs_path directory the copies are placed in
    """
    skipped = (".vscode", "Anaconda")
    for entry in os.listdir(movabs_path):
        if entry in skipped:
            continue
        src = os.path.join(movabs_path, entry)
        dst = os.path.join(rawabs_path, entry)
        if not os.path.isdir(src):
            # Plain file: copy it, and print the destination path.
            print(dst)
            shutil.copy2(src, dst)
            continue
        # Directory: copy the whole tree (symlinks kept as symlinks), and
        # print the source path.
        print(src)
        shutil.copytree(src, dst, symlinks=True, ignore=None)
import subprocess
import os
def cut_audio(audio_folder,output_folder,start_seconds=27):
    """
    Trim the beginning off every file in a folder by invoking ffmpeg.
    Each file is written to ``output_folder`` under its original name,
    starting at ``start_seconds`` and with the audio stream copied as-is.
    @param audio_folder folder containing the audio files to trim
    @param output_folder folder the trimmed files are saved to
    @param start_seconds offset passed to ffmpeg's ``-ss`` option, i.e. where
                         the kept audio begins (default 27)
    """
    for file in os.listdir(audio_folder):
        audio_path = os.path.join(audio_folder, file)
        output_path = os.path.join(output_folder, file)
        # "-acodec copy" copies the audio stream instead of re-encoding it.
        command = ['ffmpeg', '-i', audio_path, '-ss', str(start_seconds), '-acodec', 'copy', output_path]
        subprocess.call(command)
import pickle
import os
def pkl2txt(pkl_folder,txt_folder):
    """
    Convert each pickle file in a folder to a text file.
    Every unpickled object is iterated, and each item is written as one line
    of the form ``<item[0]> [<item[1]>s -> <item[2]>s] `` with the two numbers
    shown to three decimals. The text file takes the pickle's name with its
    final extension replaced by ``.txt``.
    @param pkl_folder folder containing the pickle files
    @param txt_folder folder the text files are saved to
    """
    for file in os.listdir(pkl_folder):
        with open(os.path.join(pkl_folder, file), "rb") as f:
            # NOTE: pickle.load can execute arbitrary code; only run this on
            # pickle files from a trusted source.
            data = pickle.load(f)
        # Swap only the final extension; a substring replace would also
        # rewrite "pkl" occurring elsewhere in the file name.
        txt_name = os.path.splitext(file)[0] + ".txt"
        # os.path.join keeps the output inside txt_folder whether or not the
        # caller supplied a trailing separator.
        txt_path = os.path.join(txt_folder, txt_name)
        # Append mode: an existing text file is extended, not overwritten.
        with open(txt_path, 'a', encoding="UTF-8") as f:
            for item in data:
                f.write("%s [%.3fs -> %.3fs] \n" % (item[0], item[1], item[2]))
from moviepy.editor import *
def v2a(filder_path,audio_path):
    """
    Extract the audio track of every video in a folder and save it as WAV.
    Each output file takes the video's name with its final extension replaced
    by ``.wav``.
    @param filder_path folder containing the video files
    @param audio_path folder the audio files are saved to
    """
    import os
    for file in os.listdir(filder_path):
        video = VideoFileClip(os.path.join(filder_path, file))
        try:
            # splitext drops only the final extension, so dotted names such
            # as "talk.part1.mp4" keep their full stem.
            stem = os.path.splitext(file)[0]
            video.audio.write_audiofile(os.path.join(audio_path, stem + ".wav"))
        finally:
            # Release the clip's reader resources even if the export fails.
            video.close()