# Speech and audio processing code.
import argparse
import functools
import os,shutil
import sys,json
import glob
import time,uuid
from flask import request, Flask, render_template
from flask_cors import CORS
from werkzeug.serving import run_simple
from paddlespeech.cli.log import logger
from paddlespeech.server.utils.audio_handler import ASRWsAudioHandler
import asyncio
import soundfile as sf
from pydub import AudioSegment
import numpy as np
def check_wav_and_convert_to_16khz(input_wav, output_wav):
    """Write a 16 kHz copy of a WAV file to ``output_wav``.

    The sample rate is read with ``soundfile``. Audio that is already at
    16 kHz is written back out as-is; anything else is re-exported through
    ``pydub`` with its frame rate set to 16 kHz.

    Args:
        input_wav: Path of the WAV file to inspect.
        output_wav: Path the resulting WAV file is written to.

    Returns:
        Duration in seconds: ``len(data) / samplerate`` of the input when no
        conversion was needed, otherwise pydub's ``duration_seconds`` of the
        converted audio.
    """
    target_rate = 16000
    samples, rate = sf.read(input_wav)

    # Already at the target rate: just write the samples back out.
    if rate == target_rate:
        print(f"采样率已为 16kHz,无需转换。")
        sf.write(output_wav, samples, rate)
        return len(samples) / rate

    print(f"采样率 {rate} Hz 不是 16kHz,正在转换...")
    resampled = AudioSegment.from_wav(input_wav).set_frame_rate(target_rate)
    resampled.export(output_wav, format="wav")
    print(f"音频已保存到 {output_wav},采样率为 16kHz")
    return resampled.duration_seconds
def convert_m4a_to_wav_16khz(input_file, output_file):
    """Decode an M4A file and export it as a WAV file at a 16 kHz frame rate.

    Args:
        input_file: Path of the M4A file to read.
        output_file: Path the WAV file is written to.

    Returns:
        Duration of the converted audio in seconds, as reported by pydub.
    """
    converted = AudioSegment.from_file(input_file, format="m4a").set_frame_rate(16000)
    converted.export(output_file, format="wav")
    print(f"Converted {input_file} to {output_file} with 16kHz sampling rate")
    return converted.duration_seconds
# Flask application: templates are loaded from "templates" and static files
# from "static", with the static URL path set to "/".
app = Flask(
    __name__,
    template_folder="templates",
    static_folder="static",
    static_url_path="/",
)
CORS(app)  # apply flask_cors to the whole application with its defaults

# Address of the streaming ASR server the websocket handler connects to.
server_ip = "192.168.10.198"
server_port = 8080
# Address passed to the handler as punc_server_ip / punc_server_port
# (presumably the punctuation-restoration service -- confirm against the
# PaddleSpeech deployment).
punc_server_ip = "192.168.10.198"
punc_server_port = 8087
endpoint = "/paddlespeech/asr/streaming"

# Single shared ASR client used by every request handler in this module.
handler = ASRWsAudioHandler(
    server_ip,
    server_port,
    endpoint=endpoint,
    punc_server_ip=punc_server_ip,
    punc_server_port=punc_server_port,
)

# Create the event loop explicitly. asyncio.get_event_loop() is deprecated
# when no loop is current and raises RuntimeError on Python 3.14+, which
# would make this module fail at import time.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
@app.route("/recognition", methods=['POST'])
def recognition():
    """Recognise speech in an uploaded ``.wav`` or ``.m4a`` file.

    The upload is read from the ``audio`` multipart field, stored under
    ``./audio_savedir/<uuid>/``, converted to a 16 kHz WAV file and passed to
    the shared ASR websocket handler.

    Returns:
        str: A JSON document.
            ``{"code": 0, "msg": "success", "result": <text>}`` on success;
            ``{"error": 1, "msg": <reason>}`` when the format is unsupported
            or conversion/recognition fails;
            ``{"error": 3, "msg": "audio is None!"}`` when no file was sent.
    """
    # .get() so a missing field reaches the explicit error below instead of
    # aborting with a generic 400 response.
    f = request.files.get('audio')
    if not f:
        return json.dumps({"error": 3, "msg": "audio is None!"})

    extension = os.path.splitext(f.filename)[1]
    audio_format = extension.lower()
    converters = {
        ".wav": check_wav_and_convert_to_16khz,
        ".m4a": convert_m4a_to_wav_16khz,
    }
    convert = converters.get(audio_format)
    if convert is None:
        # Without a converter no WAV file would exist for the ASR client,
        # so fail early with a clear message.
        msg = f"unsupported audio format: {audio_format!r}"
        logger.error(msg)
        return json.dumps({"error": 1, "msg": msg}, ensure_ascii=False)

    # Each request gets its own uuid-named directory, so concurrent uploads
    # with the same client file name cannot collide.
    new_filename = str(uuid.uuid4())
    work_dir = os.path.join("./audio_savedir", new_filename)
    os.makedirs(work_dir)
    file_path = os.path.join(work_dir, new_filename + extension)
    file_path_convert = os.path.join(work_dir, "new_" + new_filename + ".wav")
    f.save(file_path)
    # NOTE(review): the upload directory is never removed; add cleanup here
    # if keeping the audio on disk is not intended.

    start_time = time.time()
    try:
        convert(file_path, file_path_convert)
        result = loop.run_until_complete(handler.run(file_path_convert))["result"]
    except Exception as e:
        logger.error(f"asr websocket client error : {e}")
        return json.dumps({"error": 1, "msg": str(e)}, ensure_ascii=False)

    elapsed = round(time.time() - start_time)
    logger.info(f"\n\n检测用时{elapsed}秒,asr websocket client finished : {result}")
    # json.dumps escapes quotes inside the text; the previous
    # str(dict).replace("'", '"') produced invalid JSON for such text.
    return json.dumps({"code": 0, "msg": "success", "result": result}, ensure_ascii=False)
def recongition_long_audio_TOOL(file_path_convert):
    """Run the shared ASR handler on one audio file and return a JSON reply.

    Args:
        file_path_convert: Path of the audio file handed to ``handler.run``.

    Returns:
        str: ``{"code": 0, "msg": "success", "result": <text>}`` serialised
            with ``json.dumps`` so callers can safely ``json.loads`` it.

    Raises:
        Exception: Whatever ``handler.run`` raises is propagated unchanged.
    """
    # Measure real elapsed time; subtracting 0 from the end timestamp logged
    # seconds since the epoch instead of the recognition duration.
    start_time = time.time()
    result = loop.run_until_complete(handler.run(file_path_convert))["result"]
    elapsed = round(time.time() - start_time)
    logger.info(f"\n\n检测用时{elapsed}秒,asr websocket client finished : {result}")
    # Proper JSON encoding: text containing ' or " no longer corrupts the
    # payload the caller parses.
    return json.dumps({"code": 0, "msg": "success", "result": result}, ensure_ascii=False)
def read_wave_(wavfile_path, TIME_segmentation):
    """Split a 16 kHz WAV file into fixed-length chunk files.

    The audio is read as int16 samples and zero-padded up to a whole number
    of chunks. Each chunk is written next to the source file as
    ``<stem>_<index><ext>``, with ``index`` zero-padded to six digits.

    Args:
        wavfile_path (str): Path of the WAV file to split; its sample rate
            must be 16000 Hz.
        TIME_segmentation (int | float): Chunk length in seconds.

    Returns:
        list[str]: Paths of the chunk files written, in chunk order (empty
            when the input holds no samples).

    Raises:
        ValueError: If the sample rate is not 16000 Hz or the chunk length
            works out to less than one sample.
    """
    samples, sample_rate = sf.read(wavfile_path, dtype='int16')
    # Explicit check instead of assert, which is stripped under ``python -O``.
    if sample_rate != 16000:
        raise ValueError(f"expected a 16000 Hz file, got {sample_rate} Hz: {wavfile_path}")

    chunk_size = int(TIME_segmentation * 1000 * sample_rate / 1000)
    if chunk_size <= 0:
        raise ValueError(f"TIME_segmentation must be positive, got {TIME_segmentation!r}")

    # Number of zero frames needed to reach a multiple of chunk_size.
    padding_len = -len(samples) % chunk_size
    if padding_len:
        # Keep trailing dimensions so multi-channel audio (frames, channels)
        # can be padded too; 1-D padding made concatenate fail for it.
        padding = np.zeros((padding_len,) + samples.shape[1:], dtype=samples.dtype)
        samples = np.concatenate([samples, padding], axis=0)

    directory = os.path.dirname(wavfile_path)
    # splitext keeps the real extension even when the name has extra dots.
    stem, extension = os.path.splitext(os.path.basename(wavfile_path))

    output_files = []
    for index, start in enumerate(range(0, len(samples), chunk_size)):
        output_file = os.path.join(directory, f"{stem}_{index:06d}{extension}")
        sf.write(output_file, samples[start:start + chunk_size], sample_rate)
        output_files.append(output_file)
    return output_files
@app.route("/recognition_long_audio", methods=['POST'])
def recognition_long_audio():
    """Recognise speech in an uploaded ``.wav``/``.m4a`` file of any length.

    The upload is read from the ``audio`` multipart field, stored under
    ``./audio_savedir/<uuid>/`` and converted to a 16 kHz WAV file. Audio
    longer than 40 seconds is split into 40-second chunk files that are
    recognised one after another and concatenated; shorter audio is
    recognised in a single call.

    Returns:
        str: A JSON document.
            ``{"code": 0, "msg": "success", "result": <text>}`` on success;
            ``{"error": 1, "msg": <reason>}`` when the format is unsupported
            or conversion/recognition fails;
            ``{"error": 3, "msg": "audio is None!"}`` when no file was sent.
    """
    # .get() so a missing field reaches the explicit error below instead of
    # aborting with a generic 400 response.
    f = request.files.get('audio')
    if not f:
        return json.dumps({"error": 3, "msg": "audio is None!"})

    extension = os.path.splitext(f.filename)[1]
    audio_format = extension.lower()
    converters = {
        ".wav": check_wav_and_convert_to_16khz,
        ".m4a": convert_m4a_to_wav_16khz,
    }
    convert = converters.get(audio_format)
    if convert is None:
        # Without a converter no WAV file would exist for the ASR client.
        msg = f"unsupported audio format: {audio_format!r}"
        logger.error(msg)
        return json.dumps({"error": 1, "msg": msg}, ensure_ascii=False)

    # One uuid-named working directory per request.
    new_filename = str(uuid.uuid4())
    work_dir = os.path.join("./audio_savedir", new_filename)
    os.makedirs(work_dir)
    file_path = os.path.join(work_dir, new_filename + extension)
    file_path_convert = os.path.join(work_dir, "new_" + new_filename + ".wav")
    f.save(file_path)
    # NOTE(review): the working directory (upload, converted file, chunks)
    # is never removed; add cleanup here if retention is not intended.

    TIME_segmentation = 40  # chunk length in seconds
    try:
        audio_time = convert(file_path, file_path_convert)
        logger.info(f"audio duration: {audio_time} s")

        if audio_time <= TIME_segmentation:
            # Short audio: a single recognition call, already JSON-encoded.
            return recongition_long_audio_TOOL(file_path_convert)

        read_wave_(file_path_convert, TIME_segmentation)
        # Collect the chunk files written next to the converted file and
        # order them by their numeric suffix.
        stem = os.path.splitext(os.path.basename(file_path_convert))[0]
        chunk_files = sorted(
            glob.glob(os.path.join(work_dir, stem + "_*.wav")),
            key=lambda path: int(os.path.splitext(os.path.basename(path))[0].split('_')[-1]),
        )

        last_index = len(chunk_files) - 1
        texts = []
        for index, chunk_file in enumerate(chunk_files):
            text = json.loads(recongition_long_audio_TOOL(chunk_file))["result"] or ""
            if index != last_index:
                # Drop the final character of every chunk but the last --
                # presumably the sentence-ending punctuation added per
                # chunk; TODO confirm against the punctuation server output.
                text = text[:-1]
            texts.append(text)

        # Previously this path returned an undefined name, so every long
        # recording ended in an error; return the joined transcript instead.
        return json.dumps(
            {"code": 0, "msg": "success", "result": "".join(texts)},
            ensure_ascii=False,
        )
    except Exception as e:
        logger.error(f"asr websocket client error : {e}")
        return json.dumps({"error": 1, "msg": str(e)}, ensure_ascii=False)
@app.route('/')
def home():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
if __name__ == "__main__":
    # Start Flask's built-in server on every interface, port 8088.
    app.run(port=8088, host="0.0.0.0")