工作流程
后端接收文件并调用Python
/**
 * Stores the uploaded audio file, runs {@code oral.py} on it and returns what the script prints.
 *
 * <p>The upload is saved under {@code Tool.origin_path} as {@code <id>_<timestamp><suffix>}, where
 * the suffix is the extension of the original file name (empty when there is none). The script
 * receives that path plus a {@code <id>_<timestamp>.wav} target path under {@code Tool.dst_path}.
 *
 * @param file uploaded audio file
 * @param id   caller-supplied identifier used as the file-name prefix
 * @return the script's standard output with its lines concatenated, or a string starting with
 *         {@code "error: "} when the request is invalid or any step fails
 */
@PostMapping("/getText")
public String getText(MultipartFile file, String id){
    try {
        // Explicit check: an `assert` is a no-op unless the JVM runs with -ea.
        String fileName = file == null ? null : file.getOriginalFilename();
        if (fileName == null) {
            return "error: missing file name";
        }

        // lastIndexOf returns -1 for names without an extension; fall back to no suffix.
        int dot = fileName.lastIndexOf('.');
        String suffix = dot >= 0 ? fileName.substring(dot) : "";

        String baseName = id + "_" + System.currentTimeMillis();
        String newName = baseName + suffix;
        // Both id and suffix come from the client; a separator would let the name escape the
        // upload directory.
        if (newName.indexOf('/') >= 0 || newName.indexOf('\\') >= 0) {
            return "error: invalid file name";
        }

        String originFile = Tool.origin_path + newName;
        String wavFile = Tool.dst_path + baseName + ".wav";
        file.transferTo(new File(originFile));

        // Pass arguments as a list: Runtime.exec(String) splits on whitespace, so paths with
        // spaces break and crafted input could inject extra arguments.
        ProcessBuilder builder = new ProcessBuilder("python", "oral.py", originFile, wavFile);
        // Forward stderr to this process so the child cannot block on a full, unread pipe.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process process = builder.start();

        StringBuilder output = new StringBuilder();
        try (BufferedReader in = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                output.append(line);
            }
        }

        // Report a failed script instead of returning whatever partial output it produced.
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            return "error: oral.py exited with code " + exitCode;
        }
        return output.toString();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so the caller's thread can still observe it.
        Thread.currentThread().interrupt();
        return "error: " + e.getMessage();
    } catch (Exception e) {
        return "error: " + e.getMessage();
    }
}
Python格式转换并请求语音识别
from pydub import AudioSegment
import azure.cognitiveservices.speech as speechsdk
import sys
# Azure Speech subscription key handed to speechsdk.SpeechConfig in from_file().
# NOTE(review): '***' looks like a redacted placeholder — a real key must be supplied,
# and is better loaded from outside the source file than committed here.
key = '***'
# Azure service region handed to speechsdk.SpeechConfig together with the key.
region = 'eastus'
def get_wav(origin_path, dst_path):
    """Convert the audio file at ``origin_path`` to WAV and write it to ``dst_path``.

    The input is decoded with ``AudioSegment.from_file`` and re-exported in WAV format.
    """
    sound = AudioSegment.from_file(origin_path)
    # export() returns the output file handle still open; close it explicitly so the WAV
    # is fully flushed to disk before anything else reads dst_path.
    sound.export(dst_path, format='wav').close()
def from_file(path):
    """Run one-shot Azure speech recognition on the audio file at ``path``.

    Builds the recognizer from the module-level ``key`` and ``region``, makes a single
    ``recognize_once_async`` call and returns ``result.text``.

    When the service reports that nothing was recognized or that the request was
    canceled, the reason is written to stderr so the failure is visible; the return
    value is still ``result.text`` in that case.
    """
    speech_config = speechsdk.SpeechConfig(subscription=key, region=region)
    audio_config = speechsdk.AudioConfig(filename=path)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    result = speech_recognizer.recognize_once_async().get()

    # Diagnostics go to stderr so stdout keeps carrying only the recognized text.
    if result.reason == speechsdk.ResultReason.NoMatch:
        print('No speech could be recognized: {}'.format(result.no_match_details), file=sys.stderr)
    elif result.reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print('Speech recognition canceled: {}'.format(details.reason), file=sys.stderr)
        if details.reason == speechsdk.CancellationReason.Error:
            print('Error details: {}'.format(details.error_details), file=sys.stderr)

    return result.text
if __name__ == '__main__':
    # Usage: python oral.py <origin_path> <dst_path>
    #   origin_path: audio file to read
    #   dst_path:    where the converted WAV file is written
    origin_path, dst_path = sys.argv[1], sys.argv[2]

    # Convert to WAV first, then recognize from the converted file.
    get_wav(origin_path, dst_path)
    text = from_file(dst_path)

    # The recognized text is the script's only stdout output.
    print(text)