百度语音识别对录音质量要求较高(也可能是我自己的问题,SDK 和在线 API 都试过了(滑稽保命)),识别失败后改用讯飞语音。讯飞官方提供的示例是 Python 2 版本的,经过修改后可以在 Python 3 中运行:程序解析服务端返回的 JSON 字符串,最后输出识别得到的文字。
一、登录讯飞官网申请应用
二、选择实时语音转写并开通服务
三、代码如下
(以下代码只是在讯飞官网给出的 demo 基础上做了修改,使其能在 Python 3 环境下运行;这里使用的音频也是官网提供的示例音频。)
#-*- encoding:utf-8 -*-
#2020/2/19 修改人员:monster water
import sys
import hashlib
from hashlib import sha1
import hmac
import base64
from socket import *
import json, time, threading
from websocket import create_connection
import websocket
from urllib.parse import quote
import logging
import importlib
# NOTE: the Python 2 era `reload(sys)` / `sys.setdefaultencoding("utf8")`
# workaround was dropped; Python 3 strings are Unicode, so it served no purpose.

# Install the default root log handler so library log records are shown.
logging.basicConfig()

# WebSocket endpoint that Client connects to.
base_url = "ws://rtasr.xfyun.cn/v1/ws"
# Application id from the console; the application must have the real-time
# speech transcription service enabled. Replace with your own value.
app_id = "5e4baasdasaabifba83"
# API key from the same console application. Replace with your own value.
api_key = "zxcafhaiufgiabf"
# Audio file to upload; change this to your own file and mind the path.
file_path = "test_1.pcm"
# Final message sent after the last audio chunk.
end_tag = "{\"end\": true}"
class Client():
    """WebSocket client that uploads audio and prints the transcription.

    Creating an instance opens the connection to ``base_url`` with signed
    query parameters and starts a background thread running :meth:`recv`,
    which prints every message received from the server.
    """

    def __init__(self):
        """Open the signed connection and start the receiver thread."""
        # The same timestamp goes into the signature and the query string,
        # so it is computed exactly once.
        ts = str(int(time.time()))
        signa = self._build_signa(app_id, api_key, ts)
        self.ws = create_connection(
            base_url + "?appid=" + app_id + "&ts=" + ts
            + "&signa=" + quote(signa)
        )
        self.trecv = threading.Thread(target=self.recv)
        self.trecv.start()

    @staticmethod
    def _build_signa(appid, key, ts):
        """Return base64(HMAC-SHA1(key, md5_hex(appid + ts))) as text."""
        md5_hex = hashlib.md5((appid + ts).encode('utf-8')).hexdigest()
        mac = hmac.new(
            key.encode('utf-8'), md5_hex.encode('utf-8'), hashlib.sha1
        )
        return base64.b64encode(mac.digest()).decode('utf-8')

    def send(self, file_path):
        """Upload the audio file in chunks, then send the end marker.

        Args:
            file_path: Path of the audio file; it is read in binary mode.

        Any exception raised while reading or sending propagates after the
        file has been closed; the end marker is not sent in that case.
        """
        with open(file_path, 'rb') as audio:
            # read() returns b'' at end of file, which stops the iteration.
            for chunk in iter(lambda: audio.read(1280), b''):
                self.ws.send(chunk)
                # Pace the upload: one 1280-byte chunk every 40 ms.
                # NOTE(review): presumably matches real-time 16 kHz 16-bit
                # mono PCM -- confirm against the service documentation.
                time.sleep(0.04)
        self.ws.send(end_tag.encode('utf-8'))
        print("send end tag success")

    def recv(self):
        """Print server messages until the stream ends, fails or closes.

        Dispatches on the ``action`` field of each JSON message: the
        handshake acknowledgement and errors are printed verbatim, results
        are reduced to their transcribed text. An error message also closes
        the connection and stops the loop.
        """
        try:
            while self.ws.connected:
                result = str(self.ws.recv())
                if not result:
                    print("receive result end")
                    break
                result_dict = json.loads(result)
                action = result_dict["action"]
                if action == "started":
                    print("handshake success, result: " + result)
                elif action == "result":
                    # Print the text extracted from this result message.
                    print(self._extract_text(result_dict['data']))
                elif action == "error":
                    print("rtasr error: " + result)
                    self.ws.close()
                    return
        except websocket.WebSocketConnectionClosedException:
            print("receive result end")

    @staticmethod
    def _extract_text(data):
        """Join the first candidate word of each entry in a result payload.

        Args:
            data: JSON text taken from the ``data`` field of a result message.
        """
        payload = json.loads(data)
        # Only the first 'rt' element and the first 'cw' candidate of each
        # entry are used.
        words = payload['cn']['st']['rt'][0]['ws']
        return ''.join(word['cw'][0]['w'] for word in words)

    def close(self):
        """Close the WebSocket connection."""
        self.ws.close()
        print("connection closed")
if __name__ == '__main__':
    # Script entry point: connect (which also starts the receiver thread),
    # then upload the configured audio file.
    Client().send(file_path)
四、运行结果
下面是自己测试:
成功