比较好一点的实时语音识别程序（IAT）

大卫子

于 2024-08-08 08:52:01 发布

阅读量119

点赞数 3

文章标签：语音识别 xcode 人工智能

本文链接：https://blog.csdn.net/weixin_64465564/article/details/140999403

版权

这个工程调用的是讯飞的 API，主要是为了能够实时把我说的话转为文本，以便储存到电脑当中；在这个程序中，识别结果会显示在终端中。程序如下，使用前请先申请 API（非商用免费）。

import websocket
import hashlib
import base64
import hmac
import json
from urllib.parse import urlencode
from datetime import datetime
from time import mktime, sleep
import pyaudio
import ssl
import threading
import email.utils
from queue import Queue

# Values placed in the "status" field of each audio message sent by on_open.
STATUS_FIRST_FRAME = 0  # marks the first frame
STATUS_CONTINUE_FRAME = 1  # marks an intermediate frame
STATUS_LAST_FRAME = 2  # marks the last frame

class Ws_Param(object):
    """Credentials and request arguments for the iFlytek IAT WebSocket endpoint.

    Attributes:
        APPID, APIKey, APISecret: the credentials passed to the constructor.
        CommonArgs: the "common" section sent with the first audio frame.
        BusinessArgs: the "business" section sent with the first audio frame.
    """

    def __init__(self, APPID, APIKey, APISecret):
        """Store the credentials and build the fixed request arguments."""
        self.APPID, self.APIKey, self.APISecret = APPID, APIKey, APISecret

        # Common arguments.
        self.CommonArgs = dict(app_id=self.APPID)
        # Business arguments; further options are listed on the vendor site.
        self.BusinessArgs = dict(
            domain="iat",
            language="zh_cn",
            accent="mandarin",
            vinfo=1,
            vad_eos=10000,
        )

    def create_url(self):
        """Return the WebSocket URL carrying the signed authentication query.

        The query string holds three fields, in this order: ``authorization``
        (base64 of the api_key/algorithm/headers/signature description),
        ``date`` (RFC 1123 timestamp) and ``host``.
        """
        host = "ws-api.xfyun.cn"
        path = "/v2/iat"

        # RFC 1123 timestamp of the current time, rendered with the GMT suffix.
        date = email.utils.formatdate(
            mktime(datetime.now().timetuple()), usegmt=True
        )

        # Text that gets signed: host line, date line, then the request line.
        signature_origin = "\n".join(
            (f"host: {host}", f"date: {date}", f"GET {path} HTTP/1.1")
        )

        # HMAC-SHA256 keyed with the API secret, then base64-encoded.
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')

        authorization_origin = (
            f'api_key="{self.APIKey}", algorithm="hmac-sha256", '
            f'headers="host date request-line", signature="{signature}"'
        )
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')
        ).decode(encoding='utf-8')

        # Assemble the authentication parameters into the query string.
        query = urlencode(
            {"authorization": authorization, "date": date, "host": host}
        )
        return f"wss://{host}{path}?{query}"

# Queue holding messages that are waiting to be sent.
message_queue = Queue()
# Lock intended to serialize access to message_queue.
queue_lock = threading.Lock()

def on_message(ws, message):
    """Print the outcome of one server message.

    Args:
        ws: the WebSocket app delivering the message (unused).
        message: JSON text received from the server.

    A non-zero "code" is reported as an error line. Otherwise every "w"
    string found under data.result.ws[].cw[] is concatenated and printed.
    Any exception raised while handling the message is printed, not raised.
    """
    try:
        payload = json.loads(message)
        code = payload.get("code")
        sid = payload.get("sid")

        # Error replies carry a non-zero code and a human-readable message.
        if code != 0:
            errMsg = payload.get("message", "Unknown error")
            print(f"sid:{sid} call error:{errMsg} code is:{code}")
            return

        # Walk the recognition result and stitch the word candidates together.
        segments = payload.get("data", {}).get("result", {}).get("ws", [])
        text = ""
        for segment in segments:
            for candidate in segment.get("cw", []):
                text += candidate.get("w", "")

        print(f"sid:{sid} call success! Text: {text}")
    except Exception as exc:
        print("Receive msg, but parse exception:", exc)

def on_error(ws, error):
    """Print an error reported by the WebSocket app.

    Args:
        ws: the WebSocket app reporting the error (unused).
        error: the error object or message to display.
    """
    print("### error: {}".format(error))

def on_close(ws, close_status_code, close_msg):
    """Report that the WebSocket connection has closed.

    Args:
        ws: the WebSocket app whose connection closed (unused).
        close_status_code: status code reported for the close, if any.
        close_msg: message reported for the close, if any.

    This callback only logs. It must not start a new connection itself:
    reconnect() is an endless loop, so calling it from here would keep this
    callback from ever returning and would stack a new connection loop inside
    the one that is still shutting down. The loop in reconnect() opens the
    next connection once run_forever() returns.
    """
    print(f"### closed with code: {close_status_code}, message: {close_msg}")

def on_open(ws):
    """Start a background thread that streams microphone audio over ``ws``.

    Args:
        ws: the WebSocket app whose connection has just opened.

    The thread records 16 kHz, mono, 16-bit audio in 8000-frame chunks,
    base64-encodes each chunk and sends it as a JSON message. The first
    message also carries the "common" and "business" arguments taken from the
    module-level ``wsParam``. When the loop ends for any reason the audio
    device is released and the socket is closed.
    """
    def run():
        p = pyaudio.PyAudio()
        stream = None
        status = STATUS_FIRST_FRAME

        try:
            # Opened inside the try block so that a failure to open the
            # microphone still releases PyAudio and closes the socket.
            stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                            input=True, frames_per_buffer=8000)

            while True:
                buf = stream.read(8000, exception_on_overflow=False)
                if not buf:
                    # An empty read is treated as the end of the audio.
                    status = STATUS_LAST_FRAME

                msg = {
                    "data": {
                        "format": "audio/L16;rate=16000",
                        "audio": base64.b64encode(buf).decode('utf-8'),
                        "encoding": "raw",
                        "status": status,
                    }
                }
                if status == STATUS_FIRST_FRAME:
                    msg["common"] = wsParam.CommonArgs
                    msg["business"] = wsParam.BusinessArgs

                if not ws.sock or not ws.sock.connected:
                    print("WebSocket connection is closed, exiting.")
                    break

                # Each frame is sent exactly once, directly on this socket.
                # It is deliberately not also placed on message_queue: doing
                # both would make the queue consumer transmit every frame
                # (including the first one) a second time.
                try:
                    ws.send(json.dumps(msg))
                except websocket.WebSocketConnectionClosedException as e:
                    print("WebSocket connection closed while sending message:", e)
                    break

                if status == STATUS_LAST_FRAME:
                    # The closing frame has been sent; pause before the
                    # socket is closed in the finally block.
                    sleep(1)
                    break

                status = STATUS_CONTINUE_FRAME
                sleep(0.04)

        finally:
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()
            ws.close()

    threading.Thread(target=run).start()

# Held for as long as a reconnect() loop is running, so that a second call
# (for example from a WebSocket callback) cannot start a competing loop.
_reconnect_guard = threading.Lock()


def reconnect(appid='这里写你的appid', api_secret='同理', api_key='同理'):
    """Connect to the recognition service and keep reconnecting forever.

    Args:
        appid: application id passed to Ws_Param (placeholder by default).
        api_secret: API secret passed to Ws_Param (placeholder by default).
        api_key: API key passed to Ws_Param (placeholder by default).

    Each pass of the loop publishes a fresh ``wsParam`` global (read by
    on_open), builds a newly signed URL, starts a consumer thread for
    message_queue and blocks in run_forever(). When the connection ends the
    consumer is stopped and the loop opens the next connection; after an
    exception it waits 5 seconds first.

    If a reconnect loop is already running, the call returns immediately.
    This keeps a callback that calls reconnect() from nesting a second
    endless loop inside the first one.
    """
    global wsParam
    from queue import Empty  # local import: the file only imports Queue

    if not _reconnect_guard.acquire(blocking=False):
        return

    try:
        while True:
            # Signals this connection's consumer thread to exit, so threads
            # do not accumulate across reconnects.
            stop_consumer = threading.Event()
            try:
                wsParam = Ws_Param(APPID=appid, APISecret=api_secret, APIKey=api_key)
                wsUrl = wsParam.create_url()
                ws = websocket.WebSocketApp(wsUrl, on_message=on_message,
                                            on_error=on_error, on_close=on_close)
                ws.on_open = on_open

                # Consumer for message_queue, bound to this connection only.
                def process_queue(ws=ws, stop=stop_consumer):
                    while not stop.is_set():
                        try:
                            # Queue is already thread-safe; no extra lock is
                            # held here, so producers are never blocked while
                            # a send is in progress.
                            msg = message_queue.get(timeout=0.1)
                        except Empty:
                            continue
                        if not (ws.sock and ws.sock.connected):
                            # Not connected: the message is dropped.
                            continue
                        try:
                            ws.send(json.dumps(msg))
                        except websocket.WebSocketConnectionClosedException as e:
                            print("WebSocket connection closed while processing queue:", e)
                            # Just stop; the outer loop handles reconnecting.
                            break

                threading.Thread(target=process_queue, daemon=True).start()
                # NOTE(security): CERT_NONE disables TLS certificate
                # verification. Kept for compatibility with the original
                # script; consider enabling verification.
                ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
            except Exception as e:
                print(f"Exception occurred: {e}, retrying...")
                sleep(5)  # wait 5 seconds before retrying the connection
            finally:
                stop_consumer.set()
    finally:
        _reconnect_guard.release()

# Script entry point: start the connect-and-stream loop.
if __name__ == "__main__":
    reconnect()

API 申请网站：xfyun.cn。注意电脑要有麦克风，这样就没什么问题了。

效果比上次好~

thanks

大卫子

关注

3
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
比较好一点的实时语音识别程序（IAT）

这个工程调用的是讯飞的api，主要是为了能够实时把我的话转为文本，储存到电脑当中，在这个程序中会显示在终端中，程序如下，请先申请api（非商用免费）网站：xfyun.cn，注意电脑要有麦克风，就没什么问题了。
复制链接

扫一扫