比较好一点的实时语音识别程序(IAT)

这个工程调用的是讯飞的语音听写(IAT)API,目的是把我说的话实时转成文本。目前程序只把识别结果显示在终端中,还没有保存到文件。运行前请先在讯飞开放平台申请 API(非商用免费)。程序如下:

import websocket
import hashlib
import base64
import hmac
import json
from urllib.parse import urlencode
from datetime import datetime
from time import mktime, sleep
import pyaudio
import ssl
import threading
import email.utils
from queue import Queue

STATUS_FIRST_FRAME = 0  # marker for the first frame
STATUS_CONTINUE_FRAME = 1  # marker for an intermediate frame
STATUS_LAST_FRAME = 2  # marker for the last frame

class Ws_Param(object):
    """Credentials and request parameters for the iFlytek IAT WebSocket API.

    Args:
        APPID: Application id; sent as ``app_id`` in the common arguments.
        APIKey: API key embedded in the authorization parameter.
        APISecret: Secret used as the HMAC-SHA256 key when signing requests.
    """

    def __init__(self, APPID, APIKey, APISecret):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret

        # Common arguments ("common" section of the first frame).
        self.CommonArgs = {"app_id": self.APPID}
        # Business arguments ("business" section of the first frame); more
        # options are listed on the vendor's website.
        self.BusinessArgs = {"domain": "iat", "language": "zh_cn", "accent": "mandarin", "vinfo": 1, "vad_eos": 10000}

    def create_url(self):
        """Build a freshly signed ``wss://`` URL for the IAT endpoint.

        The signature covers the host, the current date and the request line,
        so a new URL has to be generated for every connection attempt.

        Returns:
            The endpoint URL with ``authorization``, ``date`` and ``host``
            query parameters appended.
        """
        host = "ws-api.xfyun.cn"
        path = "/v2/iat"

        # RFC 1123 timestamp in GMT taken straight from the epoch clock.
        # Going through a naive local datetime and mktime() is ambiguous
        # during a DST fall-back hour and could shift the signed date.
        date = email.utils.formatdate(usegmt=True)

        # String to sign: host line, date line and request line.
        signature_origin = f"host: {host}\ndate: {date}\nGET {path} HTTP/1.1"
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode('utf-8')

        authorization_origin = (
            f'api_key="{self.APIKey}", algorithm="hmac-sha256", '
            f'headers="host date request-line", signature="{signature}"'
        )
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode('utf-8')

        # Append the authentication parameters as the query string.
        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": host,
        })
        return f"wss://{host}{path}?{query}"

# Queue used to store messages waiting to be sent
message_queue = Queue()
# Lock held while message_queue is accessed
queue_lock = threading.Lock()

def on_message(ws, message):
    """Print the recognition text (or the error) carried by a server message.

    Args:
        ws: The WebSocket application that received the message (unused).
        message: JSON text received from the server.

    Any failure while decoding or walking the message is reported on stdout
    instead of being raised, so a malformed message does not propagate an
    exception to the caller.
    """
    try:
        response = json.loads(message)
        code = response.get("code")
        sid = response.get("sid")

        if code != 0:
            errMsg = response.get("message", "Unknown error")
            print(f"sid:{sid} call error:{errMsg} code is:{code}")
            return

        # "data" / "result" / "ws" / "cw" may be absent or explicitly null;
        # treat both the same way and fall back to an empty result.
        recognition = (response.get("data") or {}).get("result") or {}
        segments = recognition.get("ws") or []
        result = "".join(
            word.get("w", "")
            for segment in segments
            for word in (segment.get("cw") or [])
        )

        print(f"sid:{sid} call success! Text: {result}")

    except Exception as e:
        print("Receive msg, but parse exception:", e)

def on_error(ws, error):
    """Report a WebSocket error on stdout.

    Args:
        ws: The WebSocket application that raised the error (unused).
        error: The error object or message to display.
    """
    report = f"### error: {error}"
    print(report)

def on_close(ws, close_status_code, close_msg):
    """Report that the WebSocket connection was closed.

    Args:
        ws: The WebSocket application whose connection closed (unused).
        close_status_code: Close status code passed to the callback.
        close_msg: Close reason passed to the callback.

    Reconnection is deliberately not triggered here. reconnect() already
    loops and opens a new connection once run_forever() returns; calling
    reconnect() from this callback would start another endless loop inside
    the callback, so it would never return and every disconnect would nest
    one level deeper.
    """
    print(f"### closed with code: {close_status_code}, message: {close_msg}")

def on_open(ws):
    """Start streaming microphone audio once the WebSocket is open.

    A background thread records 16 kHz, 16-bit mono audio in chunks of 8000
    frames (half a second each), wraps every chunk in a JSON frame and sends
    it on ``ws``. The first frame also carries the ``common`` and
    ``business`` arguments from the module-level ``wsParam``.

    Args:
        ws: The WebSocket application whose connection was just opened.
    """
    def run():
        p = pyaudio.PyAudio()
        stream = None
        status = STATUS_FIRST_FRAME

        try:
            # Opened inside the try block so PyAudio is still terminated if
            # the input device cannot be opened.
            stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)

            while True:
                buf = stream.read(8000, exception_on_overflow=False)
                if not buf:
                    status = STATUS_LAST_FRAME

                msg = {
                    "data": {
                        "format": "audio/L16;rate=16000",
                        "audio": base64.b64encode(buf).decode('utf-8'),
                        "encoding": "raw",
                        "status": status,
                    }
                }
                if status == STATUS_FIRST_FRAME:
                    msg["common"] = wsParam.CommonArgs
                    msg["business"] = wsParam.BusinessArgs

                if not ws.sock or not ws.sock.connected:
                    print("WebSocket connection is closed, exiting.")
                    break

                # Frames are sent directly on this connection only. They are
                # not also put on message_queue, because the queue worker
                # started by reconnect() would transmit each frame again.
                try:
                    ws.send(json.dumps(msg))
                except websocket.WebSocketConnectionClosedException as e:
                    print("WebSocket connection closed while sending message:", e)
                    break

                if status == STATUS_LAST_FRAME:
                    # The last frame is sent before stopping; wait briefly
                    # before the connection is closed in the finally block.
                    sleep(1)
                    break

                status = STATUS_CONTINUE_FRAME
                sleep(0.04)

        finally:
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()
            ws.close()

    threading.Thread(target=run).start()

def reconnect():
    """Connect to the IAT service and keep reconnecting forever.

    Each iteration signs a fresh URL, creates a WebSocketApp wired to the
    module's callbacks, starts a worker that forwards queued messages on that
    connection, and then blocks in ``run_forever()``. When the connection
    ends the worker is stopped and a new connection is attempted.

    Side effects:
        Rebinds the module-level ``wsParam`` that ``on_open`` reads.
    """
    global wsParam
    wsParam = Ws_Param(APPID='这里写你的appid', APISecret='同理', APIKey='同理')

    def process_queue(conn, stop_event):
        """Forward queued messages on ``conn`` until ``stop_event`` is set."""
        while not stop_event.is_set():
            msg = None
            # Hold the lock only while touching the queue, not while sending.
            with queue_lock:
                if not message_queue.empty():
                    msg = message_queue.get()
            if msg is not None:
                try:
                    if conn.sock and conn.sock.connected:
                        conn.send(json.dumps(msg))
                except websocket.WebSocketConnectionClosedException as e:
                    print("WebSocket connection closed while processing queue:", e)
                    # The outer loop creates the next connection; reconnecting
                    # from this worker would start a second, competing loop.
                    break
            stop_event.wait(0.1)

    while True:
        # One stop flag per connection, so the worker ends with its
        # connection and workers do not pile up across reconnects.
        stop_event = threading.Event()
        try:
            # The URL embeds a signed timestamp, so rebuild it every attempt.
            wsUrl = wsParam.create_url()
            ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
            ws.on_open = on_open

            threading.Thread(target=process_queue, args=(ws, stop_event)).start()
            # NOTE(review): CERT_NONE disables TLS certificate verification;
            # confirm this is intended before using outside of experiments.
            ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
        except Exception as e:
            print(f"Exception occurred: {e}, retrying...")
            sleep(5)  # wait 5 seconds before retrying the connection
        else:
            # Short pause so an immediately rejected connection does not
            # turn into a tight reconnect loop.
            sleep(1)
        finally:
            stop_event.set()

# Script entry point: start the connect/reconnect loop.
if __name__ == "__main__":
    reconnect()

讯飞开放平台网站:xfyun.cn。注意电脑要有可用的麦克风,并把代码里的 APPID、APISecret、APIKey 换成自己申请到的值,就没什么问题了。

效果比上次好~

thanks

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
讯飞提供了一系列的语音识别API,其中包括实时语音识别。你可以使用Python来调用讯飞提供的API,实现实时语音识别的功能。下面是一个简单的示例代码:

```python
import websocket
import datetime
import hashlib
import base64
import json

# 设置讯飞API的相关参数
APPID = 'your_appid'
API_KEY = 'your_api_key'
API_SECRET = 'your_api_secret'

# 获取当前时间戳
now = datetime.datetime.now()
timestamp = str(int(now.timestamp()))

# 构造签名信息
signature_origin = f"{API_KEY}POSThttps://iat-api.xfyun.cn/v2/iatrealtime{timestamp}content-type:application/json"
signature_sha = hashlib.sha256(signature_origin.encode('utf-8')).digest()
signature = base64.b64encode(signature_sha).decode('utf-8')

# 构造websocket连接地址
url = f'wss://iat-api.xfyun.cn/v2/iatrealtime?authorization={API_KEY}:{signature}&date={timestamp}&host=iat-api.xfyun.cn'

# 发送websocket请求
def on_message(ws, message):
    result = json.loads(message)
    if result['code'] == 0:
        print(result['data']['result'])

def on_error(ws, error):
    print(error)

def on_close(ws):
    print("### closed ###")

def on_open(ws):
    def run(*args):
        # 发送开始识别命令
        data = {
            "common": {
                "app_id": APPID
            },
            "business": {
                "language": "zh_cn",
                "domain": "iat",
                "accent": "mandarin",
                "vad_eos": 5000
            }
        }
        ws.send(json.dumps(data))
    thread.start_new_thread(run, ())

websocket.enableTrace(True)
ws = websocket.WebSocketApp(url, on_message=on_message, on_error=on_error, on_close=on_close)
ws.on_open = on_open
ws.run_forever()
```

上面的代码中,我们使用了Python中的websocket库来建立websocket连接,然后将讯飞API所需的参数进行编码,并构造出websocket连接地址。在连接成功后,我们发送开始识别命令,接着可以收到实时语音识别结果。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值