用python调用百度语音识别api批量处理本地语音文件

最新推荐文章于 2024-09-03 17:37:40 发布

Mr.Hou(2015)

最新推荐文章于 2024-09-03 17:37:40 发布

阅读量778

点赞数

分类专栏： python语言　文章标签： python 语音识别

本文链接：https://blog.csdn.net/houpeng1215/article/details/109558260

版权

python语言专栏收录该内容

2 篇文章 0 订阅

订阅专栏

需要对本地的录音文件(.wav格式)进行语音识别，因阿里账号的试用版已过期，所以尝试通过百度的api进行识别，代码如下：

#导入需要用到的库
import os
import re
import requests
import time
import base64
import webbrowser
import xlrd
import pandas as pd
from copy import deepcopy

#需要安装百度api的库 pip install baidu_aip
from aip import AipSpeech

# Folder that holds the recordings to transcribe (replace with your own path).
AUDIO_DIR = r'你的录音文件存放的文件夹地址'

# Names of every entry found in the recordings folder.
allfiles = os.listdir(AUDIO_DIR)

# Full path of each recording, in the same order as `allfiles`.
# os.path.join supplies the path separator; building the path with a
# r'...\' literal does not work because a raw string cannot end in a
# single backslash.
filesList = [os.path.join(AUDIO_DIR, name) for name in allfiles]

def get_audio(file):
    """Read a file and return its raw contents.

    Args:
        file: Path of the file to read.

    Returns:
        bytes: The complete contents of the file, read in binary mode.
    """
    with open(file, 'rb') as f:
        return f.read()

def getToken(HOST):
    """Fetch an access token from the OAuth token endpoint.

    Args:
        HOST: Full token URL, including the client id / client secret
            query parameters.

    Returns:
        The value of the ``access_token`` field of the JSON response.

    Raises:
        requests.RequestException: On network failure, time-out, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response contains no ``access_token`` field.
    """
    # Time out instead of hanging the whole batch on a stalled connection.
    response = requests.get(HOST, timeout=10)
    response.raise_for_status()
    # Parse the body as JSON; calling eval() on text received from the
    # network would execute whatever code the response happens to contain.
    payload = response.json()
    return payload['access_token']

# --- Baidu application credentials -----------------------------------------
# Create a speech-recognition application in the Baidu console to obtain the
# ID and keys below and to claim the free quota (per the author: valid for
# 6 months, 20,000 calls only; after that it is presumably paid).
APP_ID = '*******'
API_KEY = '*************'
SECRET_KEY = '*********'
APIKey, SecretKey = API_KEY, SECRET_KEY

# --- OAuth token endpoint ----------------------------------------------------
base_url =  "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
# Token URL with the client id / secret filled in.
HOST = base_url % (APIKey, SecretKey)

# --- Recognition request parameters -----------------------------------------
# NOTE(review): dev_pid selects the recognition model/language on the Baidu
# side -- confirm that 1637 is the one you want against the API docs.
dev_pid = 1637
FORMAT = 'wav'
RATE = '16000'
CHANNEL = 1
# The application ID doubles as the caller's unique user id.
CUID = APP_ID

# --- Audio description values -----------------------------------------------
# These four are defined here but not referenced by the rest of the script.
framerate = 16000  # sample rate
num_samples = 2000  # number of samples
channels = 1  # number of channels
sampwidth = 2  # sample width: 2 bytes

def speech_to_text(fpath, HOST):
    """Send one audio file to the recognition endpoint and return the result.

    The file is read, base64-encoded and posted as JSON together with the
    module-level request parameters (FORMAT, RATE, CHANNEL, CUID, dev_pid).

    Args:
        fpath: Path of the audio file to recognise.
        HOST: Token URL passed on to ``getToken``.

    Returns:
        dict: ``{"result_text": ..., "err_msg": ...}``. Each value is taken
        from the ``result`` / ``err_msg`` field of the server's reply; a
        field that is missing is replaced by the placeholder ``['异常']`` /
        ``"异常"``, so an error reply keeps its real ``err_msg``.

    Raises:
        Whatever ``get_audio``, ``getToken`` or the HTTP request raise
        (e.g. OSError, requests.RequestException, ValueError for a
        non-JSON reply).
    """
    speech_data = get_audio(fpath)
    speech = base64.b64encode(speech_data).decode('utf-8')
    # NOTE(review): a fresh token is requested for every file; consider
    # fetching it once and reusing it if the batch is large.
    token = getToken(HOST)

    data = {
        'format': FORMAT,
        # The API documents `rate` as an integer, while RATE is kept as a
        # string at module level.
        'rate': int(RATE),
        'channel': CHANNEL,
        'cuid': CUID,
        # Length of the raw audio in bytes, i.e. before base64 encoding.
        'len': len(speech_data),
        'speech': speech,
        'token': token,
        'dev_pid': dev_pid,
    }

    url = 'https://vop.baidu.com/server_api'
    headers = {'Content-Type': 'application/json'}
    # Time out instead of blocking the whole batch on a stalled request.
    r = requests.post(url, json=data, headers=headers, timeout=60)
    Result = r.json()
    if not isinstance(Result, dict):
        # Unexpected reply shape: fall back to the placeholders below.
        Result = {}

    return {
        "result_text": Result.get('result', ['异常']),
        "err_msg": Result.get('err_msg', "异常"),
    }

# Batch recognition: run every recording through speech_to_text and collect
# the outcome in four parallel lists (one entry per file).
Result_text0 = []   # first candidate transcript per file ("" if none)
Result_text1 = []   # full "result_text" value per file
err_msg = []        # "err_msg" value per file
allfiles_da = []    # file name per processed file
testcount = 0


def _save_results():
    """Write everything collected so far to the Excel output file."""
    f1 = pd.DataFrame({'allfiles_da':allfiles_da,'Result_text1':Result_text1,'Result_text':Result_text0,'err_msg':err_msg})
    f1.to_excel(r'你需要保存的路径',index=False)


for testcount, fs in enumerate(filesList):
    try:
        yuyins = speech_to_text(fs, HOST)
        full_text = yuyins["result_text"]
        message = yuyins["err_msg"]
    except Exception as exc:
        # Best effort: one failing file must not abort the batch. Record a
        # marker row and report what went wrong.
        print("全部异常", fs, repr(exc))
        full_text = first_text = message = "全部异常"
    else:
        try:
            first_text = full_text[0]
        except (IndexError, KeyError, TypeError):
            first_text = ""

    # Append all four values together so the lists always stay aligned.
    Result_text1.append(full_text)
    Result_text0.append(first_text)
    err_msg.append(message)
    allfiles_da.append(allfiles[testcount])

    # Checkpoint after every 10 recognised files.
    if (testcount + 1) % 10 == 0:
        _save_results()
    print("正在识别第{}个".format(testcount))

# Final save so the last partial batch (fewer than 10 files) is not lost.
if len(allfiles_da) % 10 != 0:
    _save_results()