借助百度语音识别 API,可以通过 Python 实现音频识别。注意:百度提供的 Python 识别接口仅支持时长低于 60s 的音频,且对输入音频格式有一定要求:录音参数必须符合 8k/16k 采样率、16bit 位深、单声道;支持的格式有:pcm(不压缩)、wav(不压缩,pcm 编码)、amr(压缩格式)。
代码如下:
# coding: utf-8
import base64
import json
import os
import urllib.parse
import urllib.request
# Application settings for Baidu's OAuth 2.0 token endpoint.
baidu_server = "https://openapi.baidu.com/oauth/2.0/token?"
grant_type = "client_credentials"
# The credentials may be supplied through the BAIDU_API_KEY and
# BAIDU_SECRET_KEY environment variables so they do not have to live in the
# source file; the literals are kept only as backward-compatible fallbacks.
client_id = os.environ.get("BAIDU_API_KEY", "XBGZGcu00cscPWt7AYguhjf7")  # API Key
client_secret = os.environ.get("BAIDU_SECRET_KEY", "jGuw7Q9YTGKFKgvK7LPDEbKYHCyyGRYn")  # Secret Key
# Build the token request URL. urlencode() percent-escapes each value, so a
# credential containing a reserved character cannot corrupt the query string.
url = baidu_server + urllib.parse.urlencode([
    ("grant_type", grant_type),
    ("client_id", client_id),
    ("client_secret", client_secret),
])
# Fetch the token. The context manager closes the HTTP response, and the
# timeout keeps a stalled connection from blocking the script forever.
with urllib.request.urlopen(url, timeout=10) as resp:
    res = resp.read()
data = json.loads(res.decode('utf-8'))
# Fail with the server's own reply instead of an opaque KeyError when the
# response carries no token.
if "access_token" not in data:
    raise RuntimeError("Baidu token request failed: {!r}".format(data))
token = data["access_token"]
# NOTE: the token is a bearer credential; avoid printing it in shared logs.
print(token)
# Audio properties. The recording is expected to be 8 kHz or 16 kHz, 16-bit,
# mono; VOICE_RATE and WAVE_TYPE must describe the file that is uploaded.
VOICE_RATE = 16000
WAVE_FILE = "E:/1.wav"  # path of the audio file to recognise
USER_ID = "hail_hydra"  # arbitrary identifier sent as `cuid`
WAVE_TYPE = "wav"

# Read the whole recording; the context manager guarantees the file handle
# is closed even if the read fails.
with open(WAVE_FILE, 'rb') as wav_fp:
    voice_data = wav_fp.read()

# Parameters of the recognition request.
data = {}
data['format'] = WAVE_TYPE  # single source of truth for the audio format
data['rate'] = VOICE_RATE
data['channel'] = 1
data['cuid'] = USER_ID
data['token'] = token
data['len'] = len(voice_data)  # size of the raw audio, before base64 encoding
data['speech'] = base64.b64encode(voice_data).decode('utf-8')
post_data = json.dumps(data)
print("---"*30)
headers = {'Content-Type': 'application/json'}
url = "http://vop.baidu.com/server_api"
req = urllib.request.Request(url, post_data.encode("utf-8"), headers)
# Close the response deterministically and bound how long a stalled
# connection may block the script.
with urllib.request.urlopen(req, timeout=30) as r:
    t = r.read().decode("utf-8")
result = json.loads(t)
# A failed recognition reply has no usable 'result' list; report the
# service's error code and message instead of crashing with KeyError.
if result.get('err_no', 0) != 0 or not result.get('result'):
    raise RuntimeError(
        "Baidu speech recognition failed (err_no={}): {}".format(
            result.get('err_no'), result.get('err_msg', t)
        )
    )
print(result['result'][0])