基于百度api的语音识别

基于百度api的语音识别
代码如下所示:

# coding=utf-8

import sys
import json
import time

# True when running under Python 3; selects the urllib flavour and the timer.
IS_PY3 = sys.version_info.major == 3

if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode

    timer = time.perf_counter
else:
    import urllib2
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode

    # Only the Python 2 branch may pick time.clock / time.time: time.clock
    # was removed in Python 3.8, and Python 3 already uses perf_counter above.
    if sys.platform == "win32":
        timer = time.clock
    else:
        # On most other platforms the best timer is time.time()
        timer = time.time

# NOTE: sample credentials copied from the original post. Replace them with
# the API key / secret key issued for your own application.
API_KEY = '9cVZDkCrl0sZP3wpQlMeqZq2'
SECRET_KEY = 'lGTYdBrcomGUAgfPCt2jrYO9Rg68IMAB'

# Audio file to recognise. Only pcm/wav/amr are supported; the express
# edition additionally supports m4a.
AUDIO_FILE = '1.wav'

# Audio format, taken from the last three characters of the file name
# (the file suffix). Same format restrictions as AUDIO_FILE.
FORMAT = AUDIO_FILE[-3:]

# Sent as the 'cuid' query parameter of the recognition request.
CUID = '123456PYTHON'

# Fixed value; sent as 'rate=' in the Content-Type header.
RATE = 16000

class DemoError(Exception):
    """Error raised by this demo script.

    Used when the token response is unusable (missing fields or wrong
    scope) and when the audio file turns out to be empty.
    """

# ---- TOKEN start ----

# NOTE(review): plain-HTTP endpoint kept from the original; the client secret
# travels unencrypted. Consider an HTTPS endpoint if the service offers one.
TOKEN_URL = 'http://openapi.baidu.com/oauth/2.0/token'

# Scope the returned token must carry. Set to False to skip the check.
# NOTE(review): value taken from Baidu's published standard-edition ASR demo;
# confirm it matches the capabilities enabled for your application.
SCOPE = 'audio_voice_assistant_get'


def fetch_token():
    """Request an OAuth access token using API_KEY / SECRET_KEY.

    Posts a client-credentials request to TOKEN_URL, prints the raw and
    parsed responses, and returns the ``access_token`` field.

    Returns:
        The access token string from the JSON response.

    Raises:
        DemoError: if the response lacks ``access_token`` or ``scope``, or
            if SCOPE is set and is not among the granted scopes.
        URLError: if the request fails without an HTTP response (for
            example a DNS or connection failure).
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        # urllib on Python 3 requires the POST body as bytes.
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            f.close()
    except URLError as err:
        # Only HTTPError (a URLError subclass) carries a status code and a
        # readable body; a plain URLError has neither, so let it propagate
        # instead of failing with AttributeError on err.code.
        if not hasattr(err, 'read'):
            raise
        print('token http response http code : ' + str(err.code))
        result_str = err.read()
    if IS_PY3:
        result_str = result_str.decode()

    print(result_str)
    result = json.loads(result_str)
    print(result)
    if 'access_token' in result and 'scope' in result:
        # SCOPE = False skips the scope check.
        if SCOPE and SCOPE not in result['scope'].split(' '):
            raise DemoError('scope is not correct')
        print('SUCCESS WITH TOKEN: %s ; EXPIRES IN SECONDS: %s' % (result['access_token'], result['expires_in']))
        return result['access_token']
    raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')

# ---- TOKEN end ----

# Recognition model id and endpoint used by the request below.
# NOTE(review): values taken from Baidu's published standard-edition ASR demo
# (1537 = Mandarin input-method model); confirm against the API documentation
# for the model you intend to use.
DEV_PID = 1537
ASR_URL = 'http://vop.baidu.com/server_api'

if __name__ == '__main__':
    token = fetch_token()

    # To trace the HTTP exchange under Python 2, install a debugging opener:
    #   httpHandler = urllib2.HTTPHandler(debuglevel=1)
    #   opener = urllib2.build_opener(httpHandler)
    #   urllib2.install_opener(opener)

    with open(AUDIO_FILE, 'rb') as speech_file:
        speech_data = speech_file.read()
    length = len(speech_data)
    if length == 0:
        raise DemoError('file %s length read 0 bytes' % AUDIO_FILE)

    params = {'cuid': CUID, 'token': token, 'dev_pid': DEV_PID}
    # Enable the following line instead when testing a self-trained model:
    # params = {'cuid': CUID, 'token': token, 'dev_pid': DEV_PID, 'lm_id' : LM_ID}
    params_query = urlencode(params)

    headers = {
        'Content-Type': 'audio/' + FORMAT + '; rate=' + str(RATE),
        'Content-Length': length
    }

    url = ASR_URL + "?" + params_query
    print("url is", url)
    print("header is", headers)
    # The raw audio bytes are the POST body; parameters go in the query string.
    req = Request(url, speech_data, headers)
    try:
        begin = timer()
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            f.close()
        print("Request time cost %f" % (timer() - begin))
    except URLError as err:
        # Only HTTPError (a URLError subclass) has a status code and body;
        # re-raise plain connection failures instead of hitting AttributeError.
        if not hasattr(err, 'read'):
            raise
        print('asr http response http code : ' + str(err.code))
        result_str = err.read()

    if IS_PY3:
        result_str = str(result_str, 'utf-8')
    print(result_str)
    with open("result.txt", "w") as of:
        of.write(result_str)

注意:

(1)待识别的语音须为 16 kHz 采样率的单声道音频
(2)单次识别的音频时长须在 1 分钟以内
(3)可以自己录制音频。
如需将 MP3 格式转换成 WAV 格式,可使用:
ffmpeg -i 01.mp3 -acodec pcm_s16le -ac 1 -ar 16000 1.wav
(4)API_KEY 和 SECRET_KEY 需要自己申请(有效期 1 个月),并替换代码中的对应值:
API_KEY = '申请到的 API Key'
SECRET_KEY = '申请到的 Secret Key'
(5)申请网站:http://ai.baidu.com/ai-doc/SPEECH/tk4o0bm3v
(6)运行环境:Python,Linux 操作系统

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值