Python2.7对接科大讯飞的语音合成和语音识别接口

最新推荐文章于 2023-10-27 09:07:56 发布

代码小猫熊

最新推荐文章于 2023-10-27 09:07:56 发布

阅读量1.4w

点赞数 1

文章标签：科大讯飞接口语音识别

本文链接：https://blog.csdn.net/kyowill1988/article/details/79765248

版权

小Z正在尝试科大讯飞的语音合成和语音识别的接口调用。

需要注意的是：

1. IP白名单要在“我的应用”中配置，配置后有5到10分钟的生效延迟；白名单配置不正确时会报错：

{"code":"10105","data":"","desc":"illegal access|illegal client_ip: XXXXXX","sid":"XXXX"}

2. 语音识别和语音合成使用不同的APPKEY（对应代码中的API_KEY），配置时要仔细核对，否则会报错：

{"code":"10105","data":"","desc":"illegal access|no appid info","sid":"XXXX"}

语音识别

# -*- coding: UTF-8 -*-
import requests
import time
import urllib
import json
import hashlib
import base64

# Endpoint that main() posts the recognition request to.
URL = "http://api.xfyun.cn/v1/service/v1/iat"
# Credentials are intentionally blank here and must be filled in before use:
# APPID is sent as the X-Appid header, API_KEY is mixed into X-CheckSum.
APPID = ""
API_KEY = ""

def getHeader():
    """Build the signed HTTP headers for a speech-recognition request.

    Returns:
        dict: the X-CurTime, X-Param, X-Appid, X-CheckSum and Content-Type
        headers. X-Param is the base64-encoded JSON parameter string and
        X-CheckSum is the MD5 hex digest of API_KEY + X-CurTime + X-Param.
    """
    timestamp = str(int(time.time()))
    # The request parameters travel base64-encoded in the X-Param header.
    encoded_param = base64.b64encode('{"engine_type": "sms16k", "aue": "raw"}')
    # The checksum covers the key, the timestamp and the encoded parameters.
    signature = hashlib.md5(API_KEY + timestamp + encoded_param).hexdigest()
    return {
        'X-CurTime': timestamp,
        'X-Param': encoded_param,
        'X-Appid': APPID,
        'X-CheckSum': signature,
        'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    }

def main():
    """Send one audio file to the recognition endpoint and print the outcome.

    The file is read in binary mode, base64-encoded and posted as the
    url-encoded form field ``audio``. When the JSON response has
    ``code == "0"`` its ``data`` field is printed; otherwise the raw
    response text is printed.
    """
    # NOTE: the path is an empty placeholder and must be set to the audio
    # file to recognise. The with-block closes the handle even if read()
    # raises, which the previous bare open() did not guarantee.
    with open("", 'rb') as audio_file:
        file_content = audio_file.read()
    base64_audio = base64.b64encode(file_content)
    body = urllib.urlencode({'audio': base64_audio})

    r = requests.post(URL, headers=getHeader(), data=body)
    result = json.loads(r.content)

    # Parenthesised single-argument print behaves the same on Python 2.7.
    if result["code"] == "0":
        print("success, data = " + result["data"])
    else:
        print(r.text)

# Run the recognition request only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()

语音合成

#-*- coding: utf-8 -*-
import base64
import hashlib
import json
import os
import time

import requests

# Endpoint that the synthesis request at the bottom of this script is posted to.
URL = "http://api.xfyun.cn/v1/service/v1/tts"
# Credentials are intentionally blank here and must be filled in before use:
# APPID is sent as the X-Appid header, API_KEY is mixed into X-CheckSum.
APPID = ""
API_KEY = ""
def getHeader(auf, aue, voiceName, speed, volume, pitch, engineType, textType):
    """Build the signed HTTP headers for a speech-synthesis request.

    The options are packed into a compact JSON object, base64-encoded and
    sent as ``X-Param``. ``auf`` is always included; each of the other
    options is left out when it is the empty string.

    Args:
        auf: value for the ``auf`` key (always emitted).
        aue: value for the ``aue`` key.
        voiceName: value for the ``voice_name`` key.
        speed: value for the ``speed`` key.
        volume: value for the ``volume`` key.
        pitch: value for the ``pitch`` key.
        engineType: value for the ``engine_type`` key.
        textType: value for the ``text_type`` key.

    Returns:
        dict: the X-CurTime, X-Param, X-Appid, X-CheckSum and Content-Type
        headers. X-CheckSum is the MD5 hex digest of
        API_KEY + X-CurTime + X-Param.
    """
    curTime = str(int(time.time()))

    optional_fields = (
        ("aue", aue),
        ("voice_name", voiceName),
        ("speed", speed),
        ("volume", volume),
        ("pitch", pitch),
        ("engine_type", engineType),
        ("text_type", textType),
    )
    # json.dumps quotes and escapes each value, so a quote or backslash in
    # an argument can no longer produce malformed JSON. For plain ASCII
    # values the output is the same text the manual concatenation produced.
    parts = ['"auf":' + json.dumps(auf)]
    for key, value in optional_fields:
        if value != "":
            parts.append('"%s":%s' % (key, json.dumps(value)))
    param = "{" + ",".join(parts) + "}"

    paramBase64 = base64.b64encode(param)

    # The checksum covers the key, the timestamp and the encoded parameters.
    checkSum = hashlib.md5(API_KEY + curTime + paramBase64).hexdigest()
    return {
        'X-CurTime': curTime,
        'X-Param': paramBase64,
        'X-Appid': APPID,
        'X-CheckSum': checkSum,
        'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    }

def getBody(text):
    """Return the POST form payload: a dict with the single key 'text'."""
    return {'text': text}

def writeFile(file, content):
    """Write ``content`` to the path ``file`` in binary mode.

    The file is opened with mode ``'wb'``, so an existing file at that path
    is truncated and replaced.

    Args:
        file: path of the file to write.
        content: the bytes to store.
    """
    # The with-block closes the handle on exit (also on error), so no
    # explicit close() call is needed afterwards.
    with open(file, 'wb') as f:
        f.write(content)

# Request synthesis of a fixed prompt. An "audio/mpeg" response carries the
# audio bytes, which are saved under ./audio/ and named after the response's
# "sid" header; any other response is printed as text.
r = requests.post(
    URL,
    headers=getHeader("audio/L16;rate=16000", "raw", "xiaoyan", "50", "50", "50", "aisound", "text"),
    data=getBody("请输入你的身高和体重"),
)
contentType = r.headers['Content-Type']
if contentType == "audio/mpeg":
    sid = r.headers['sid']
    # open() does not create missing directories, so make sure the output
    # directory exists before writing into it.
    if not os.path.isdir("audio"):
        os.makedirs("audio")
    writeFile("audio/"+sid+".wav", r.content)
    # Parenthesised single-argument print behaves the same on Python 2.7.
    print("success, sid = " + sid)
else:
    print(r.text)