// TODO:
// - Figure out the audio format: https://github.com/Azure-Samples/Cognitive-Speech-TTS/wiki/how-to-choose-different-audio-output-format
// - Apply the Bing text-to-speech API: https://docs.microsoft.com/en-us/azure/cognitive-services/speech/api-reference-rest/bingvoiceoutput
// - Get the speech key from Azure
// - Integrate into the WeChat mini program
// Azure Cognitive Services subscription key (replace with your own key before use).
const speechKey = "your speech key";
// STS endpoint that exchanges the subscription key for a short-lived JWT access token.
const issueTokenUrl = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken";
// Bing text-to-speech synthesis endpoint.
const synthesizeUrl = "https://speech.platform.bing.com/synthesize";
/**
 * Generate an RFC 4122 version-4 style GUID string, e.g.
 * "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx" where y is one of 8, 9, a, b.
 *
 * Note: uses Math.random(), which is NOT cryptographically secure; fine for
 * request-correlation headers, not for security-sensitive identifiers.
 *
 * @returns {string} a 36-character GUID
 */
const getGuid = function () {
  const hexDigits = "0123456789abcdef";
  const s = [];
  for (let i = 0; i < 36; i++) {
    s[i] = hexDigits.charAt(Math.floor(Math.random() * 0x10));
  }
  s[14] = "4"; // version 4
  // Variant bits: force the clock-seq-hi nibble into the range 8..b.
  // BUG FIX: the original used (s[19] & 0x3), which coerces hex letters
  // a-f to NaN (-> 0), always yielding 8; parse the hex digit properly.
  s[19] = hexDigits.charAt((Number.parseInt(s[19], 16) & 0x3) | 0x8);
  s[8] = s[13] = s[18] = s[23] = "-";
  return s.join("");
};
class SpeechClient {
  // Directory intended for storing synthesized audio (currently unused by the methods below).
  storageDirectory = "";
  // Epoch millis of when the cached token was issued; used for expiry checks.
  tokenTime = null;
  // Cached JWT access token for the speech service.
  token = "";

  /**
   * @param {string} [storageDirectory] - optional storage directory path
   */
  constructor(storageDirectory) {
    this.storageDirectory = storageDirectory || "";
  }

  /**
   * Exchange the subscription key for a JWT access token via the STS endpoint.
   *
   * @returns {Promise<string>} resolves with the raw token text; rejects with
   *   { rc: 1, error } on a transport failure or { rc: 2, error } on a
   *   non-200/201 HTTP status.
   */
  getIssueTokenAsync() {
    return new Promise((resolve, reject) => {
      console.log(issueTokenUrl);
      wx.request({
        method: 'POST',
        url: issueTokenUrl,
        header: {
          'Ocp-Apim-Subscription-Key': speechKey
        },
        success(res) {
          if (res.statusCode === 200 || res.statusCode === 201) {
            return resolve(res.data);
          }
          return reject({
            rc: 2,
            error: 'Wrong status code returned by text to speech of MS service'
          });
        },
        fail(err) {
          return reject({
            rc: 1,
            error: err
          });
        }
      });
    });
  }

  /**
   * Synthesize `text` to MP3 via the Bing TTS endpoint and auto-play it.
   * Reuses the cached token when it is younger than ~9 minutes (tokens issued
   * by the STS endpoint expire after 10), otherwise fetches a fresh one first.
   *
   * BUG FIXES vs. original: the original returned undefined (callers could not
   * await completion), did nothing at all when the cached token was still
   * fresh, and left token-refresh rejections unhandled.
   *
   * @param {string} text - plain text to speak
   * @returns {Promise<string>} resolves with the local audio file path;
   *   rejects with the { rc, error } objects described on getIssueTokenAsync.
   */
  synthesizeVoiceAsync(text) {
    const now = Date.now();
    const tokenStale = !this.token || (now - this.tokenTime) > 540000;
    const tokenPromise = tokenStale
      ? this.getIssueTokenAsync().then((jwt) => {
          this.token = jwt;
          this.tokenTime = Date.now();
          return jwt;
        })
      : Promise.resolve(this.token);
    return tokenPromise.then((token) => this._requestSynthesis(token, text));
  }

  /**
   * Private helper: POST the SSML payload and play the returned MP3.
   *
   * @param {string} token - valid JWT access token
   * @param {string} text - plain text to speak
   * @returns {Promise<string>} resolves with the local audio file path
   */
  _requestSynthesis(token, text) {
    // Voice settings were stored as JSON by the app (wx.setStorageSync('voice')).
    // mappingName selects the concrete Microsoft voice and is required.
    const voice = JSON.parse(wx.getStorageSync('voice'));
    const voiceName = voice.mappingName;
    // NOTE(review): the original left gender as "female" for every input (dead
    // branch); assuming non-zero means male — confirm against the stored data.
    const gender = voice.gender == 0 ? "female" : "male";
    const ssmlXML =
      "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='" +
      voice.lang + "'><voice xml:lang='" + voice.lang + "' xml:gender='" + gender +
      "' name='Microsoft Server Speech Text to Speech Voice (" + voiceName + ")'>" +
      text + "</voice></speak>";
    return new Promise((resolve, reject) => {
      console.log(synthesizeUrl);
      wx.request({
        url: synthesizeUrl,
        data: ssmlXML,
        method: 'POST',
        header: {
          'Content-Type': 'application/ssml+xml',
          // Only mp3 output plays on both the iOS and Android runtimes.
          'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
          'Authorization': 'Bearer ' + token,
          'X-Search-AppId': getGuid(),
          'X-Search-ClientID': getGuid(),
        },
        // arraybuffer keeps the binary MP3 payload intact.
        responseType: 'arraybuffer',
        success(res) {
          if (res.statusCode === 200 || res.statusCode === 201) {
            console.log(res.header);
            const audioFilePath = wx.env.USER_DATA_PATH + '/speechAudio.mp3';
            const fs = wx.getFileSystemManager();
            // BUG FIX: the payload is binary MP3 data; the original wrote it
            // with 'utf-8', which corrupts it. 'binary' stores it verbatim.
            fs.writeFileSync(audioFilePath, res.data, 'binary');
            const innerAudioContext = wx.createInnerAudioContext();
            innerAudioContext.autoplay = true;
            innerAudioContext.src = audioFilePath;
            innerAudioContext.onPlay(() => {
              console.log('开始播放');
            });
            innerAudioContext.onError((e) => {
              console.log(e.errMsg);
              console.log(e.errCode);
            });
            return resolve(audioFilePath);
          }
          return reject({
            rc: 2,
            error: 'Wrong status code returned by text to speech of MS service'
          });
        },
        fail(err) {
          return reject({
            rc: 1,
            error: err
          });
        }
      });
    });
  }
}
module.exports = {
SpeechClient: SpeechClient,
}
// Notes:
// 1. mappingName is vital for producing the audio file.
// 2. Only the mp3 format is available on both the iOS and Android platforms.
// 3. responseType: 'arraybuffer' is required so the audio payload keeps its binary format.