WeChat mini program text-to-speech feature using the Microsoft Azure Text to Speech API

  1. Figure out the audio output format (see the sketch after this list): https://github.com/Azure-Samples/Cognitive-Speech-TTS/wiki/how-to-choose-different-audio-output-format
  2. Apply for the Bing text-to-speech API: https://docs.microsoft.com/en-us/azure/cognitive-services/speech/api-reference-rest/bingvoiceoutput
  3. Get a speech key from Azure.
  4. Integrate the client into the WeChat mini program.
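
To make step 1 concrete, the output format is just a string sent later in the X-Microsoft-OutputFormat request header. Below is a small sketch with two values from the format list linked above; the mp3 one is what the client in this post uses, while the WAV value is only an illustrative alternative to check against the wiki page:

// Candidate values for the X-Microsoft-OutputFormat request header.
const OUTPUT_FORMATS = {
  mp3: 'audio-16khz-128kbitrate-mono-mp3', // compressed; used by the client below
  wav: 'riff-16khz-16bit-mono-pcm'         // uncompressed PCM alternative from the format list
};

The full client used in the mini program follows.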

// Subscription key and endpoints for the Bing text-to-speech service
const speechKey = "your speech key";
const issueTokenUrl = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken";
const synthesizeUrl = "https://speech.platform.bing.com/synthesize";
// Generate a version 4 GUID for the X-Search-AppId / X-Search-ClientID headers.
const getGuid = function () {
  var s = [];
  var hexDigits = "0123456789abcdef";
  for (var i = 0; i < 36; i++) {
    s[i] = hexDigits.substr(Math.floor(Math.random() * 0x10), 1);
  }
  s[14] = "4"; // version 4 UUID
  // variant bits: parse the hex character before masking so the bit math is valid
  s[19] = hexDigits.substr((parseInt(s[19], 16) & 0x3) | 0x8, 1);
  s[8] = s[13] = s[18] = s[23] = "-";

  return s.join("");
};

class SpeechClient {

  storageDirectory = "";
  tokenTime = null;
  token = "";

  constructor(storageDirectory) {
    this.storageDirectory = storageDirectory || "";
  }

  /**
   * Exchange the subscription key for a short-lived access token (JWT)
   * used to authorize the synthesize request.
   */
  getIssueTokenAsync() {
    return new Promise((resolve, reject) => {
      console.log(issueTokenUrl);
      wx.request({
        method: 'POST',
        url: issueTokenUrl,
        header: {
          'Ocp-Apim-Subscription-Key': speechKey
        },
        success(res) {
          if (res.statusCode === 200 || res.statusCode === 201) {
            return resolve(res.data);
          } else {
            return reject({
              rc: 2,
              error: 'Wrong status code returned by text to speech of MS service'
            });
          }
        },
        fail(err) {
          return reject({
            rc: 1,
            error: err
          });
        }
      });
    });
  }

  /**
   * Synthesize the given text with the text-to-speech service and play the
   * resulting mp3. Resolves with the path of the audio file that was written.
   */
  synthesizeVoiceAsync(text) {
    var nowTime = new Date().getTime();

    // Reuse the cached token; tokens expire after 10 minutes, so refresh after 9 (540000 ms).
    var tokenPromise;
    if (this.token && (nowTime - this.tokenTime) <= 540000) {
      tokenPromise = Promise.resolve(this.token);
    } else {
      tokenPromise = this.getIssueTokenAsync().then(jwt => {
        this.token = jwt;
        this.tokenTime = new Date().getTime();
        return jwt;
      });
    }

    return tokenPromise.then(token => {
      // The voice configuration (lang, gender, mappingName) is read from local storage.
      var voice = JSON.parse(wx.getStorageSync('voice'));
      var voiceName = voice.mappingName;
      var gender = voice.gender == 0 ? "female" : "male";

      var ssmlXML = "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='" + voice.lang + "'><voice xml:lang='" + voice.lang + "' xml:gender='" + gender + "' name='Microsoft Server Speech Text to Speech Voice (" + voiceName + ")'>" + text + "</voice></speak>";

      return new Promise((resolve, reject) => {
        console.log(synthesizeUrl);

        wx.request({
          url: synthesizeUrl,
          data: ssmlXML,
          method: 'POST',
          header: {
            'Content-Type': 'application/ssml+xml',
            'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
            'Authorization': 'Bearer ' + token,
            'X-Search-AppId': getGuid(),
            'X-Search-ClientID': getGuid(),
          },
          responseType: 'arraybuffer',
          success(res) {
            if (res.statusCode === 200 || res.statusCode === 201) {
              console.log(res.header);

              // Write the returned ArrayBuffer to a local binary file and play it.
              var audioFilePath = wx.env.USER_DATA_PATH + '/speechAudio.mp3';
              const fs = wx.getFileSystemManager();
              fs.writeFileSync(audioFilePath, res.data, 'binary');

              var innerAudioContext = wx.createInnerAudioContext();
              innerAudioContext.autoplay = true;
              innerAudioContext.src = audioFilePath;
              innerAudioContext.onPlay(() => {
                console.log('playback started');
              });
              innerAudioContext.onError((res) => {
                console.log(res.errMsg);
                console.log(res.errCode);
              });

              return resolve(audioFilePath);
            } else {
              return reject({
                rc: 2,
                error: 'Wrong status code returned by text to speech of MS service'
              });
            }
          },
          fail(err) {
            return reject({
              rc: 1,
              error: err
            });
          }
        });
      });
    });
  }
}


module.exports = {
  SpeechClient: SpeechClient,
}
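
A minimal usage sketch from a page script, assuming the module above is saved as utils/speechClient.js and that a voice configuration has already been written to storage (the path and the tap handler name are assumptions for illustration, not part of the original code):

// pages/index/index.js: hypothetical page wiring for the client above
const { SpeechClient } = require('../../utils/speechClient');

const client = new SpeechClient();

Page({
  onSpeakTap: function () {
    client.synthesizeVoiceAsync('Hello from the mini program')
      .then(audioFilePath => console.log('audio written to ' + audioFilePath))
      .catch(err => console.error('synthesis failed', err));
  }
});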

Note that

1. The voice's mappingName is vital: it is interpolated into the SSML voice name, so synthesis fails if it is missing or wrong (see the sketch after these notes).

2. Only the MP3 output format plays back reliably on both the iOS and Android platforms.

3. responseType: 'arraybuffer' is required so that the audio response arrives as binary data and can be written to a playable file.
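
For reference, here is a sketch of the voice object the client expects in storage; the field names come from the code above, while 'zh-CN, HuihuiRUS' is only an illustrative mappingName and should be replaced with a voice from Microsoft's voice list:

// Hypothetical voice configuration, written once before calling synthesizeVoiceAsync.
// mappingName is the part inside the parentheses of the full voice name, e.g.
// "Microsoft Server Speech Text to Speech Voice (zh-CN, HuihuiRUS)".
wx.setStorageSync('voice', JSON.stringify({
  lang: 'zh-CN',                  // used as xml:lang in the SSML
  gender: 0,                      // 0 maps to female in the client above
  mappingName: 'zh-CN, HuihuiRUS' // example value only
}));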
