springboot项目集成科大讯飞文字转语音
首先到https://console.xfyun.cn/注册实名认证,得到相应的密钥
根据api文档我们了解到这个调用过程
在项目中集成
首先引入需要的依赖
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.10.0</version>
</dependency>
<dependency>
<groupId>org.java-websocket</groupId>
<artifactId>Java-WebSocket</artifactId>
<version>1.5.3</version>
</dependency>
在yml文件中配置相应参数
xunfei:
hostUrl: http://tts-api.xfyun.cn/v2/tts
appid:
apisecret:
apikey:
现在写一个工具类
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import okhttp3.*;
import okio.ByteString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.*;
//静态参数注入,必须增加@Component注解
@Component
public class XunFeiUtil {
protected static final Logger log = LoggerFactory.getLogger(XunFeiUtil.class);
//讯飞四个注入参数,保存在配置文件,便于复用和避免代码上传gitee后泄漏
private static String hostUrl;
@Value("${xunfei.hostUrl}")
public void setHostUrl(String hostUrl) {
XunFeiUtil.hostUrl = hostUrl;
}
private static String appid;
@Value("${xunfei.appid}")
public void setAppid(String appid) {
XunFeiUtil.appid = appid;
}
private static String apiSecret;
@Value("${xunfei.apisecret}")
public void setApiSecret(String apiSecret) {
XunFeiUtil.apiSecret = apiSecret;
}
private static String apiKey;
@Value("${xunfei.apikey}")
public void setApiKey(String apiKey) {
XunFeiUtil.apiKey = apiKey;
}
public static final Gson json = new Gson();
private static String base64 = "";
private static volatile boolean lock = true;
/**
* 将文本转换为MP3格语音base64文件
*
* @param text 要转换的文本(如JSON串)
* @return 转换后的base64文件
*
*/
public static String convertText(String text) throws Exception {
lock = true;
base64 = "";
// 构建鉴权url
String authUrl = getAuthUrl(hostUrl, apiKey, apiSecret);
OkHttpClient client = new OkHttpClient.Builder().build();
//将url中的 schema http://和https://分别替换为ws:// 和 wss://
String url = authUrl.toString().replace("http://", "ws://").replace("https://", "wss://");
Request request = new Request.Builder().url(url).build();
List<byte[]> list = Lists.newArrayList();
WebSocket webSocket = client.newWebSocket(request, new WebSocketListener() {
@Override
public void onOpen(WebSocket webSocket, Response response) {
super.onOpen(webSocket, response);
try {
System.out.println(response.body().string());
} catch (IOException e) {
e.printStackTrace();
}
//发送数据
JsonObject frame = new JsonObject();
JsonObject business = new JsonObject();
JsonObject common = new JsonObject();
JsonObject data = new JsonObject();
// 填充common
common.addProperty("app_id", appid);
//填充business,AUE属性lame是MP3格式,raw是PCM格式
business.addProperty("aue", "lame");
business.addProperty("sfl", 1);
business.addProperty("tte", "UTF8");//小语种必须使用UNICODE编码
business.addProperty("vcn", "xiaoyan");//到控制台-我的应用-语音合成-添加试用或购买发音人,添加后即显示该发音人参数值,若试用未添加的发音人会报错11200
business.addProperty("pitch", 50);
business.addProperty("speed", 50);
//填充data
data.addProperty("status", 2);//固定位2
try {
data.addProperty("text", Base64.getEncoder().encodeToString(text.getBytes("utf8")));
//使用小语种须使用下面的代码,此处的unicode指的是 utf16小端的编码方式,即"UTF-16LE"”
//data.addProperty("text", Base64.getEncoder().encodeToString(text.getBytes("UTF-16LE")));
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
//填充frame
frame.add("common", common);
frame.add("business", business);
frame.add("data", data);
webSocket.send(frame.toString());
}
@Override
public void onMessage(WebSocket webSocket, String text) {
super.onMessage(webSocket, text);
//处理返回数据
System.out.println("receive=>");
ResponseData resp = null;
try {
resp = json.fromJson(text, ResponseData.class);
} catch (Exception e) {
e.printStackTrace();
}
if (resp != null) {
if (resp.getCode() != 0) {
System.out.println("error=>" + resp.getMessage() + " sid=" + resp.getSid());
return;
}
if (resp.getData() != null) {
String result = resp.getData().audio;
byte[] audio = Base64.getDecoder().decode(result);
list.add(audio);
// 说明数据全部返回完毕,可以关闭连接,释放资源
if (resp.getData().status == 2) {
String is = base64Concat(list);
base64 = is;
lock = false;
webSocket.close(1000, "");
}
}
}
}
@Override
public void onMessage(WebSocket webSocket, ByteString bytes) {
super.onMessage(webSocket, bytes);
}
@Override
public void onClosing(WebSocket webSocket, int code, String reason) {
super.onClosing(webSocket, code, reason);
System.out.println("socket closing");
}
@Override
public void onClosed(WebSocket webSocket, int code, String reason) {
super.onClosed(webSocket, code, reason);
System.out.println("socket closed");
}
@Override
public void onFailure(WebSocket webSocket, Throwable t, Response response) {
super.onFailure(webSocket, t, response);
System.out.println("connection failed" + response.message());
}
});
while (lock) {
}
return base64;
}
/**
* * base64拼接
*
*/
static String base64Concat(List<byte[]> list) {
int length = 0;
for (byte[] b : list) {
length += b.length;
}
int len = 0;
byte[] retByte = new byte[length];
for (byte[] b : list) {
retByte = concat(len,retByte, b);
len += b.length;
}
return cn.hutool.core.codec.Base64.encode(retByte);
}
static byte[] concat(int len,byte[] a,byte[] b){
for(int i = 0;i < b.length;i++){
a[len] = b[i];
len++;
}
return a;
}
/**
* * 获取权限地址
* *
* * @param hostUrl
* * @param apiKey
* * @param apiSecret
* * @return
*
*/
private static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {
URL url = new URL(hostUrl);
SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
format.setTimeZone(TimeZone.getTimeZone("GMT"));
String date = format.format(new Date());
StringBuilder builder = new StringBuilder("host: ").append(url.getHost()).append("\n").
append("date: ").append(date).append("\n").
append("GET ").append(url.getPath()).append(" HTTP/1.1");
Charset charset = Charset.forName("UTF-8");
Mac mac = Mac.getInstance("hmacsha256");
SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(charset), "hmacsha256");
mac.init(spec);
byte[] hexDigits = mac.doFinal(builder.toString().getBytes(charset));
String sha = Base64.getEncoder().encodeToString(hexDigits);
String authorization = String.format("hmac username=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", sha);
HttpUrl httpUrl = HttpUrl.parse("https://" + url.getHost() + url.getPath()).newBuilder().
addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorization.getBytes(charset))).
addQueryParameter("date", date).
addQueryParameter("host", url.getHost()).
build();
return httpUrl.toString();
}
public static class ResponseData {
private int code;
private String message;
private String sid;
private Data data;
public int getCode() {
return code;
}
public String getMessage() {
return this.message;
}
public String getSid() {
return sid;
}
public Data getData() {
return data;
}
}
private static class Data {
//标志音频是否返回结束 status=1,表示后续还有音频返回,status=2表示所有的音频已经返回
private int status;
//返回的音频,base64 编码
private String audio;
// 合成进度
private String ced;
}
}
编写接口
import com.back.back_system.utils.XunFeiUtil;
import io.swagger.annotations.ApiOperation;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Base64;
@RequestMapping("/xunfei")
@RestController
public class TextToAudioController {
private static final Logger log = LoggerFactory.getLogger(TextToAudioController.class);
@ApiOperation(value = "文字转语音", notes = "文字转语音")
@PostMapping(value = "textToAudio")
public void textToAudio(String text, HttpServletRequest request , HttpServletResponse response) throws IOException {
if (StringUtils.isNotBlank(text)) {
//过滤图片,h5标签
text = text.replaceAll("\\&[a-zA-Z]{1,10};", "").replaceAll("<[^>]*>", "").replaceAll("[(/>)<]", "").trim();
//调用微服务接口获取音频base64
String result = "";
try {
result = XunFeiUtil.convertText(text);
} catch (Exception e) {
log.error("【文字转语音接口调用异常】", e);
}
//音频数据
byte[] audioByte = Base64.getDecoder().decode(result);
response.setContentType("application/octet-stream;charset=UTF-8");
OutputStream os = new BufferedOutputStream(response.getOutputStream());
try {
//音频流
os.write(audioByte);
} catch (IOException e) {
e.printStackTrace();
} finally {
os.flush();
os.close();
}
}
}
}
至此,后端接口编写完毕
现在我们写一个前端页面来调用该接口实现
<template>
<div class="audio">
<div>
<el-input
type="textarea"
:autosize="{minRows:3,maxRows:5}"
placeholder="请输入内容"
v-model="textArea">
</el-input>
<el-badge class="item" style="margin-right: 12px" v-loading="audioLoading">
<el-button v-if="!audioPlay" icon="el-icon-video-play" size="medium" style="margin: 10px 10px;" circle
@click="getAudio(textArea)"></el-button>
<el-button v-if="audioPlay" icon="el-icon-video-pause" size="medium" style="margin: 10px 10px;" circle
@click="audioPause"></el-button>
</el-badge>
</div>
</div>
</template>
<script>
import {textToAudio} from '../api'
export default {
name: "Home",
props: {},
components: {},
data(){
return {
text: '',
//文件组件
textArea: '',
//语音组件
audioObj: {},
//转换时loading设置
audioLoading: false,
audioPlay: false,
}
},
mounted(){
this.audioObj = new Audio();//在VUE中使用audio标签
},
methods:{
//调用后台讯飞语音转换
getAudio(text) {
if(this.text === text && this.audioObj.src){
//已有声音直接播放
this.audioObj.play()
}else {
//判断输入框内容是否改变,如果是则重新发请求
this.text = text;
if (text) {
this.audioLoading = true
let formData = new FormData()
formData.append('text', text)
textToAudio(formData).then(response => {
let url = URL.createObjectURL(response)//通过这个API让语音数据转为成一个url地址
this.audioObj.src = url//设置audio的src为上面生成的url
let playPromiser = this.audioObj.play()//进行播放
//在谷歌内核中,audio.play()会返回一个promise的值,在IE内核中就不会返回任何的值
//所以如果你要分浏览器,可以判断playPromiser的值来进行操作哦
this.audioObj.onended = () => {
}
this.audioLoading = false
}).catch(err => {
})
}
}
this.audioPlay = true
},
audioPause() {
this.audioObj.pause()
this.audioPlay = false
}
}
}
</script>
<style scoped>
.audio{
width: 90%;
position: absolute;
top: 20px;
left: 20px;
font-size: 26px;
}
}
在api.js中写文件请求:
export function textToAudio(data) {
return axios({
url: '/xunfei/textToAudio',
method: 'post',
data: data,
responseType: "blob"//后台返回的为语音的流数据
})
}
注意在请求拦截器上加上
//请求拦截器
axios.interceptors.request.use(config=>{
//语音处理
if (config.headers['content-type'] === 'application/octet-stream;charset=UTF-8') {
return res.data
}
}