目录
实现目标:用户以语音的形式向机器人提问,机器人再以语音回答所提的问题
1.准备工作
1.安装了FreeSWITCH
2.安装了基于 FreeSWITCH 的 mod_cti 语音识别(ASR)接口
3.下载ccAdmin和sipphone(方便测试)
4.申请了免费的星火大模型套餐,并获取到 appId、apiKey、apiSecret 等认证信息(需要填写到 application.yml 中)
2.java 后端接口说明
1.项目说明
这个项目是使用java 代码实现与讯飞大模型对接,实现机器人问答功能。
2.项目结构
SpringBoot Demo 项目的结构:
3.项目代码
项目代码已上传至 GitHub:https://github.com/ddtxu/ddtxf
Maven 依赖
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-configuration-processor</artifactId>
    <optional>true</optional>
</dependency>
<!-- hutool-all is declared exactly once; a second declaration of the same
     groupId:artifactId makes Maven report a duplicate dependency. The default
     scope is already "compile", so no explicit scope is needed. -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.8.18</version>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- NOTE(review): fastjson 1.2.67 predates the 1.2.83 security release;
     consider upgrading after verifying compatibility. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.67</version>
</dependency>
<dependency>
    <groupId>org.java-websocket</groupId>
    <artifactId>Java-WebSocket</artifactId>
    <version>1.3.8</version>
</dependency>
<dependency>
    <groupId>com.squareup.okhttp3</groupId>
    <artifactId>okhttp</artifactId>
    <version>4.10.0</version>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
</dependency>
application.yml
# Settings for the iFLYTEK Spark large-model API. Every key below must be nested
# under "xfxh" because the Java side binds them with the "xfxh" prefix.
xfxh:
  # Service endpoint. This is the Spark V2.0 engine; for V1.5 change hostUrl to
  # https://spark-api.xf-yun.com/v1.1/chat
  hostUrl: https://spark-api.xf-yun.com/v2.1/chat
  # Domain sent with each request: "general" for V1.5, "generalv2" for V2.
  domain: generalv2
  # Sampling threshold that controls randomness: the higher the value, the more
  # likely the same question gets different answers. Range [0,1].
  temperature: 0.5
  # Maximum number of tokens in the model's answer: [1,4096] for V1.5, [1,8192] for V2.0.
  maxTokens: 2048
  # Maximum time to wait for the model's reply, in seconds.
  maxResponseTime: 30
  # Number of websocket connections allowed to the model at the same time. Regular
  # (free) accounts get 2; exceeding it makes the connection fail. See the official site.
  QPS: 2
  # Used for authentication; taken from the service's API credential page.
  appId:
  # Used for authentication; taken from the service's API credential page.
  apiSecret:
  # Used for authentication; taken from the service's API credential page.
  apiKey:
server:
  port: 8013
config 包
/**
 * Configuration properties for the iFLYTEK Spark large-model API, bound from the
 * {@code xfxh} prefix of the application configuration.
 */
@Configuration
@ConfigurationProperties(prefix = "xfxh")
@Data
public class XfXhConfig {
    /**
     * Service endpoint. The default targets the Spark V2.0 engine; for V1.5 change it to
     * https://spark-api.xf-yun.com/v1.1/chat
     */
    private String hostUrl;
    /**
     * Domain sent with each request: "general" for V1.5, "generalv2" for V2.
     */
    private String domain;
    /**
     * Sampling threshold that controls randomness: the higher the value, the more likely
     * the same question gets different answers. Range [0,1].
     */
    private Float temperature;
    /**
     * Maximum number of tokens in the model's answer: [1,4096] for V1.5, [1,8192] for V2.0.
     */
    private Integer maxTokens;
    /**
     * Maximum time to wait for the model's reply, in seconds.
     */
    private Integer maxResponseTime;
    /**
     * Used for authentication; taken from the service's API credential page.
     */
    private String appId;
    /**
     * Used for authentication; taken from the service's API credential page.
     */
    private String apiKey;
    /**
     * Used for authentication; taken from the service's API credential page.
     */
    private String apiSecret;
}
DTO 包
MsgDTO
/**
 * A single chat message exchanged with the large model, in either direction.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class MsgDTO {
    /** Role value for messages written by the end user. */
    public static final String ROLE_USER = "user";
    /** Role value for messages written by the model. */
    public static final String ROLE_ASSISTANT = "assistant";

    /**
     * Who wrote the message.
     */
    private String role;
    /**
     * Message text.
     */
    private String content;
    /**
     * Response-only field: result index in [0,10]. Currently reserved; callers may ignore it.
     */
    private Integer index;

    /**
     * Builds a message carrying the user role.
     *
     * @param content message text
     * @return a new message with role {@link #ROLE_USER} and no index
     */
    public static MsgDTO createUserMsg(String content) {
        return withRole(ROLE_USER, content);
    }

    /**
     * Builds a message carrying the assistant role.
     *
     * @param content message text
     * @return a new message with role {@link #ROLE_ASSISTANT} and no index
     */
    public static MsgDTO createAssistantMsg(String content) {
        return withRole(ROLE_ASSISTANT, content);
    }

    /** Shared factory: the index is always left unset for outgoing messages. */
    private static MsgDTO withRole(String role, String content) {
        return new MsgDTO(role, content, null);
    }
}
/**
 * One frame of the large model's streamed response: a header with status information and a
 * payload with the text chunk (plus token usage on the last frame).
 */
@NoArgsConstructor
@Data
public class ResponseDTO {
    @JsonProperty("header")
    private HeaderDTO header;
    @JsonProperty("payload")
    private PayloadDTO payload;

    /** Status information of the response frame. */
    @NoArgsConstructor
    @Data
    public static class HeaderDTO {
        /**
         * Error code: 0 means success, non-zero means failure.
         */
        @JsonProperty("code")
        private Integer code;
        /**
         * Description of whether the session succeeded.
         */
        @JsonProperty("message")
        private String message;
        /**
         * Unique session id, used by iFLYTEK engineers to look up server-side session logs;
         * it is recommended to keep it when a call fails.
         */
        @JsonProperty("sid")
        private String sid;
        /**
         * Session status, one of [0,1,2]: 0 = first result, 1 = intermediate result,
         * 2 = last result.
         */
        @JsonProperty("status")
        private Integer status;
    }

    /** Content of the response frame. */
    @NoArgsConstructor
    @Data
    public static class PayloadDTO {
        @JsonProperty("choices")
        private ChoicesDTO choices;
        /**
         * Returned with the last result only.
         */
        @JsonProperty("usage")
        private UsageDTO usage;

        /** The text chunk carried by this frame. */
        @NoArgsConstructor
        @Data
        public static class ChoicesDTO {
            /**
             * Text status, one of [0,1,2]: 0 = first text result, 1 = intermediate text result,
             * 2 = last text result.
             */
            @JsonProperty("status")
            private Integer status;
            /**
             * Sequence number of the returned data, in [0,9999999].
             */
            @JsonProperty("seq")
            private Integer seq;
            /**
             * Response text.
             */
            @JsonProperty("text")
            private List<MsgDTO> text;
        }

        /** Token accounting for the interaction. */
        @NoArgsConstructor
        @Data
        public static class UsageDTO {
            @JsonProperty("text")
            private TextDTO text;

            /** Token counters. */
            @NoArgsConstructor
            @Data
            public static class TextDTO {
                /**
                 * Reserved field; can be ignored.
                 */
                @JsonProperty("question_tokens")
                private Integer questionTokens;
                /**
                 * Total token count including the history questions.
                 */
                @JsonProperty("prompt_tokens")
                private Integer promptTokens;
                /**
                 * Token count of the answer.
                 */
                @JsonProperty("completion_tokens")
                private Integer completionTokens;
                /**
                 * Sum of prompt_tokens and completion_tokens; also the number of tokens billed
                 * for this interaction.
                 */
                @JsonProperty("total_tokens")
                private Integer totalTokens;
            }
        }
    }
}
/**
 * Request body sent to the large model: header (who is asking), parameter (model settings)
 * and payload (the messages).
 *
 * NOTE(review): this class mixes Jackson's {@code @JsonProperty} with fastjson's
 * {@code @JSONField}; only the fields whose JSON name differs from the Java name carry
 * {@code @JSONField}. Confirm which library serializes it before changing annotations.
 */
@NoArgsConstructor
@Data
public class RequestDTO {
    @JsonProperty("header")
    private HeaderDTO header;
    @JsonProperty("parameter")
    private ParameterDTO parameter;
    @JsonProperty("payload")
    private PayloadDTO payload;

    /** Identifies the application and the end user. */
    @NoArgsConstructor
    @Data
    @AllArgsConstructor
    public static class HeaderDTO {
        /**
         * Application appid, taken from the application created in the open-platform console.
         */
        @JSONField(name = "app_id")
        private String appId;
        /**
         * Id of the individual user, used to tell users apart; maximum length 32.
         */
        @JSONField(name = "uid")
        private String uid;
    }

    /** Wrapper around the chat settings. */
    @NoArgsConstructor
    @Data
    @AllArgsConstructor
    public static class ParameterDTO {
        private ChatDTO chat;

        /** Model settings for this request. */
        @NoArgsConstructor
        @Data
        @AllArgsConstructor
        public static class ChatDTO {
            /**
             * Domain to access: "general" targets V1.5, "generalv2" targets V2.
             * Note: different values go with different URLs!
             */
            @JsonProperty("domain")
            private String domain;
            /**
             * Sampling threshold that controls randomness: the higher the value, the more likely
             * the same question gets different answers.
             */
            @JsonProperty("temperature")
            private Float temperature;
            /**
             * Maximum number of tokens in the model's answer.
             */
            @JSONField(name = "max_tokens")
            private Integer maxTokens;
        }
    }

    /** Wrapper around the message list. */
    @NoArgsConstructor
    @Data
    @AllArgsConstructor
    public static class PayloadDTO {
        @JsonProperty("message")
        private MessageDTO message;

        /** The messages sent to the model. */
        @NoArgsConstructor
        @Data
        @AllArgsConstructor
        public static class MessageDTO {
            @JsonProperty("text")
            private List<MsgDTO> text;
        }
    }
}
listener 包
XfXhWebSocketListener
@Slf4j
public class XfXhWebSocketListener extends WebSocketListener {
private StringBuilder answer = new StringBuilder();
private boolean wsCloseFlag = false;
public StringBuilder getAnswer() {
return answer;
}
public boolean isWsCloseFlag() {
return wsCloseFlag;
}
@Override
public void onOpen(@NotNull WebSocket webSocket, @NotNull Response response) {
super.onOpen(webSocket, response);
}
@Override
public void onMessage(@NotNull WebSocket webSocket, @NotNull String text) {
super.onMessage(webSocket, text);
// 将大模型回复的 JSON 文本转为 ResponseDTO 对象
ResponseDTO responseData = JSONObject.parseObject(text, ResponseDTO.class);
// 如果响应数据中的 header 的 code 值不为 0,则表示响应错误
if (responseData.getHeader().getCode() != 0) {
// 日志记录
log.error("发生错误,错误码为:" + responseData.getHeader().getCode() + "; " + "信息:" + responseData.getHeader().getMessage());
// 设置回答
this.answer = new StringBuilder("大模型响应错误,请稍后再试");
// 关闭连接标识
wsCloseFlag = true;
return;
}
// 将回答进行拼接
for (MsgDTO msgDTO : responseData.getPayload().getChoices().getText()) {
this.answer.append(msgDTO.getContent());
}
// 对最后一个文本结果进行处理
if (2 == responseData.getHeader().getStatus()) {
wsCloseFlag = true;
}
}
@Override
public void onFailure(@NotNull WebSocket webSocket, @NotNull Throwable t, @Nullable Response response) {
super.onFailure(webSocket, t, response);
}
@Override
public void onClosed(@NotNull WebSocket webSocket, int code, @NotNull String reason) {
super.onClosed(webSocket, code, reason);
}
}
component 包
XfXhStreamClient
@Component
@Slf4j
public class XfXhStreamClient {
@Resource
private XfXhConfig xfXhConfig;
@Value("${xfxh.QPS}")
private int connectionTokenCount;
/**
* 获取令牌
*/
public static int GET_TOKEN_STATUS = 0;
/**
* 归还令牌
*/
public static int BACK_TOKEN_STATUS = 1;
/**
* 操作令牌
*
* @param status 0-获取令牌 1-归还令牌
* @return 是否操作成功
*/
public synchronized boolean operateToken(int status) {
if (status == GET_TOKEN_STATUS) {
// 获取令牌
if (connectionTokenCount != 0) {
// 说明还有令牌,将令牌数减一
connectionTokenCount -= 1;
return true;
} else {
return false;
}
} else {
// 放回令牌
connectionTokenCount += 1;
return true;
}
}
/**
* 发送消息
*
* @param uid 每个用户的id,用于区分不同用户
* @param msgList 发送给大模型的消息,可以包含上下文内容
* @return 获取websocket连接,以便于我们在获取完整大模型回复后手动关闭连接
*/
public WebSocket sendMsg(String uid, List<MsgDTO> msgList, WebSocketListener listener) {
// 获取鉴权url
String authUrl = this.getAuthUrl();
// 鉴权方法生成失败,直接返回 null
if (authUrl == null) {
return null;
}
OkHttpClient okHttpClient = new OkHttpClient.Builder().build();
// 将 https/http 连接替换为 ws/wss 连接
String url = authUrl.replace("http://", "ws://").replace("https://", "wss://");
Request request = new Request.Builder().url(url).build();
// 建立 wss 连接
WebSocket webSocket = okHttpClient.newWebSocket(request, listener);
// 组装请求参数
RequestDTO requestDTO = getRequestParam(uid, msgList);
// 发送请求
webSocket.send(JSONObject.toJSONString(requestDTO));
return webSocket;
}
/**
* 生成鉴权方法,具体实现不用关心,这是讯飞官方定义的鉴权方式
*
* @return 鉴权访问大模型的路径
*/
public String getAuthUrl() {
try {
URL url = new URL(xfXhConfig.getHostUrl());
// 时间
SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
format.setTimeZone(TimeZone.getTimeZone("GMT"));
String date = format.format(new Date());
// 拼接
String preStr = "host: " + url.getHost() + "\n" +
"date: " + date + "\n" +
"GET " + url.getPath() + " HTTP/1.1";
// SHA256加密
Mac mac = Mac.getInstance("hmacsha256");
SecretKeySpec spec = new SecretKeySpec(xfXhConfig.getApiSecret().getBytes(StandardCharsets.UTF_8), "hmacsha256");
mac.init(spec);
byte[] hexDigits = mac.doFinal(preStr.getBytes(StandardCharsets.UTF_8));
// Base64加密
String sha = Base64.getEncoder().encodeToString(hexDigits);
// 拼接
String authorizationOrigin = String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", xfXhConfig.getApiKey(), "hmac-sha256", "host date request-line", sha);
// 拼接地址
HttpUrl httpUrl = Objects.requireNonNull(HttpUrl.parse("https://" + url.getHost() + url.getPath())).newBuilder().
addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorizationOrigin.getBytes(StandardCharsets.UTF_8))).
addQueryParameter("date", date).
addQueryParameter("host", url.getHost()).
build();
return httpUrl.toString();
} catch (Exception e) {
log.error("鉴权方法中发生错误:" + e.getMessage());
return null;
}
}
/**
* 获取请求参数
*
* @param uid 每个用户的id,用于区分不同用户
* @param msgList 发送给大模型的消息,可以包含上下文内容
* @return 请求DTO,该 DTO 转 json 字符串后生成的格式参考 resources/demo-json/request.json
*/
public RequestDTO getRequestParam(String uid, List<MsgDTO> msgList) {
RequestDTO requestDTO = new RequestDTO();
requestDTO.setHeader(new RequestDTO.HeaderDTO(xfXhConfig.getAppId(), uid));
requestDTO.setParameter(new RequestDTO.ParameterDTO(new RequestDTO.ParameterDTO.ChatDTO(xfXhConfig.getDomain(), xfXhConfig.getTemperature(), xfXhConfig.getMaxTokens())));
requestDTO.setPayload(new RequestDTO.PayloadDTO(new RequestDTO.PayloadDTO.MessageDTO(msgList)));
return requestDTO;
}
}
启动类
/**
 * Spring Boot entry point of the demo application.
 */
@SpringBootApplication
public class XfXhApplication {
    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments, passed through to Spring
     */
    public static void main(String[] args) {
        SpringApplication.run(XfXhApplication.class, args);
    }
}
DDTestController
/**
 * HTTP endpoint called by the telephony side for each call event. It answers with the next
 * action to run (play audio and detect speech, hang up, ...) and forwards recognized speech
 * to the large model so the model's answer can be played back.
 */
@RestController
@RequestMapping("/test")
@Slf4j
public class DDTestController {
    /** ASR service address placed into the cti_play_and_detect_speech argument. */
    private static final String ASR_ADDR = "127.0.0.1:9988";
    /** TTS service URL; obtain the actual value from the 顶顶通 (ddrj) website. */
    private static final String TTS_ADDR = "";

    @Resource
    private XfXhStreamClient xfXhStreamClient;
    @Resource
    private XfXhConfig xfXhConfig;

    /**
     * Dispatches one call event by its {@code method} field.
     *
     * @param tokenMap JSON body of the event
     * @return the instructions for the telephony side; only contains what the branch set
     */
    @PostMapping("/flow")
    public Map<String, Object> flow(@RequestBody Map<String, Object> tokenMap) throws Exception {
        String method = (String) tokenMap.get("method");
        String callid = (String) tokenMap.get("callid");
        String appid = (String) tokenMap.get("appid");
        Map<String, Object> resultMap = new HashMap<>();
        if ("create".equals(method)) {
            handleCreate(tokenMap, appid, resultMap);
        } else if ("input".equals(method)) {
            handleInput(tokenMap, callid, appid, resultMap);
        } else if ("destory".equals(method)) {
            // NOTE(review): "destory" is the value this endpoint has always matched;
            // confirm the caller's spelling before changing it.
            resultMap.put("log", "destory succeed");
        }
        return resultMap;
    }

    /** Handles the "create" event: plays the welcome prompts and starts speech detection. */
    private void handleCreate(Map<String, Object> tokenMap, String appid, Map<String, Object> resultMap) {
        String callSource = (String) tokenMap.get("call_source");
        log.info("callSource ===>{}", callSource);
        String sourceName = (String) tokenMap.get("source_name");
        log.info("sourceName ===>{}", sourceName);
        resultMap.put("action", "cti_play_and_detect_speech");
        resultMap.put("argument", detectSpeechArgument("64", "5000", appid));
        Map<String, Object> ttsMap = new HashMap<>();
        ttsMap.put("ttsurl", TTS_ADDR);
        ttsMap.put("ttsvoicename", "");
        ttsMap.put("ttsconfig", "");
        ttsMap.put("ttsengine", "");
        ttsMap.put("ttsvolume", 0);
        ttsMap.put("ttsspeechrate", 0);
        ttsMap.put("ttspitchrate", 0);
        resultMap.put("tts", ttsMap);
        resultMap.put("privatedata", "test");
        List<String> list = Arrays.asList("欢迎进入测试程序,被叫号码是", "15307306845", "请继续说话测试吧", "等待音乐.wav");
        resultMap.put("playbacks", list);
        resultMap.put("sound_file_dir", "/ddt/fs/sounds/cti/acd");
        resultMap.put("pre_tts_text", Arrays.asList("徐先生", "2023年10月10日"));
        resultMap.put("quickresponse", true);
        resultMap.put("log", "create succeed");
    }

    /** Handles the "input" event: either a completed action or a prefixed recognition result. */
    private void handleInput(Map<String, Object> tokenMap, String callid, String appid, Map<String, Object> resultMap) {
        String privatedata = (String) tokenMap.get("privatedata");
        log.info("privatedata ===>{}", privatedata);
        String input_type = (String) tokenMap.get("input_type");
        log.info("input_type ===>{}", input_type);
        String input_args = (String) tokenMap.get("input_args");
        log.info("input_args ===>{}", input_args);
        // JSON numbers in an untyped map arrive as Integer, Long or Double depending on
        // their value, so they are read as Number instead of being cast to one boxed type.
        Long input_start_time = toLong(tokenMap.get("input_start_time"));
        log.info("input_start_time ===>{}", input_start_time);
        Integer input_duration = toInteger(tokenMap.get("input_duration"));
        log.info("input_duration ===>{}", input_duration);
        Integer play_progress = toInteger(tokenMap.get("play_progress"));
        log.info("play_progress ===>{}", play_progress);
        if ("complete".equals(input_type)) {
            handleComplete(input_args, appid, resultMap);
            return;
        }
        // The first character selects the branch; the remainder is the recognized text/keys
        String prefix = StrUtil.sub(input_args, 0, 1);
        String text = StrUtil.subSuf(input_args, 1);
        if ("S".equals(prefix)) {
            if (!"stop".equals(privatedata) && play_progress != null && play_progress > 0) {
                resultMap.put("commands", Collections.singletonList("uuid_cti_play_and_detect_speech_break_play " + callid));
                resultMap.put("privatedata", "stop");
                resultMap.put("log", "停止放音,但是不停止ASR识别。模拟关键词打断");
            } else {
                resultMap.put("log", "no processing");
            }
        } else if ("F".equals(prefix)) {
            if (text.contains("挂断")) {
                resultMap.put("action", "hangup");
                resultMap.put("privatedata", "test");
                resultMap.put("playbacks", Collections.singletonList("谢谢你的测试,再见"));
                resultMap.put("log", "挂机");
            } else {
                // Ask the large model with the recognized speech
                String str = sendQuestion(text);
                resultMap.put("action", "cti_play_and_detect_speech");
                resultMap.put("argument", detectSpeechArgument("1", "5000", appid));
                resultMap.put("privatedata", "test");
                // The answer text is played back as speech
                resultMap.put("playbacks", Collections.singletonList(str));
                resultMap.put("quickresponse", true);
                resultMap.put("log", "播放识别结果");
            }
        } else if ("D".equals(prefix)) {
            // Part of the same chain as "S"/"F" so their log entry is not overwritten
            resultMap.put("action", "cti_play_and_detect_speech");
            resultMap.put("argument", detectSpeechArgument("1", "10000", appid));
            resultMap.put("privatedata", "test");
            resultMap.put("dtmf_terminators", "#");
            resultMap.put("playbacks", Arrays.asList("刚刚的按键内容是", text, "请继续按键测试吧,并以#号键结束"));
            resultMap.put("log", "按键识别结果");
        } else {
            resultMap.put("log", "no processing");
        }
    }

    /** Handles an "input" event of type "complete": hang up, replay a recording, or prompt again. */
    private void handleComplete(String input_args, String appid, Map<String, Object> resultMap) {
        if (input_args != null && input_args.contains("hangup")) {
            resultMap.put("action", "hangup");
            resultMap.put("log", "挂机");
        } else if (input_args != null && input_args.contains("record")) {
            String recordfile = StrUtil.subWithLength(input_args, 7, input_args.length() - 8);
            resultMap.put("action", "cti_play_and_detect_speech");
            // appid is quoted here like in every other branch
            resultMap.put("argument", detectSpeechArgument("1", "5000", appid));
            resultMap.put("privatedata", "test");
            resultMap.put("playbacks", Arrays.asList("刚刚的录音内容是", recordfile, "请继续说话,可以说关键词,人工,转接,暂停,停止,分机来测试"));
            resultMap.put("quickresponse", true);
            resultMap.put("log", "播放录音");
        } else {
            resultMap.put("action", "cti_play_and_detect_speech");
            resultMap.put("argument", detectSpeechArgument("1", "5000", appid));
            resultMap.put("privatedata", "test");
            resultMap.put("playbacks", Collections.singletonList("动作执行完成,这里必须放音,请继续说话,可以继续提问"));
            resultMap.put("quickresponse", true);
            resultMap.put("log", "重新开始放音");
        }
    }

    /**
     * Builds the quoted, space-separated argument string for cti_play_and_detect_speech.
     * Only the 2nd and 8th positional values differ between call sites; the meaning of each
     * position is defined by mod_cti (not visible here).
     *
     * @param arg2  value of the 2nd position
     * @param arg8  value of the 8th position
     * @param appid application id from the event
     */
    private static String detectSpeechArgument(String arg2, String arg8, String appid) {
        String date = LocalDateTime.now().toString();
        return "'1' '" + arg2 + "' '0' '0.8' '" + ASR_ADDR + "' '120' '800' '" + arg8 + "' '20000' '' '' '" + appid + "' '1' '" + date + "' 'wav'";
    }

    /** Returns the value as Long when it is a number, otherwise null. */
    private static Long toLong(Object value) {
        return value instanceof Number ? Long.valueOf(((Number) value).longValue()) : null;
    }

    /** Returns the value as Integer when it is a number, otherwise null. */
    private static Integer toInteger(Object value) {
        return value instanceof Number ? Integer.valueOf(((Number) value).intValue()) : null;
    }

    /**
     * Removes all whitespace, splits the text at "," "。" and "!", and joins the pieces as
     * double-quoted items separated by commas.
     *
     * @param str text to split
     * @return the quoted, comma-separated pieces
     */
    private StringBuilder respons(String str) {
        // Strings are immutable: the stripped result has to be assigned back
        str = str.replaceAll("\\s+", "");
        String[] parts = str.split("[,。!]");
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            sb.append("\"").append(parts[i]).append("\"");
            if (i != parts.length - 1) {
                sb.append(",");
            }
        }
        return sb;
    }

    /**
     * Sends the question to the large model and waits for the complete answer.
     *
     * @param question recognized text of the user's question
     * @return the model's answer, or a fixed message when the question is blank, no
     *         connection token is available, the request fails or the model times out
     */
    private String sendQuestion(String question) {
        // Do not bother the model with a blank question
        if (StrUtil.isBlank(question)) {
            return "无效问题,请重新输入";
        }
        // Acquire a connection token
        if (!xfXhStreamClient.operateToken(XfXhStreamClient.GET_TOKEN_STATUS)) {
            return "当前大模型连接数过多,请稍后再试";
        }
        MsgDTO msgDTO = MsgDTO.createUserMsg(question);
        XfXhWebSocketListener listener = new XfXhWebSocketListener();
        // Send the question; this opens the websocket connection
        WebSocket webSocket = xfXhStreamClient.sendMsg(UUID.randomUUID().toString().substring(0, 10), Collections.singletonList(msgDTO), listener);
        if (webSocket == null) {
            // Give the token back
            xfXhStreamClient.operateToken(XfXhStreamClient.BACK_TOKEN_STATUS);
            return "系统内部错误,请联系管理员";
        }
        try {
            int count = 0;
            // Poll every 200 ms; the iteration limit turns maxResponseTime (s) into a timeout
            int maxCount = xfXhConfig.getMaxResponseTime() * 5;
            while (count <= maxCount) {
                Thread.sleep(200);
                if (listener.isWsCloseFlag()) {
                    break;
                }
                count++;
            }
            if (count > maxCount) {
                return "大模型响应超时,请联系管理员";
            }
            return listener.getAnswer().toString();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers up the stack can still observe it
            Thread.currentThread().interrupt();
            log.error("错误:" + e.getMessage());
            return "系统内部错误,请联系管理员";
        } finally {
            // Close the websocket and give the token back
            webSocket.close(1000, "");
            xfXhStreamClient.operateToken(XfXhStreamClient.BACK_TOKEN_STATUS);
        }
    }
}
3.在 ccAdmin 里面配置
基础配置 -> 拨号方案 -> http话术 -> cti_robot,填写项目接口地址(本项目为 http://<后端服务IP>:8013/test/flow)
4.在sipphone 配置测试
sipphone可在http://www.ddrj.com/sipphone/index.html 下载