一、前言
经过前期一系列的成本和技术评估,以及各种第三方平台大模型API的实际样本测试效果,最终我们选择了和预期效果最符合的腾讯云hunyuan-turbos-latest模型作为UGC内容安全审核的接入方案。通过接入混元模型可以有效提升UGC内容审核的准确度(87.5%以上),且价格也比较便宜(百万Token输入是0.8元,输出是2元),对于企业来说是一个很好的接入方案(相比较自己部署一套小模型如Qwen7B来说,不管是效果还是成本都好得多)。
本篇要解决的业务问题:已知腾讯云混元模型的并发数是20个(目前官网默认单账号并发5个,每提高1个并发需要800元/月,可以找商务协商免费调大),且API平均1条要处理5秒(非流式响应),我们每天产生的UGC数据超过20w条,如何高性能地接入腾讯云混元模型呢?
业务整体架构如下:
- 成本和技术评估可参考:内容安全审核接入大语言模型的成本和技术评估-CSDN博客
- 其他模型API接入和样本测试效果可参考:通过大语言模型提高内容安全审核准确性_验证文本大模型输出结果的安全性是否正确-CSDN博客
二、术语
- UGC内容:用户发布的内容,如练笔、评论、个性签名、昵称等。
- Hystrix: 通过分配独立线程池,限制服务调用的并发数(有效解决突发流量);通过熔断器和服务降级可以有效解决服务间调用的故障和资源隔离问题,防止级联故障导致系统崩溃。
三、技术实现
3.1 Hystrix的配置
线程池的调度规则是:请求优先使用核心线程,核心线程占满后进入等待队列,等待队列也满了才会创建动态扩展线程。因此这里将核心线程数CoreSize配置为腾讯云允许的并发数(这里是20);将队列容量QueueSize配置为100,允许上游流量较大时适当排队等待;不设置动态扩展线程数,避免超过腾讯云允许的并发数。
/**
 * Hystrix execution timeout in milliseconds: 15 seconds, queueing time included.
 */
private static final int HYSTRIX_TIMEOUT = 15000;

/**
 * Hystrix setter assembled once by {@link #initHystrixConfig()}.
 */
private HystrixCommand.Setter setter;

/**
 * Assembles the Hystrix command and thread-pool configuration.
 * Runs after dependency injection has completed.
 */
@PostConstruct
public void initHystrixConfig() {
    // Thread pool: the core size is the concurrency cap (20 here). The maximum size is
    // deliberately left unset and is not allowed to diverge from the core size, so the
    // pool never grows past the cap.
    HystrixThreadPoolProperties.Setter poolProps = HystrixThreadPoolProperties.Setter()
            .withCoreSize(20)
            .withAllowMaximumSizeToDivergeFromCoreSize(false)
            // Queue capacity.
            .withMaxQueueSize(100)
            // Queue rejection threshold, kept equal to the queue capacity.
            .withQueueSizeRejectionThreshold(100)
            .withKeepAliveTimeMinutes(10);

    // Command: thread isolation with a hard timeout, fallback and circuit breaker.
    HystrixCommandProperties.Setter commandProps = HystrixCommandProperties.Setter()
            .withExecutionIsolationStrategy(HystrixCommandProperties.ExecutionIsolationStrategy.THREAD)
            .withExecutionTimeoutEnabled(true)
            .withExecutionTimeoutInMilliseconds(HYSTRIX_TIMEOUT)
            .withExecutionIsolationThreadInterruptOnTimeout(true)
            .withExecutionIsolationThreadInterruptOnFutureCancel(true)
            .withFallbackEnabled(true)
            // Minimum number of requests in the window before the breaker may trip.
            .withCircuitBreakerRequestVolumeThreshold(50)
            // Error-rate threshold: with at least 50 requests in the 10s window and 60% of
            // them failing, the circuit opens and calls are degraded to the fallback.
            .withCircuitBreakerErrorThresholdPercentage(60);

    this.setter = HystrixCommand.Setter
            .withGroupKey(HystrixCommandGroupKey.Factory.asKey("aiCheck"))
            .andThreadPoolPropertiesDefaults(poolProps)
            .andCommandPropertiesDefaults(commandProps);
}
3.2 调用混元模型API
/**
 * Sends one piece of content to the Hunyuan chat-completions API for moderation.
 *
 * @param sysPrompt    system-level prompt
 * @param checkContent content to be moderated
 * @return the moderation result, or {@code null} when either argument is blank or the
 *         response has no first choice that finished with reason {@code "stop"} and
 *         carries a message
 * @throws Exception propagated from the Hunyuan client call
 * @author better
 */
private AITextCheckResultDTO hunyuanTextCheck(String sysPrompt, String checkContent) throws Exception {
    if ($.isAnyBlank(sysPrompt, checkContent)) {
        log.warn("混元AI文本审核失败-参数异常");
        return null;
    }

    // Conversation: the system prompt first, then the content under review as the user turn.
    Message systemMessage = new Message();
    systemMessage.setRole("system");
    systemMessage.setContent(sysPrompt);

    Message userMessage = new Message();
    userMessage.setRole("user");
    userMessage.setContent(checkContent);

    ChatCompletionsRequest request = new ChatCompletionsRequest();
    request.setModel(MODEL_NAME);
    request.setMessages(new Message[] {systemMessage, userMessage});

    // The response object is the counterpart of the request object.
    ChatCompletionsResponse response = getHunyuanClient().ChatCompletions(request);

    Choice[] choices = response.getChoices();
    if (choices != null && choices.length > 0) {
        Choice firstChoice = choices[0];
        Message reply = firstChoice.getMessage();
        // Only a reply that finished with "stop" is used.
        if (reply != null && "stop".equals(firstChoice.getFinishReason())) {
            String verdict = reply.getContent();
            return AITextCheckResultDTO.builder()
                    .requestId(response.getRequestId())
                    .checkResult(verdict)
                    .checkStatus(getAiCheckStatus(verdict))
                    .build();
        }
    }

    log.warn("混元AI文本审核失败,checkContent:{},resp:{}", checkContent, JSONObject.toJSONString(response));
    return null;
}
3.3 完整代码
<!-- Tencent Cloud Hunyuan SDK: supplies the com.tencentcloudapi.hunyuan.v20230901 client and models imported by the Java code below. -->
<dependency>
<groupId>com.tencentcloudapi</groupId>
<artifactId>tencentcloud-sdk-java-hunyuan</artifactId>
<version>3.1.1085</version>
</dependency>
<!-- NOTE(review): no lkeap class is imported by the code shown in this article; confirm this dependency is actually required. -->
<dependency>
<groupId>com.tencentcloudapi</groupId>
<artifactId>tencentcloud-sdk-java-lkeap</artifactId>
<version>3.1.1228</version>
</dependency>
package com.better.douyin;
import com.alibaba.fastjson.JSONObject;
import com.netflix.hystrix.HystrixCommand;
import com.netflix.hystrix.HystrixCommandGroupKey;
import com.netflix.hystrix.HystrixCommandProperties;
import com.netflix.hystrix.HystrixThreadPoolProperties;
import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import com.tencentcloudapi.hunyuan.v20230901.HunyuanClient;
import com.tencentcloudapi.hunyuan.v20230901.models.ChatCompletionsRequest;
import com.tencentcloudapi.hunyuan.v20230901.models.ChatCompletionsResponse;
import com.tencentcloudapi.hunyuan.v20230901.models.Choice;
import com.tencentcloudapi.hunyuan.v20230901.models.Message;
import lombok.extern.slf4j.Slf4j;
import net.dreamlu.mica.core.utils.$;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
/**
 * Text moderation through the Tencent Cloud Hunyuan chat-completions API.
 *
 * <p>Every call runs inside a Hystrix command whose thread pool core size limits how many
 * requests are in flight at once. {@link #aiTextCheck(String, String)} is the only public
 * entry point; it returns {@code null} whenever no usable verdict was obtained.
 */
@Slf4j
@Component
public class AICheckHunYuanUtils {

    private final static String MODEL_NAME = "hunyuan-turbos-latest";
    private final static String SECRET_ID = "你的ID";
    private final static String SECRET_KEY = "你的KEY";

    /**
     * The prompt constrains the model's final answer to either "审核通过" (pass) or
     * "审核不通过" (reject); this is the pass marker.
     */
    private final static String CHECK_PASS = "审核通过";

    /** AI check status: passed. */
    private static final int STATUS_PASS = 1;

    /** AI check status: blocked. */
    private static final int STATUS_BLOCK = 2;

    /**
     * Hystrix core thread count, which controls the number of concurrent calls.
     * Read from {@code thread.core.size}; defaults to 1 when the property is absent.
     */
    @Value("${thread.core.size:1}")
    private Integer coreSize;

    /**
     * Hystrix execution timeout in milliseconds: 15 seconds, queueing time included.
     */
    private static final int HYSTRIX_TIMEOUT = 15000;

    /**
     * Hystrix setter assembled once by {@link #initHystrixConfig()}.
     */
    private HystrixCommand.Setter setter;

    /**
     * Lazily created SDK client. Declared {@code volatile} so that the unsynchronized
     * fast-path read in {@link #getHunyuanClient()} can only observe a fully
     * constructed instance.
     */
    private volatile HunyuanClient hunyuanClient;

    /**
     * Assembles the Hystrix command and thread-pool configuration.
     * Runs after dependency injection has completed, so {@code coreSize} is already set.
     */
    @PostConstruct
    public void initHystrixConfig() {
        HystrixCommandProperties.Setter propSetter = HystrixCommandProperties.Setter()
                .withExecutionTimeoutEnabled(true)
                .withExecutionTimeoutInMilliseconds(HYSTRIX_TIMEOUT)
                .withExecutionIsolationStrategy(HystrixCommandProperties.ExecutionIsolationStrategy.THREAD)
                .withFallbackEnabled(true)
                .withExecutionIsolationThreadInterruptOnTimeout(true)
                .withExecutionIsolationThreadInterruptOnFutureCancel(true)
                // Minimum number of requests in the window before the breaker may trip.
                .withCircuitBreakerRequestVolumeThreshold(50)
                // Error-rate threshold: with at least 50 requests in the 10s window and 60%
                // of them failing, the circuit opens and calls are degraded to the fallback.
                .withCircuitBreakerErrorThresholdPercentage(60);

        this.setter = HystrixCommand.Setter
                .withGroupKey(HystrixCommandGroupKey.Factory.asKey("aiCheck"))
                .andThreadPoolPropertiesDefaults(HystrixThreadPoolProperties.Setter()
                        .withKeepAliveTimeMinutes(10)
                        // The maximum size is deliberately left unset so the pool never
                        // grows past the core size.
                        .withCoreSize(coreSize)
                        // Queue capacity.
                        .withMaxQueueSize(100)
                        // Queue rejection threshold, kept equal to the queue capacity.
                        .withQueueSizeRejectionThreshold(100)
                        .withAllowMaximumSizeToDivergeFromCoreSize(false))
                .andCommandPropertiesDefaults(propSetter);
    }

    /**
     * Returns the shared Hunyuan client, creating it on first use.
     *
     * <p>The field is read once into a local and re-checked under the lock; together with
     * the {@code volatile} declaration this makes the lazy initialization safe when several
     * Hystrix worker threads race on the first call.
     *
     * @return the shared client instance
     */
    private HunyuanClient getHunyuanClient() {
        HunyuanClient client = hunyuanClient;
        if (client != null) {
            return client;
        }
        synchronized (this) {
            client = hunyuanClient;
            if (client == null) {
                Credential cred = new Credential(SECRET_ID, SECRET_KEY);

                // Optional HTTP settings.
                // NOTE(review): the timeout values are presumably seconds per the SDK; the
                // 60s read/write timeouts exceed HYSTRIX_TIMEOUT (15s) - confirm intended.
                HttpProfile httpProfile = new HttpProfile();
                httpProfile.setEndpoint("hunyuan.tencentcloudapi.com");
                httpProfile.setConnTimeout(10);
                httpProfile.setReadTimeout(60);
                httpProfile.setWriteTimeout(60);

                // Optional client settings.
                ClientProfile clientProfile = new ClientProfile();
                clientProfile.setHttpProfile(httpProfile);

                client = new HunyuanClient(cred, "", clientProfile);
                hunyuanClient = client;
            }
            return client;
        }
    }

    /**
     * Sends one piece of content to the Hunyuan chat-completions API for moderation.
     *
     * @param sysPrompt    system-level prompt
     * @param checkContent content to be moderated
     * @return the moderation result, or {@code null} when either argument is blank or the
     *         response has no first choice that finished with reason {@code "stop"} and
     *         carries a message
     * @throws Exception propagated from the Hunyuan client call
     * @author better
     */
    private AITextCheckResultDTO hunyuanTextCheck(String sysPrompt, String checkContent) throws Exception {
        if ($.isAnyBlank(sysPrompt, checkContent)) {
            log.warn("混元AI文本审核失败-参数异常");
            return null;
        }

        // Conversation: the system prompt first, then the content under review as the user turn.
        Message systemMessage = new Message();
        systemMessage.setRole("system");
        systemMessage.setContent(sysPrompt);

        Message userMessage = new Message();
        userMessage.setRole("user");
        userMessage.setContent(checkContent);

        ChatCompletionsRequest req = new ChatCompletionsRequest();
        req.setModel(MODEL_NAME);
        req.setMessages(new Message[] {systemMessage, userMessage});

        // The response object is the counterpart of the request object.
        ChatCompletionsResponse resp = getHunyuanClient().ChatCompletions(req);
        String requestId = resp.getRequestId();
        Choice[] choices = resp.getChoices();
        if (null != choices && choices.length > 0) {
            Choice choice = choices[0];
            String finishReason = choice.getFinishReason();
            Message reply = choice.getMessage();
            // Only a reply that finished with "stop" is used.
            if (null != reply && "stop".equals(finishReason)) {
                String result = reply.getContent();
                return AITextCheckResultDTO.builder()
                        .requestId(requestId)
                        .checkResult(result)
                        .checkStatus(getAiCheckStatus(result))
                        .build();
            }
        }
        log.warn("混元AI文本审核失败,checkContent:{},resp:{}", checkContent, JSONObject.toJSONString(resp));
        return null;
    }

    /**
     * Maps the model's textual verdict to an AI check status.
     *
     * @param checkResult text returned by the model
     * @return 1 (pass) when the text contains the pass marker; 2 (block) otherwise,
     *         including when the text is blank
     * @author better
     */
    private Integer getAiCheckStatus(String checkResult) {
        if ($.isBlank(checkResult) || !checkResult.contains(CHECK_PASS)) {
            return STATUS_BLOCK;
        }
        return STATUS_PASS;
    }

    /**
     * Moderates one piece of text inside a Hystrix command built from the shared setter.
     *
     * @param sysPrompt    system-level prompt
     * @param checkContent content to be moderated
     * @return the moderation result, or {@code null} when the underlying check returned
     *         {@code null} or the Hystrix fallback was used
     */
    public AITextCheckResultDTO aiTextCheck(String sysPrompt, String checkContent) {
        HystrixCommand<AITextCheckResultDTO> hystrixCommand = new HystrixCommand<AITextCheckResultDTO>(setter) {
            @Override
            protected AITextCheckResultDTO run() throws Exception {
                return hunyuanTextCheck(sysPrompt, checkContent);
            }

            @Override
            protected AITextCheckResultDTO getFallback() {
                // Log the cause when there is one; the fallback result is always null.
                Throwable exception = getExecutionException();
                if (exception != null) {
                    log.warn("混元AI文本审核熔断降级,checkContent:{},msg:{}", checkContent, exception.getMessage(), exception);
                } else {
                    log.warn("混元AI文本审核熔断降级,checkContent:{}", checkContent);
                }
                return null;
            }
        };
        return hystrixCommand.execute();
    }
}
package com.better.douyin;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
* Result of one AI text moderation call.
* The Lombok annotations generate the accessors, the builder and both constructors;
* the field order below therefore defines the all-args constructor's parameter order.
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class AITextCheckResultDTO {
/**
* AI request ID.
*/
private String requestId;
/**
* AI moderation result.
*/
private String checkResult;
/**
* AI check status: 1 - passed, 2 - blocked.
*/
private Integer checkStatus;
}
3.4 线程池控制上游调用的并发数
通过上面的Hystrix配置,可以保证调用混元模型API时不会超过允许的并发数。但如果上游并发请求数量太大,远远超过了Hystrix的承载能力(即远超过核心线程数+排队容量),就可能频繁触发Hystrix的熔断降级。通过配置一个专用线程池来控制上游的并发请求数量,可以规避这个问题。
package com.better.douyin;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import java.util.concurrent.Executor;
/**
 * Dedicated thread pool for AI moderation data sync.
 * PS: for AI moderation only - other business code must not use it!
 *
 * @description: executor handed to CompletableFuture; the workload is I/O bound
 */
@Configuration
public class CompletableFutureConfig4AICheck {

    /** Core pool size (must not exceed the concurrency allowed for the Hunyuan model). */
    private static final int CORE_POOL_SIZE = 20;

    /** Maximum pool size (must not exceed the concurrency allowed for the Hunyuan model). */
    private static final int MAX_POOL_SIZE = 20;

    /** Capacity of the blocking work queue. */
    private static final int WORK_QUEUE = 100;

    /** How long an idle thread is kept alive, in seconds. */
    private static final int KEEP_ALIVE_TIME = 60;

    /**
     * Creates and initializes the executor bean used for asynchronous AI moderation.
     *
     * <p>No rejection handler is configured, so the default policy applies (AbortPolicy:
     * the task is dropped and a RejectedExecutionException is thrown). Configure
     * {@code ThreadPoolExecutor.CallerRunsPolicy} instead if rejected tasks should run
     * on the submitting thread.
     *
     * @return the initialized executor
     */
    @Bean("completableFutureAsyncExecutor4AICheck")
    public Executor getCompletableFutureAsyncExecutor4AICheck() {
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setThreadNamePrefix("completableFutureExecutor4AiCheck-");
        pool.setCorePoolSize(CORE_POOL_SIZE);
        pool.setMaxPoolSize(MAX_POOL_SIZE);
        pool.setQueueCapacity(WORK_QUEUE);
        pool.setKeepAliveSeconds(KEEP_ALIVE_TIME);
        pool.initialize();
        return pool;
    }
}
通过定时任务批量获取上游UGC数据进行异步AI审核
/**
 * Drives AI moderation of pending UGC content.
 *
 * <p>This is a skeleton: loading the pending records, supplying the moderation prompt and
 * persisting the results are left as TODOs for the concrete business implementation.
 */
@Slf4j
@Component
public class AICheckManager {

    /** Hystrix-guarded Hunyuan moderation helper (type name matches the declared class). */
    @Resource
    private AICheckHunYuanUtils aiCheckHunYuanUtils;

    /** Dedicated AI-moderation executor, injected by its bean name. */
    @Resource(name = "completableFutureAsyncExecutor4AICheck")
    private java.util.concurrent.Executor completableFutureAsyncExecutor4AICheck;

    /**
     * Invoked by an xxl-job scheduled task every 6 seconds; each run fetches a batch of 20
     * pending UGC items and submits each one for asynchronous AI moderation.
     * PS: only the code framework is given here; fill in the business logic as needed.
     */
    public void batchSyncCheck() {
        // TODO: supply the real moderation prompt and load the pending UGC contents.
        // With the placeholders below nothing is submitted.
        String systemPrompt = "";
        List<String> pendingContents = java.util.Collections.emptyList();

        for (String content : pendingContents) {
            // Each item is moderated asynchronously on the dedicated executor.
            CompletableFuture.runAsync(() -> {
                try {
                    // Pass the item's own content, not a literal.
                    AITextCheckResultDTO aiCheckResult = aiCheckHunYuanUtils.aiTextCheck(systemPrompt, content);
                    // TODO: persist aiCheckResult for this item. A null result means no
                    // usable verdict was obtained (blank input, unusable reply or fallback).
                } catch (Exception e) {
                    log.error(e.getMessage(), e);
                }
            }, completableFutureAsyncExecutor4AICheck);
        }
    }
}
PS:定时任务每6秒执行1次,并发20,即6秒可以处理20条UGC数据;1分钟执行10次,可以处理200条;1天(1440分钟)可以处理约28.8w条数据,已达到我们的预期目标20w条了!