阿里云文本审核(java敏感词校验)

最新推荐文章于 2024-06-17 14:48:09 发布

craywen

最新推荐文章于 2024-06-17 14:48:09 发布

阅读量6.3k

点赞数 4

分类专栏： java

本文链接：https://blog.csdn.net/qq_38893133/article/details/117294596

版权

java 专栏收录该内容

30 篇文章 1 订阅

订阅专栏

文本敏感词校验

背景: 用户在社区发布的文章或评论需要合法合规,不能包含涉政等违规的敏感词.

使用阿里云的内容审核

准备工作

阿里云账号,并且需要开通敏感词服务。注意: 仅有 accessKeyId 和 accessKeySecret 是不够的,必须先开通该服务。
文档地址: https://help.aliyun.com/document_detail/70439.html?spm=a2c4g.11186623.6.701.1cea2188bMuZUi
调试地址:https://next.api.aliyun.com/api/Green/2018-05-09/TextScan

java代码(做一次笔记吧qaq)

依赖

<dependency>
  <groupId>com.aliyun</groupId>
  <artifactId>aliyun-java-sdk-core</artifactId>
  <version>4.5.16</version>
</dependency>

代码

/**
 * Component that sends text to the Aliyun "green" text-scan endpoint
 * ({@code /green/text/scan}) and turns the returned labels into {@link AuditInfo} results.
 */
@Slf4j
@Component
public class AliYunWordFilterHandler {

    // Region id handed to DefaultProfile when the client is created.
    private static final String region = "cn-beijing";

    // Access key id, injected from the "aliyun.oss.accessKeyId" property.
    // NOTE(review): the property lives under the "oss" prefix although it is used for
    // text scanning here -- confirm the same key pair is intended.
    @Value("${aliyun.oss.accessKeyId}")
    private String accessKeyId;

    // Access key secret, injected from the "aliyun.oss.accessKeySecret" property.
    @Value("${aliyun.oss.accessKeySecret}")
    private String accessKeySecret;

	//设置获取client
    /**
     * Creates an ACS client from the fixed region and the injected access-key pair.
     *
     * @return a new {@link DefaultAcsClient}; a fresh instance is built on every call
     */
    private IAcsClient getClient() {
        return new DefaultAcsClient(DefaultProfile.getProfile(region, accessKeyId, accessKeySecret));
    }
    
	//设置请求头
    /**
     * Builds the body-less request template used for text scanning: HTTPS POST against the
     * Beijing "green" domain, API version 2018-05-09, JSON content type, and a value of 6000
     * for both the connect and the read timeout.
     *
     * @return a new {@link CommonRequest}; the caller still has to attach the HTTP content
     */
    private CommonRequest getDefaultCommonRequest() {
        // Same value is used for the connect and the read timeout.
        final int timeout = 6000;

        final CommonRequest scanRequest = new CommonRequest();
        scanRequest.setProtocol(ProtocolType.HTTPS);
        scanRequest.setMethod(MethodType.POST);
        scanRequest.setDomain("green.cn-beijing.aliyuncs.com");
        scanRequest.setVersion("2018-05-09");
        // Text scan endpoint. The original note lists "/green/text/feedback" as the
        // endpoint for feedback instead.
        scanRequest.setUriPattern("/green/text/scan");
        scanRequest.setSysConnectTimeout(timeout);
        scanRequest.setSysReadTimeout(timeout);
        scanRequest.putHeadParameter("Content-Type", "application/json");
        return scanRequest;
    }
    
	//请求参数封装 map
    /**
     * Builds the scan request body as a plain map: one task entry per input text plus the
     * {@code antispam} scene.
     *
     * @param tasks texts to check (the original note says each must not exceed 10000 characters)
     * @return map with the keys {@code scenes} and {@code tasks}
     */
    private Map<String, Object> getExecuteMap(List<String> tasks) {
        List<Map<String, Object>> taskEntries = new ArrayList<>();
        for (String text : tasks) {
            taskEntries.add(newMapTask(text));
        }

        Map<String, Object> body = new HashMap<>(2);
        // Detection scene; "antispam" is the value used for text spam detection.
        body.put("scenes", Collections.singletonList("antispam"));
        body.put("tasks", taskEntries);
        return body;
    }

    /** Creates a single task entry carrying a random {@code dataId} and the text to check. */
    private Map<String, Object> newMapTask(String text) {
        Map<String, Object> entry = new HashMap<String, Object>();
        entry.put("dataId", UUID.randomUUID().toString());
        entry.put("content", text);
        return entry;
    }
    
	//请求参数封装 JSONObject (linkhashMap)
    /**
     * Builds the scan request body as a {@link JSONObject}: one task entry per input text plus
     * the {@code antispam} scene.
     *
     * @param tasks texts to check (the original note says each must not exceed 10000 characters)
     * @return JSON object with the keys {@code scenes} and {@code tasks}
     */
    private JSONObject getExecuteJSONObject(List<String> tasks) {
        JSONArray taskEntries = new JSONArray();
        for (String text : tasks) {
            taskEntries.add(newJsonTask(text));
        }

        JSONObject body = new JSONObject();
        // Detection scene; "antispam" is the value used for text spam detection.
        body.put("scenes", Collections.singletonList("antispam"));
        body.put("tasks", taskEntries);
        return body;
    }

    /** Creates a single JSON task entry carrying a random {@code dataId} and the text to check. */
    private JSONObject newJsonTask(String text) {
        JSONObject entry = new JSONObject();
        entry.put("dataId", UUID.randomUUID().toString());
        entry.put("content", text);
        return entry;
    }

    /**
     * Submits a batch of texts to the Aliyun text-scan API and converts each returned scene
     * result into an {@link AuditInfo}.
     *
     * <p>According to the original author's note the API accepts at most 100 texts per call and
     * 10000 characters per text; this method does not enforce either limit.
     *
     * <p>Every failure (client exception, non-success HTTP status, non-200 response code,
     * non-200 task code, scene result without label or rate) is logged and skipped. The returned
     * list can therefore contain fewer entries than {@code content}.
     *
     * @param content texts to review
     * @return audit results for the tasks that were processed; empty when the call fails
     */
    @TimerLog
    public List<AuditInfo> textReviews(List<String> content) {
        List<AuditInfo> result = new LinkedList<>();
        IAcsClient client = getClient();
        CommonRequest request = getDefaultCommonRequest();
        // Serialize once and reuse the string for both the log line and the request body.
        String payload = JSONUtils.toString(getExecuteMap(content));
        log.info("阿里敏感词检测:[start]:\n {}", payload);
        request.setHttpContent(payload.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8.name(), JSON);
        try {
            CommonResponse response = client.getCommonResponse(request);
            log.info("阿里敏感词检测:[end]:\n {}", response.getData());
            HttpResponse httpResponse = response.getHttpResponse();
            if (!httpResponse.isSuccess()) {
                // Previously this case returned an empty list without any trace in the log.
                log.error("阿里敏感词检测:HTTP请求失败 status:{}", httpResponse.getStatus());
                return result;
            }
            log.info("敏感词效验 成功");

            String responseContent = new String(httpResponse.getHttpContent(), StandardCharsets.UTF_8);
            JSONObject scrResponse = JSONUtils.parseObject(responseContent, JSONObject.class);
            Integer responseCode = scrResponse.getInteger("code");
            if (!isSuccessCode(responseCode)) {
                log.error("检测状态失败 code:{}", responseCode);
                return result;
            }

            JSONArray taskResults = scrResponse.getJSONArray("data");
            if (taskResults == null) {
                log.error("阿里敏感词检测:响应缺少 data 字段");
                return result;
            }
            for (int i = 0; i < taskResults.size(); i++) {
                collectTaskResult(taskResults.getJSONObject(i), result);
            }
        } catch (ClientException e) {
            // Pass the exception to the logger so the cause and stack trace are kept.
            log.error("请求调用失败,检查是否是超时", e);
        }
        return result;
    }

    /** Returns {@code true} only for a present code equal to 200 (null-safe, no unboxing NPE). */
    private static boolean isSuccessCode(Integer code) {
        return code != null && code == 200;
    }

    /** Converts the scene results of one task into audit entries and appends them to {@code sink}. */
    private void collectTaskResult(JSONObject taskResultObj, List<AuditInfo> sink) {
        Integer taskCode = taskResultObj.getInteger("code");
        if (!isSuccessCode(taskCode)) {
            log.error("阿里敏感词检测:任务处理失败 code:{}", taskCode);
            return;
        }
        JSONArray sceneResults = taskResultObj.getJSONArray("results");
        if (sceneResults == null) {
            log.error("阿里敏感词检测:任务结果缺少 results 字段");
            return;
        }
        // Per the original note, this field is returned when the text hit a keyword from a
        // custom keyword library, with the hit replaced by asterisks; otherwise it is null.
        String filteredContent = taskResultObj.getString("filteredContent");
        for (int j = 0; j < sceneResults.size(); j++) {
            JSONObject sceneResult = sceneResults.getJSONObject(j);
            String label = sceneResult.getString("label");
            Double rate = sceneResult.getDouble("rate");
            if (label == null || rate == null) {
                // Skip instead of inventing a score: a made-up rate could let content pass.
                log.error("阿里敏感词检测:结果缺少 label 或 rate, label:{} rate:{}", label, rate);
                continue;
            }
            // One AuditInfo per scene result, so entries in the list never alias each other.
            AuditInfo auditInfo = new AuditInfo();
            auditInfo.setContent(filteredContent);
            sink.add(convertMsg(label, auditInfo, rate));
        }
    }
    
//标签效验
/**
 * Translates a scan label and its score into the verdict stored on {@code audit}.
 *
 * <p>The verdict starts as passed with the message "审核正常". {@code politics},
 * {@code terrorism} and {@code contraband} always reject; {@code abuse} rejects above 70 and
 * {@code porn} above 90. {@code spam}, {@code ad}, {@code qrcode}, {@code flood} and
 * {@code meaningless} still pass but get a descriptive message once their threshold is
 * exceeded. Any other label passes with the message "自定义".
 *
 * @param label label returned by the scan
 * @param audit result object to fill in; it is modified in place
 * @param rate  score returned for the label
 * @return the same {@code audit} instance
 */
private AuditInfo convertMsg(String label, AuditInfo audit, double rate) {
    // Default verdict, kept whenever a threshold below is not exceeded.
    audit.setResult(true);
    audit.setMsg("审核正常");

    switch (label) {
        case "normal":
            return audit;

        // Always rejected, regardless of score.
        case "politics":
            return applyVerdict(audit, false, "涉政");
        case "terrorism":
            return applyVerdict(audit, false, "暴恐");
        case "contraband":
            return applyVerdict(audit, false, "违禁");

        // Rejected only above a score threshold.
        case "abuse":
            return rate > 70.0 ? applyVerdict(audit, false, "辱骂") : audit;
        case "porn":
            return rate > 90.0 ? applyVerdict(audit, false, "色情") : audit;

        // Still passed; only the message changes above the threshold.
        case "spam":
            return rate > 50.0 ? applyVerdict(audit, true, "含垃圾信息") : audit;
        case "ad":
            return rate > 50.0 ? applyVerdict(audit, true, "广告") : audit;
        case "qrcode":
            return rate > 60.0 ? applyVerdict(audit, true, "二维码") : audit;
        case "flood":
            return rate > 95.0 ? applyVerdict(audit, true, "灌水") : audit;
        case "meaningless":
            return rate > 95.0 ? applyVerdict(audit, true, "无意义") : audit;

        default:
            return applyVerdict(audit, true, "自定义");
    }
}

/** Stores the given verdict and message on {@code audit} and returns it. */
private AuditInfo applyVerdict(AuditInfo audit, boolean passed, String message) {
    audit.setResult(passed);
    audit.setMsg(message);
    return audit;
}


/**
 * Result of auditing one piece of text.
 *
 * @author: craywen
 * @date: 2021-05-25 14:23
 * @desc: audit result
 */
@Data
public class AuditInfo {

    /**
     * Audit verdict.
     * NOTE(review): the handler appears to use {@code true} for "passed" and {@code false}
     * for "rejected" -- confirm against callers.
     */
    private boolean result;

    /**
     * Message describing the verdict.
     */
    private String msg;

    /**
     * Content. The handler fills this from the {@code filteredContent} field of the scan
     * response, so it may be null.
     */
    private String content;

}

craywen

关注

4
点赞
踩
20

收藏

觉得还不错? 一键收藏
3
评论
阿里云文本审核(java敏感词校验)

文本敏感词校验 背景: 用户社区评论或分享的文章或评论,需要合法合规,不能包含涉政等违规的敏感词. 使用阿里云的内容审核 准备工作: 阿里云账号,需要开通敏感词服务 注意(有accessKeyId和accessKeySecret)都不行,须开通服务 文档地址: https://help.aliyun.com/document_detail/70439.html?spm=a2c4g.11186623.6.701.1cea2188bMuZUi 调试地址:https://next.api.aliyun.com/
复制链接

扫一扫

专栏目录