敏感词过滤器工具类(可直接使用):基于 DFA(确定有穷自动机)算法的实现


import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Sensitive-word filter.
 *
 * <p>The configured words are stored as a tree of nested maps: each level maps a
 * {@link Character} to the map of the next level, and every level additionally
 * carries a {@code "isEnd"} flag telling whether a word ends at that character.
 * Checking a text walks this structure character by character, which is the
 * DFA-style matching the original implementation describes.
 */
@Service
public class SensitiveWordService implements InitializingBean {

    private static final Logger logger = LoggerFactory.getLogger(SensitiveWordService.class);

    /** Key under which every node stores its "a word ends here" flag. */
    private static final String END_KEY = "isEnd";

    /** Comma-separated sensitive words, injected from the {@code sensitive.word} property. */
    @Value("${sensitive.word}")
    private String sensitiveWord;

    /**
     * Root of the nested-map dictionary. Starts out empty (never {@code null}) so that
     * {@link #check(String)} is safe to call even when no word has been loaded.
     */
    private static Map<Object, Object> sensitiveWordMap = new HashMap<>();

    /**
     * Tests whether the text contains any configured sensitive word.
     *
     * <p>Every character position of the text is tried as a possible start of a word.
     *
     * @param text the text to inspect; {@code null} or empty text contains no word
     * @return {@code true} if at least one sensitive word occurs in the text
     */
    public boolean check(String text) {
        if (text == null || text.isEmpty()) {
            return false;
        }
        // Read the field once so a whole check runs against one dictionary instance.
        final Map<Object, Object> root = sensitiveWordMap;
        if (root == null || root.isEmpty()) {
            return false;
        }
        for (int i = 0, length = text.length(); i < length; i++) {
            if (containsSensitiveWord(root, text, i)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether a sensitive word starts exactly at {@code index}.
     *
     * <p>Matching stops at the first node flagged as a word end, so the shortest
     * word starting at {@code index} is the one that gets logged.
     *
     * @param root  root of the dictionary to match against
     * @param text  the text being inspected
     * @param index position in {@code text} where the candidate word starts
     * @return {@code true} if a sensitive word begins at {@code index}
     */
    private boolean containsSensitiveWord(Map<Object, Object> root, String text, int index) {
        Map<Object, Object> node = root;
        for (int i = index, length = text.length(); i < length; i++) {
            node = childOf(node, text.charAt(i));
            if (node == null) {
                // No configured word continues with this character.
                return false;
            }
            // Boolean.TRUE.equals avoids unboxing a missing flag.
            if (Boolean.TRUE.equals(node.get(END_KEY))) {
                logger.error("查询到敏感词:{}", text.substring(index, i + 1));
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the child node reached from {@code node} via {@code ch}, or {@code null}.
     *
     * <p>The cast is safe because {@link #convertSensitiveWordToMap(List)} only ever
     * stores nested maps under {@link Character} keys.
     */
    @SuppressWarnings("unchecked")
    private static Map<Object, Object> childOf(Map<Object, Object> node, char ch) {
        return (Map<Object, Object>) node.get(ch);
    }

    /**
     * Converts a word list into the nested-map dictionary.
     *
     * <p>The dictionary is built in a local variable and only returned once complete;
     * callers decide when to publish it to {@link #sensitiveWordMap}.
     *
     * @param words the sensitive words; {@code null} entries and empty strings are skipped
     * @return the dictionary root; an empty map when there are no usable words
     */
    private Map<Object, Object> convertSensitiveWordToMap(List<String> words) {
        if (words == null || words.isEmpty()) {
            return new HashMap<>();
        }
        // Pre-size the root level to reduce rehashing.
        final Map<Object, Object> root = new HashMap<>(words.size());
        for (String word : words) {
            if (word == null || word.isEmpty()) {
                continue;
            }
            Map<Object, Object> node = root;
            for (int i = 0, length = word.length(); i < length; i++) {
                final char ch = word.charAt(i);
                Map<Object, Object> next = childOf(node, ch);
                if (next == null) {
                    next = new HashMap<>();
                    next.put(END_KEY, false);
                    node.put(ch, next);
                }
                node = next;
            }
            // The node reached by the last character marks the end of a word.
            node.put(END_KEY, true);
        }
        return root;
    }

    /**
     * Builds the dictionary from the injected comma-separated property.
     *
     * <p>Entries are trimmed and blank entries are dropped. A missing or blank
     * property results in an empty dictionary rather than a failure.
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        final List<String> sensitiveWords = new ArrayList<>();
        if (sensitiveWord != null) {
            for (String s : sensitiveWord.split(",")) {
                final String trimmed = s.trim();
                if (!trimmed.isEmpty()) {
                    sensitiveWords.add(trimmed);
                }
            }
        }
        sensitiveWordMap = convertSensitiveWordToMap(sensitiveWords);
    }

    /**
     * Small manual demo: loads two words and checks two sample sentences.
     *
     * @param args the input arguments (unused)
     */
    public static void main(String[] args) {
        List<String> sensitiveWords = new ArrayList<>();
        sensitiveWords.add("拜登");
        sensitiveWords.add("特朗普");
        SensitiveWordService filter = new SensitiveWordService();
        sensitiveWordMap = filter.convertSensitiveWordToMap(sensitiveWords);

        String text1 = "拜登了,白等了,等也白等";
        String text2 = "川普:俺还要玩嘛";
        System.out.println("“" + text1 + "”" + (filter.check(text1) ? "" : "不") + "包含敏感词");
        System.out.println("“" + text2 + "”" + (filter.check(text2) ? "" : "不") + "包含敏感词");
    }
}

基本思路:先将敏感词库转换为嵌套的 Map 结构(相当于一棵字典树),再从输入文本的每个位置出发,逐字符在该 Map 中向下匹配;一旦到达被标记为词尾(isEnd 为 true)的节点,即可判定文本中存在敏感词。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值