// Sensitive-word filtering utility (敏感词过滤 util)

import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.*;

/**

  • 敏感词检查工具类,采用DFA算法
    */
    @Component
    public class SensitiveWordUtil {
    private static final String END_FLAG = “end”;
    private static final String MIN_MATCH = “MIN_MATCH”;
    private static final String MAX_MATCH = “MAX_MATCH”;
    private static SensitiveWordMapper sensitiveWordMapper;
    private static Map<Object, Object> sensitiveWordMap = null;
    SensitiveWordUtil(){}

    @Autowired
    public void setSensitiveWordMapper(CcSensitiveWordMapper ccSensitiveWordMapper){
    SensitiveWordUtil.sensitiveWordMapper = sensitiveWordMapper;
    }
    /**

    • 检查是否包含敏感词汇
    • @param word
    •        待检查文本
      
    • @return 是否包含敏感词汇
      */
      public static Boolean checkSensitiveWord(String word) {
      if (StringUtils.isBlank(word)) {
      return true;
      }
      if(sensitiveWordMap == null){
      Set sensitiveWords = sensitiveWordMapper.queryAllSensitiveWords();
      sensitiveWordMap = initSensitiveWordsMap(sensitiveWords);
      }
      String sensitiveWord = getSensitiveWord(word, sensitiveWordMap);
      return StringUtils.isBlank(sensitiveWord);
      }

    /**

    • 将敏感词汇列表转换为hash树
    • @param sensitiveWords
    •        敏感词汇列表
      
    • @return 转换好的树
      */
      public static Map<Object, Object> initSensitiveWordsMap(Set sensitiveWords) {
      Map<Object, Object> sensitiveWordsMap = new HashMap<>(sensitiveWords.size());
      String currentWord;
      Map<Object, Object> currentMap;
      Map<Object, Object> subMap;
      for (String sensitiveWord : sensitiveWords) {
      currentWord = sensitiveWord;
      // 敏感词长度必须大于等于2
      if (currentWord == null || currentWord.trim().length() < 2) {
      continue;
      }
      currentMap = sensitiveWordsMap;
      for (int i = 0; i < currentWord.length(); i++) {
      char c = currentWord.charAt(i);
      subMap = (Map<Object, Object>)currentMap.get©;
      if (subMap == null) {
      subMap = new HashMap<>(16);
      currentMap.put(c, subMap);
      currentMap = subMap;
      } else {
      currentMap = subMap;
      }
      if (i == currentWord.length() - 1) {
      // 如果是最后一个字符,则put一个结束标志,这里只需要保存key就行了,value为null可以节省空间。
      // 如果不是最后一个字符,则不需要存这个结束标志,同样也是为了节省空间。
      currentMap.put(END_FLAG, null);
      }
      }
      }
      return sensitiveWordsMap;
      }

    /**

    • 获取一个匹配到的敏感词汇
    • @param text
    •        待检查文本
      
    • @param sensitiveWordsMap
    •        敏感词汇树
      
    • @return 匹配到的第一个敏感词汇
      */
      public static String getSensitiveWord(String text, Map<Object, Object> sensitiveWordsMap) {
      if (text == null || text.trim().length() == 0) {
      return null;
      }
      for (int i = 0; i < text.length(); i++) {
      int sensitiveWordLength = getSensitiveWordLength(text, i, MIN_MATCH, sensitiveWordsMap);
      if (sensitiveWordLength > 0) {
      return text.substring(i, i + sensitiveWordLength);
      }
      }
      return null;
      }

    /**

    • 获取匹配到的敏感词汇列表
    • @param text
    •        待检查文本
      
    • @param sensitiveWordsMap
    •        敏感词汇树
      
    • @return 匹配所有敏感词汇
      */
      public static Set getSensitiveWords(String text, Map<Object, Object> sensitiveWordsMap) {
      if (text == null || text.trim().length() == 0) {
      return Collections.emptySet();
      }
      Set sensitiveWords = new HashSet<>();
      for (int i = 0; i < text.length(); i++) {
      int sensitiveWordLength = getSensitiveWordLength(text, i, MAX_MATCH, sensitiveWordsMap);
      if (sensitiveWordLength > 0) {
      String sensitiveWord = text.substring(i, i + sensitiveWordLength);
      sensitiveWords.add(sensitiveWord);
      i = i + sensitiveWordLength - 1;
      }
      }
      return sensitiveWords;
      }

    private static int getSensitiveWordLength(String text, int startIndex, String matchType,
    Map<Object, Object> sensitiveWordsMap) {
    if (text == null || text.trim().length() == 0) {
    return 0;
    }
    char currentChar;
    Map<Object, Object> currentMap = sensitiveWordsMap;
    int wordLength = 0;
    boolean endFlag = false;
    for (int i = startIndex; i < text.length(); i++) {
    currentChar = text.charAt(i);
    Map<Object, Object> subMap = (Map<Object, Object>)currentMap.get(currentChar);
    if (subMap == null) {
    break;
    } else {
    wordLength++;
    if (subMap.containsKey(END_FLAG)) {
    endFlag = true;
    if (MIN_MATCH.equals(matchType)) {
    break;
    } else {
    currentMap = subMap;
    }
    } else {
    currentMap = subMap;
    }
    }
    }
    if (!endFlag) {
    wordLength = 0;
    }
    return wordLength;
    }
    }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值