import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.*;
/**
-
敏感词检查工具类,采用DFA算法
*/
@Component
public class SensitiveWordUtil {
private static final String END_FLAG = “end”;
private static final String MIN_MATCH = “MIN_MATCH”;
private static final String MAX_MATCH = “MAX_MATCH”;
private static SensitiveWordMapper sensitiveWordMapper;
private static Map<Object, Object> sensitiveWordMap = null;
SensitiveWordUtil(){}@Autowired
public void setSensitiveWordMapper(CcSensitiveWordMapper ccSensitiveWordMapper){
SensitiveWordUtil.sensitiveWordMapper = sensitiveWordMapper;
}
/**- 检查是否包含敏感词汇
- @param word
-
待检查文本
- @return 是否包含敏感词汇
*/
public static Boolean checkSensitiveWord(String word) {
if (StringUtils.isBlank(word)) {
return true;
}
if(sensitiveWordMap == null){
Set sensitiveWords = sensitiveWordMapper.queryAllSensitiveWords();
sensitiveWordMap = initSensitiveWordsMap(sensitiveWords);
}
String sensitiveWord = getSensitiveWord(word, sensitiveWordMap);
return StringUtils.isBlank(sensitiveWord);
}
/**
- 将敏感词汇列表转换为hash树
- @param sensitiveWords
-
敏感词汇列表
- @return 转换好的树
*/
public static Map<Object, Object> initSensitiveWordsMap(Set sensitiveWords) {
Map<Object, Object> sensitiveWordsMap = new HashMap<>(sensitiveWords.size());
String currentWord;
Map<Object, Object> currentMap;
Map<Object, Object> subMap;
for (String sensitiveWord : sensitiveWords) {
currentWord = sensitiveWord;
// 敏感词长度必须大于等于2
if (currentWord == null || currentWord.trim().length() < 2) {
continue;
}
currentMap = sensitiveWordsMap;
for (int i = 0; i < currentWord.length(); i++) {
char c = currentWord.charAt(i);
subMap = (Map<Object, Object>)currentMap.get©;
if (subMap == null) {
subMap = new HashMap<>(16);
currentMap.put(c, subMap);
currentMap = subMap;
} else {
currentMap = subMap;
}
if (i == currentWord.length() - 1) {
// 如果是最后一个字符,则put一个结束标志,这里只需要保存key就行了,value为null可以节省空间。
// 如果不是最后一个字符,则不需要存这个结束标志,同样也是为了节省空间。
currentMap.put(END_FLAG, null);
}
}
}
return sensitiveWordsMap;
}
/**
- 获取一个匹配到的敏感词汇
- @param text
-
待检查文本
- @param sensitiveWordsMap
-
敏感词汇树
- @return 匹配到的第一个敏感词汇
*/
public static String getSensitiveWord(String text, Map<Object, Object> sensitiveWordsMap) {
if (text == null || text.trim().length() == 0) {
return null;
}
for (int i = 0; i < text.length(); i++) {
int sensitiveWordLength = getSensitiveWordLength(text, i, MIN_MATCH, sensitiveWordsMap);
if (sensitiveWordLength > 0) {
return text.substring(i, i + sensitiveWordLength);
}
}
return null;
}
/**
- 获取匹配到的敏感词汇列表
- @param text
-
待检查文本
- @param sensitiveWordsMap
-
敏感词汇树
- @return 匹配所有敏感词汇
*/
public static Set getSensitiveWords(String text, Map<Object, Object> sensitiveWordsMap) {
if (text == null || text.trim().length() == 0) {
return Collections.emptySet();
}
Set sensitiveWords = new HashSet<>();
for (int i = 0; i < text.length(); i++) {
int sensitiveWordLength = getSensitiveWordLength(text, i, MAX_MATCH, sensitiveWordsMap);
if (sensitiveWordLength > 0) {
String sensitiveWord = text.substring(i, i + sensitiveWordLength);
sensitiveWords.add(sensitiveWord);
i = i + sensitiveWordLength - 1;
}
}
return sensitiveWords;
}
private static int getSensitiveWordLength(String text, int startIndex, String matchType,
Map<Object, Object> sensitiveWordsMap) {
if (text == null || text.trim().length() == 0) {
return 0;
}
char currentChar;
Map<Object, Object> currentMap = sensitiveWordsMap;
int wordLength = 0;
boolean endFlag = false;
for (int i = startIndex; i < text.length(); i++) {
currentChar = text.charAt(i);
Map<Object, Object> subMap = (Map<Object, Object>)currentMap.get(currentChar);
if (subMap == null) {
break;
} else {
wordLength++;
if (subMap.containsKey(END_FLAG)) {
endFlag = true;
if (MIN_MATCH.equals(matchType)) {
break;
} else {
currentMap = subMap;
}
} else {
currentMap = subMap;
}
}
}
if (!endFlag) {
wordLength = 0;
}
return wordLength;
}
}