算法描述参考链接:
敏感词算法描述
Java 敏感词过滤
敏感词:“美元”,“中国”,“北京大学”,“北大”,“南京大学”
DFAUtils
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
public class DFAUtils {
/**
 * Adds a sensitive word to the DFA tree, a trie built from nested maps.
 *
 * <p>Starting at {@code sensitiveWordsMap}, every character of the word maps to
 * a child node (another map). The end of a word is marked by a {@code null}
 * key, mapped to {@code null}, stored in the node reached after the word's last
 * character. A node can therefore be both the end of one word and an inner
 * node of a longer word.
 *
 * <p>A {@code null} or empty word is ignored. Adding a word that is already
 * present leaves the tree unchanged. The whole insertion runs while holding
 * {@code lock}.
 *
 * @param sensitiveWord the word to register; {@code null} and empty strings are ignored
 */
public static void addSensitiveWord(String sensitiveWord) {
    if (null == sensitiveWord || sensitiveWord.length() == 0) {
        return;
    }
    char[] chars = sensitiveWord.toCharArray();
    synchronized (lock) {
        Map<Character, Map> node = sensitiveWordsMap;
        for (int i = 0; i < chars.length; i++) {
            Map<Character, Map> child = node.get(chars[i]);
            if (null == child) {
                // Link a real (possibly still empty) node right away instead of a
                // null placeholder, so the tree never contains a null child value.
                // The last character's node usually only holds the end marker,
                // hence the initial capacity of 1.
                boolean isLastChar = (i == chars.length - 1);
                child = isLastChar
                        ? new HashMap<Character, Map>(1)
                        : new HashMap<Character, Map>();
                node.put(chars[i], child);
            }
            node = child;
        }
        // The null key marks "a word ends at this node"; existing children are kept,
        // so a word that is a prefix of a longer word is handled as well.
        node.put(null, null);
    }
}
/**
* 检查敏感词(找到符合敏感词则返回--单个字符敏感词前后不是中文字符才算敏感词)
*/
public static String checkSensitiveWord(String content) {
if (null == content || content.length() == 0 || sensitiveWordsMap.size() == 0) {
return null;
}
char[] chars = content.toCharArray();
boolean isContain = Boolean.FALSE;
StringBuilder sbResult = new StringBuilder();
for (int i = 0; i < chars.length; i++) {
if (sensitiveWordsMap.containsKey(chars[i])) {
Map<Character, Map> currentMap = sensitiveWordsMap.get(chars[i]);
sbResult.append(chars[i]);
if (null == currentMap) {
break;