DFA算法(确定有穷自动机,Deterministic Finite Automaton):匹配效率不高,比较耗费内存,敏感词越多,内存占用越大。
1.敏感词工具
//敏感词工具
public class SensitiveWordEngine {
// Sensitive-word dictionary. Starts out null; getWordSize() reports 0 while it is null.
// NOTE(review): declared as a raw Map -- the key/value types and the code that
// populates it are not visible in this part of the file.
public static Map sensitiveWordMap = null;
// Match-type constant: filter only the smallest (minimal) sensitive word.
// NOTE(review): the capital 'Y' in the name looks like a typo, but this is a
// public field, so renaming it would break existing callers.
public static int minMatchTYpe = 1;
// Match-type constant: filter all sensitive words.
public static int maxMatchType = 2;
/**
 * Reports how many entries the sensitive-word dictionary currently holds.
 *
 * @return the size of {@code sensitiveWordMap}, or 0 while the map is still {@code null}
 */
public static int getWordSize() {
    return SensitiveWordEngine.sensitiveWordMap == null
            ? 0
            : SensitiveWordEngine.sensitiveWordMap.size();
}
/**
 * Checks whether the text contains at least one sensitive word.
 *
 * <p>Every index of {@code txt} is tried as a possible start position by calling
 * {@code checkSensitiveWord}; a result greater than 0 is treated as a match.
 * Scanning stops at the first match, because later positions cannot change the answer.
 *
 * @param txt       text to scan; {@code null} yields {@code false}
 * @param matchType match type, forwarded unchanged to {@code checkSensitiveWord}
 *                  (presumably {@code minMatchTYpe} or {@code maxMatchType} -- confirm
 *                  against {@code checkSensitiveWord})
 * @return {@code true} if a sensitive word is found starting at any position of
 *         {@code txt}, otherwise {@code false}
 */
public static boolean isContainSensitiveWord(String txt, int matchType) {
    // Nothing to scan: treat missing text as "no sensitive word" instead of throwing.
    if (txt == null) {
        return false;
    }
    for (int i = 0; i < txt.length(); i++) {
        // A positive value means a sensitive word starts at index i.
        if (checkSensitiveWord(txt, i, matchType) > 0) {
            return true;
        }
    }
    return false;
}
/**
* 获取敏感词内容
*
* @param txt
* @param matchType
* @return 敏感词内容
*/
public static Set<String> getSensitiveWord(String txt, int matchType) {
Set<String> sensitiveWordList = new HashSet<String>();
for (int i = 0; i < txt.length(); i++) {
int length = checkSensitiveWord(txt, i, matchType);
if (length > 0) {
// 将检测出的敏感词保存到集合中
sensitiveWord