1. 初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型
package com.datago.common.utils.sensitive;
import java.util.*;
/**
 * Builds and queries the DFA (trie) model used for sensitive-word matching.
 *
 * <p>The trie is a nested map: each node maps a {@code Character} to its child
 * node, and every node carries an {@code "isEnd"} flag ({@code "1"} means a
 * sensitive word terminates at that node).
 */
public class SensitiveWordInit {
    // Root node of the DFA trie. Raw type kept for backward compatibility with
    // existing callers that assign it to a raw Map.
    @SuppressWarnings("rawtypes")
    public static HashMap sensitiveWordMap;

    public SensitiveWordInit() {
        super();
    }

    /**
     * (Re)initializes the word map with a single sensitive word and returns it.
     *
     * <p>NOTE: each call discards the previous map — the model holds one word at
     * a time, which matches how SensitivewordFilter drives it (one dictionary
     * entry per call).
     *
     * @param datas the sensitive word to load
     * @return the rebuilt trie root
     */
    @SuppressWarnings("rawtypes")
    public static HashMap init(String datas) {
        addSensitiveWord(datas);
        return sensitiveWordMap;
    }

    /** Inserts one word into a freshly created trie, char by char. */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static void addSensitiveWord(String word) {
        sensitiveWordMap = new HashMap(word.length());
        Map current = sensitiveWordMap;
        for (int i = 0; i < word.length(); i++) {
            char keyChar = word.charAt(i);
            Object child = current.get(keyChar);
            if (child != null) {
                // Path already exists — descend.
                current = (Map) child;
            } else {
                Map node = new HashMap<>();
                node.put("isEnd", "0");
                current.put(keyChar, node);
                current = node;
            }
            if (i == word.length() - 1) {
                // Last character of the word: mark terminal node.
                current.put("isEnd", "1");
            }
        }
    }

    /**
     * Scans {@code text} and returns every sensitive word found.
     *
     * <p>Fixes over the original implementation:
     * <ul>
     *   <li>{@code count} is now reset after ANY failed partial match — the
     *       original only reset it when {@code count == 1}, leaving a stale
     *       counter that corrupted the start index of later matches;</li>
     *   <li>the scan rewinds to the character after a failed match's start, so
     *       overlapping candidates are found (e.g. "abc" inside "ababc");</li>
     *   <li>null text or an uninitialized map returns an empty list instead of
     *       throwing NPE.</li>
     * </ul>
     *
     * @param text      the text to scan
     * @param matchType 1 = minimum match (drop failed partials),
     *                  2 = also collect the matched prefix of failed partials
     * @return matched words in order of appearance (possibly empty, never null)
     */
    @SuppressWarnings("rawtypes")
    public static List<String> getSensitiveWord(String text, int matchType) {
        List<String> words = new ArrayList<>();
        if (text == null || sensitiveWordMap == null) {
            return words;
        }
        Map now = sensitiveWordMap;
        int count = 0; // length of the current partial match
        int start = 0; // index where the current partial match began
        for (int i = 0; i < text.length(); i++) {
            char key = text.charAt(i);
            Object next = now.get(key);
            if (next != null) {
                now = (Map) next;
                count++;
                if (count == 1) {
                    start = i;
                }
                if ("1".equals(now.get("isEnd"))) {
                    // Complete word matched — record it and restart at the root.
                    words.add(text.substring(start, start + count));
                    now = sensitiveWordMap;
                    count = 0;
                }
            } else {
                now = sensitiveWordMap;
                if (count > 0) {
                    if (matchType == 2) {
                        // Max-match mode: keep the prefix that did match.
                        words.add(text.substring(start, start + count));
                    }
                    // Rewind so scanning resumes at start + 1 (loop increments i),
                    // otherwise overlapping matches are skipped.
                    i = start;
                    count = 0;
                }
            }
        }
        return words;
    }
}
2. 敏感词过滤
package com.datago.common.utils.sensitive;
import com.datago.common.core.redis.RedisCache;
import com.datago.common.utils.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.*;
@Component
public class SensitivewordFilter {

    /** Redis cache holding the dictionary hash (key = sensitive word, value = mask char). */
    private static RedisCache redisCache;

    /** Setter injection that bridges the Spring bean into the static API. */
    @Autowired
    public void setRedisCache(RedisCache redisCache) {
        SensitivewordFilter.redisCache = redisCache;
    }

    // Cached trie root; rebuilt on every initSensitiveWord call.
    @SuppressWarnings("rawtypes")
    private static Map sensitiveWordMap = null;

    /** Rebuilds the DFA model from the given word. */
    public static void initSensitiveWord(String datas) {
        sensitiveWordMap = SensitiveWordInit.init(datas);
    }

    /**
     * Replaces every sensitive word found in {@code txt} with {@code replaceChar}
     * repeated to the word's length (capped at 6).
     *
     * <p>FIX: uses {@link String#replace} (literal) instead of {@code replaceAll}
     * (regex) — the original would throw or mis-replace when a sensitive word
     * contained regex metacharacters such as {@code *}, {@code (} or {@code .}.
     *
     * @param datas       word used to (re)build the model when it is not loaded yet
     * @param txt         the text to filter
     * @param matchType   1 = minimum match, 2 = maximum match (see SensitiveWordInit)
     * @param replaceChar mask character (e.g. "*")
     * @return the masked text
     */
    public static String replaceSensitiveWord(String datas, String txt, int matchType, String replaceChar) {
        if (sensitiveWordMap == null) {
            initSensitiveWord(datas);
        }
        String resultTxt = txt;
        List<String> words = SensitiveWordInit.getSensitiveWord(txt, matchType);
        for (String word : words) {
            resultTxt = resultTxt.replace(word, getReplaceChars(replaceChar, word.length()));
        }
        return resultTxt;
    }

    /**
     * Builds the mask string: {@code replaceChar} repeated {@code length} times,
     * capped at 6 repetitions. Uses StringBuilder instead of String += in a loop.
     */
    private static String getReplaceChars(String replaceChar, int length) {
        int repetitions = Math.min(length, 6);
        StringBuilder mask = new StringBuilder();
        for (int i = 0; i < repetitions; i++) {
            mask.append(replaceChar);
        }
        return mask.toString();
    }

    /**
     * Filters {@code sensitiveTxt} against every word stored in the redis
     * "treeSensitive" hash, applying each word's own mask character.
     *
     * <p>FIX: returns the original text instead of {@code null} when the
     * dictionary is empty or missing (the original left {@code updateTxt} null
     * if the loop never ran).
     */
    public static String filterSensitive(String sensitiveTxt) {
        Map<String, String> datas = redisCache.getCacheObject("treeSensitive");
        String updateTxt = sensitiveTxt;
        if (datas != null) {
            for (Map.Entry<String, String> entry : datas.entrySet()) {
                // Rebuild the one-word model for this dictionary entry, then mask it.
                initSensitiveWord(entry.getKey());
                updateTxt = replaceSensitiveWord(entry.getKey(), updateTxt, 1, entry.getValue());
            }
        }
        return updateTxt;
    }
}
3. 应用
/** GET endpoint: masks sensitive words found in the path-variable text. */
@Log(title = "过滤敏感词汇")
@GetMapping("/filterSensitive/{sensitiveTxt}")
public AjaxResult filterSensitive(@PathVariable(value = "sensitiveTxt") String sensitiveTxt) {
    // Delegate to the static filter and wrap the masked result in a success payload.
    return AjaxResult.success(SensitivewordFilter.filterSensitive(sensitiveTxt));
}
4. 参考文献
https://www.hutool.cn/docs/#/dfa/DFA%E6%9F%A5%E6%89%BE