import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Sensitive-word filter.
 *
 * <p>Implements a DFA (deterministic finite automaton) matcher as a character trie: every
 * configured word is a path from the root, and the node reached by a word's last character is
 * flagged as a terminal node. Matching compares characters exactly (no case folding or
 * normalization is applied).
 */
@Service
public class SensitiveWordService implements InitializingBean {

    private static final Logger logger = LoggerFactory.getLogger(SensitiveWordService.class);

    /** Comma-separated sensitive words, injected from the {@code sensitive.word} property. */
    @Value("${sensitive.word}")
    private String sensitiveWord;

    /**
     * Root of the word trie. Starts as an empty trie (never {@code null}) so that
     * {@link #check(String)} is safe to call before {@link #afterPropertiesSet()} has run, and is
     * an instance field so separate instances do not overwrite each other's word list.
     */
    private TrieNode root = new TrieNode();

    /**
     * Checks whether the text contains at least one configured sensitive word.
     *
     * <p>Every character position of the text is tried as a potential start of a word.
     *
     * @param text the text to inspect; {@code null} is treated as containing no sensitive word
     * @return {@code true} if some substring of {@code text} equals a configured word,
     *         {@code false} otherwise
     */
    public boolean check(String text) {
        if (text == null) {
            return false;
        }
        for (int i = 0, length = text.length(); i < length; i++) {
            if (containsSensitiveWord(text, i)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether a sensitive word starts exactly at {@code index}.
     *
     * <p>Walks the trie character by character and stops at the first terminal node, so the
     * shortest word starting at {@code index} is the one that gets reported in the log.
     *
     * @param text  the text being inspected (must not be {@code null})
     * @param index the position in {@code text} at which a word must start
     * @return {@code true} if a configured word begins at {@code index}
     */
    private boolean containsSensitiveWord(String text, int index) {
        TrieNode node = root;
        for (int i = index, length = text.length(); i < length; i++) {
            node = node.children.get(text.charAt(i));
            if (node == null) {
                // No configured word continues with this character.
                return false;
            }
            if (node.end) {
                logger.error("查询到敏感词:{}", text.substring(index, i + 1));
                return true;
            }
        }
        return false;
    }

    /**
     * Builds a trie from the given words.
     *
     * @param words the sensitive words; may be {@code null} or empty, and {@code null} or empty
     *              entries are skipped
     * @return the root node of the trie; never {@code null} (an empty trie when there are no
     *         usable words)
     */
    private static TrieNode buildTrie(List<String> words) {
        TrieNode trieRoot = new TrieNode();
        if (words == null) {
            return trieRoot;
        }
        for (String word : words) {
            if (word == null || word.length() == 0) {
                continue;
            }
            TrieNode node = trieRoot;
            for (int i = 0, length = word.length(); i < length; i++) {
                Character key = word.charAt(i);
                TrieNode next = node.children.get(key);
                if (next == null) {
                    next = new TrieNode();
                    node.children.put(key, next);
                }
                node = next;
            }
            // The node reached by the last character marks the end of a complete word.
            node.end = true;
        }
        return trieRoot;
    }

    /**
     * Builds the trie from the injected comma-separated {@code sensitive.word} value.
     *
     * <p>Entries are trimmed and blank entries are dropped, so a value such as {@code "a, b,"}
     * yields the words {@code a} and {@code b}. A {@code null} or blank value results in an empty
     * trie (nothing is ever flagged) and a warning is logged.
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        List<String> sensitiveWords = new ArrayList<String>();
        if (sensitiveWord != null) {
            for (String s : sensitiveWord.split(",")) {
                String trimmed = s.trim();
                if (trimmed.length() > 0) {
                    sensitiveWords.add(trimmed);
                }
            }
        }
        if (sensitiveWords.isEmpty()) {
            logger.warn("No sensitive words configured; check(String) will always return false");
        }
        root = buildTrie(sensitiveWords);
    }

    /**
     * Small manual demo: builds a filter with two words and prints whether two sample texts
     * contain a sensitive word.
     *
     * @param args the input arguments (unused)
     */
    public static void main(String[] args) {
        List<String> sensitiveWords = new ArrayList<String>();
        sensitiveWords.add("拜登");
        sensitiveWords.add("特朗普");
        SensitiveWordService filter = new SensitiveWordService();
        filter.root = buildTrie(sensitiveWords);
        String text1 = "拜登了,白等了,等也白等";
        String text2 = "川普:俺还要玩嘛";
        System.out.println("“" + text1 + "”" + (filter.check(text1) ? "" : "不") + "包含敏感词");
        System.out.println("“" + text2 + "”" + (filter.check(text2) ? "" : "不") + "包含敏感词");
    }

    /** One state of the DFA: outgoing transitions keyed by character plus a terminal flag. */
    private static final class TrieNode {

        /** Transitions to the next state, keyed by the next character of a word. */
        private final Map<Character, TrieNode> children = new HashMap<Character, TrieNode>();

        /** {@code true} when the path from the root to this node spells a complete word. */
        private boolean end;
    }
}
基本思路:先将敏感词库转换为 Map 形式,再将输入文本与敏感词 Map 逐字符比对,从而判断文本中是否存在敏感词。