// 敏感词处理模块:提供敏感词检测(未实现敏感词替换;检测到敏感词时应驳回内容,要求重新编写)。
public class SensitiveWord {
    // Match modes: minMatchType stops at the shortest word found at a position;
    // maxMatchType keeps scanning for the longest word. (Names kept lowercase
    // for backward compatibility with existing callers.)
    public static final int minMatchType = 1;
    public static final int maxMatchType = 2;
    // Trie of sensitive words. Node keys are single-character Strings; every
    // node also carries an "isEnd" entry ("1" = a word terminates here, "0" = not).
    // Kept raw and public for backward compatibility with existing callers.
    public static Map sensitiveWordMap;
    private static final String SENSITIVE_WORD_MAP = "SENSITIVE_WORD_MAP";

    /**
     * Initializes the sensitive-word trie, preferring the cached copy in redis
     * and falling back to the database when the cache is empty.
     *
     * @return the initialized word trie
     * @throws TipException if loading or cache access fails
     */
    public Map initSensitiveWordMap() {
        log.info("准备初始化敏感词库~");
        try {
            // Try the redis cache first.
            sensitiveWordMap = AbstractRedisExecutor.instance().getMap(SENSITIVE_WORD_MAP);
            if (sensitiveWordMap == null) {
                // Cache miss: load all enabled words (flag = 1) from the database.
                // NOTE(review): table name "sensiteve_word" looks misspelled and differs
                // from "exexm_sensitive_word" used in afterSave() — confirm against schema.
                List<Bean> sensitiveWordList = ServDao.finds("sensiteve_word", " and flag= 1 ");
                Set<String> keyWordSet = new HashSet<>();
                for (Bean bean : sensitiveWordList) {
                    keyWordSet.add(bean.getStr("NAME").trim());
                }
                // Build the trie and publish it to redis for subsequent calls.
                addSensitiveWordToHashMap(keyWordSet);
                Context.cleanThreadData();
                AbstractRedisExecutor.instance().setMap(SENSITIVE_WORD_MAP, sensitiveWordMap, 8000);
            }
        } catch (Exception e) {
            // NOTE(review): the cause is dropped here; if TipException has a
            // (String, Throwable) constructor, pass `e` so the root cause is kept.
            throw new TipException("敏感词库初始化失败");
        }
        log.info("敏感词库初始化成功~");
        return sensitiveWordMap;
    }

    /**
     * Builds the trie from the given word set and assigns it to
     * {@link #sensitiveWordMap}.
     *
     * @param keyWordSet set of sensitive words
     */
    private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
        // Pre-size for the expected number of root entries.
        sensitiveWordMap = new HashMap<>(keyWordSet.size());
        for (String key : keyWordSet) {
            Map nowMap = sensitiveWordMap;
            for (int i = 0; i < key.length(); i++) {
                // BUG FIX: the original stored a raw char (autoboxed Character) as
                // the key, while checkedSensitiveWord() looks up with String keys —
                // Character.equals(String) is always false, so no word could ever
                // match. Store single-character String keys to match the lookup.
                String keyChar = String.valueOf(key.charAt(i));
                Object wordMap = nowMap.get(keyChar);
                if (wordMap != null) {
                    // Character already present: descend into the existing node.
                    nowMap = (Map) wordMap;
                } else {
                    // New character: create a non-terminal node and descend.
                    Map<String, Object> newWordMap = new HashMap<>();
                    newWordMap.put("isEnd", "0");
                    nowMap.put(keyChar, newWordMap);
                    nowMap = newWordMap;
                }
                // Last character of the word: mark the node as terminal.
                if (i == key.length() - 1) {
                    nowMap.put("isEnd", "1");
                }
            }
        }
    }

    /**
     * Collects all sensitive words contained in the given text.
     *
     * @param txt       text to scan
     * @param matchType {@link #minMatchType} or {@link #maxMatchType}
     * @return set of matched sensitive words (empty when none found)
     */
    public Set<String> getSensitiveWord(String txt, int matchType) {
        HashSet<String> sensitiveWordSet = new HashSet<>();
        if (txt == null) {
            return sensitiveWordSet;
        }
        for (int i = 0; i < txt.length(); i++) {
            int length = checkedSensitiveWord(txt, i, matchType);
            if (length > 0) {
                sensitiveWordSet.add(txt.substring(i, i + length));
            }
        }
        return sensitiveWordSet;
    }

    /**
     * @return number of root entries in the trie, or 0 when uninitialized
     */
    public static int getWordSize() {
        if (sensitiveWordMap == null) {
            return 0;
        }
        return sensitiveWordMap.size();
    }

    /**
     * Checks whether the text contains any sensitive word.
     *
     * @param txt       text to scan
     * @param matchType {@link #minMatchType} or {@link #maxMatchType}
     * @return true when at least one sensitive word is found
     */
    public boolean isContaintSensitiveWord(String txt, int matchType) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            // Return on the first hit — scanning further cannot change the answer.
            if (checkedSensitiveWord(txt, i, matchType) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks for a sensitive word starting at the given position.
     *
     * @param txt        text to scan
     * @param beginIndex start position in {@code txt}
     * @param matchType  {@link #minMatchType} stops at the shortest word,
     *                   {@link #maxMatchType} returns the longest word
     * @return length of the matched word, or 0 when none starts here
     */
    public int checkedSensitiveWord(String txt, int beginIndex, int matchType) {
        // Guard: nothing to match against an uninitialized trie or null text.
        if (txt == null || sensitiveWordMap == null) {
            return 0;
        }
        // Length of the longest CONFIRMED word ending at an "isEnd" node.
        int matched = 0;
        // Characters traversed in the trie so far.
        int depth = 0;
        Map nowMap = sensitiveWordMap;
        for (int i = beginIndex; i < txt.length(); i++) {
            String keyChar = String.valueOf(txt.charAt(i));
            nowMap = (Map) nowMap.get(keyChar);
            // No trie edge for this character: stop scanning.
            if (nowMap == null) {
                break;
            }
            depth++;
            if (isEnd(nowMap)) {
                // BUG FIX: the original returned the traversal depth, so a partial
                // prefix of a longer word (e.g. "abc" against word "abcd" with
                // "ab" also a word) inflated the result. Only lengths confirmed
                // at terminal nodes are returned.
                matched = depth;
                // Shortest-match mode: first confirmed word wins.
                if (minMatchType == matchType) {
                    break;
                }
            }
        }
        return matched;
    }

    /**
     * @param nowMap trie node
     * @return true when the node terminates a sensitive word
     */
    private static boolean isEnd(Map nowMap) {
        return "1".equals(nowMap.get("isEnd"));
    }

    /**
     * Rebuilds the word trie and refreshes the redis cache after a save
     * (business hook invoked when new sensitive words are stored).
     *
     * @param paramBean request parameters
     * @param outBean   response holder
     */
    protected void afterSave(ParamBean paramBean, OutBean outBean) {
        // Reload all enabled words (flag = 1) from the database.
        List<Bean> sensitiveWordList = ServDao.finds("exexm_sensitive_word", " and flag= 1 ");
        Set<String> keyWordSet = new HashSet<>();
        for (Bean bean : sensitiveWordList) {
            keyWordSet.add(bean.getStr("NAME").trim());
        }
        // Rebuild the trie and overwrite the cached copy.
        addSensitiveWordToHashMap(keyWordSet);
        AbstractRedisExecutor.instance().setMap(SENSITIVE_WORD_MAP, sensitiveWordMap, 8000);
        super.afterSave(paramBean, outBean);
    }
}