上一次我们简单地介绍了一下DFA算法,原文链接:《DFA算法简单理解与实现》
这一次我们基于这个算法模型实现我们的敏感词过滤功能,本功能实现背景为ssm框架和ehcache,不多说了,上才艺
一.实现思路
首先宏观的梳理了一下项目,项目中所有会往数据库中插入数据的接口方法名都是由insert或者update开头,且请求方式都为post或put,由此我们可以有以下两种思路:
-
使用spring aop 代理所有的post或put请求方法,对其中传入的String参数进行校验,切入点可以定义为:
@Around("@annotation(org.springframework.web.bind.annotation.PostMapping)||@annotation(org.springframework.web.bind.annotation.PutMapping)")
-
使用spring aop 代理所有controller层insert或update开头的方法,对其中传入的String参数进行校验,切入点可以定义为:
@Pointcut("execution(* com.krt.*.controller..*Controller*.*insert*(..)) || execution(* com.krt.*.controller..*Controller*.update(..))") public void SensitiveWord() { }
因为第一种方式对项目影响太广,所以暂时放弃,采用第二种方式
二.代码实现
1. 敏感词检测类
package com.krt.common.util;
import com.krt.oa.mapper.NoticeMapper;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.*;
/**
* @author: lyp
* @Date: 2021/8/12 14:39
* @Description: 基于 DFA 算法的敏感词过滤工具,敏感词列表从 Ehcache 缓存/数据库加载
*/
@Component
public class WordFilter {

    /**
     * Character used to mask each character of a matched sensitive word.
     */
    private final static String REPLACE_CHAR = "*";

    /**
     * DFA model built from the sensitive-word list: nested maps keyed by
     * Character, where each node carries an "isEnd" flag ("1" = a complete
     * word ends at this node). Built lazily on first use.
     */
    private static HashMap sensitiveWordMap;

    /**
     * Minimum match rule: stop at the first (shortest) complete word.
     */
    private static int minMatchType = 1;

    /**
     * Maximum match rule: keep walking to find the longest complete word.
     */
    private static int maxMatchType = 2;

    @Autowired
    private NoticeMapper sensitiveWordMapper;

    /**
     * Replaces every sensitive word in {@code text} with {@link #REPLACE_CHAR}.
     * The word list is read from Ehcache; on a cache miss it is loaded from the
     * database and the cache is refreshed.
     *
     * @param text text to filter; returned unchanged when blank
     * @return the filtered text
     */
    public String replaceWords(String text) {
        if (StringUtils.isBlank(text)) {
            return text;
        }
        EhcacheUtil ehcacheUtil = EhcacheUtil.getInstance();
        List<String> words = (List<String>) ehcacheUtil.get("sensitiveWord", "SensitiveWord");
        if (words == null) {
            words = sensitiveWordMapper.listStr();
            // Refresh the cache with the freshly loaded list.
            ehcacheUtil.put("sensitiveWord", "SensitiveWord", words);
        }
        Set<String> keyWordSet = new HashSet<>(words);
        return WordFilter.replaceSensitiveWord(keyWordSet, text, WordFilter.maxMatchType);
    }

    /**
     * Replaces sensitive words in {@code txt}, building the DFA model first if
     * it has not been built yet.
     *
     * @param data      sensitive-word set used to build the model on first call
     * @param txt       text to check
     * @param matchType match rule ({@link #minMatchType} or {@link #maxMatchType})
     * @return the filtered text
     */
    private static String replaceSensitiveWord(Set<String> data, String txt, int matchType) {
        if (sensitiveWordMap == null) {
            addSensitiveWordToHashMap(data);
        }
        return replaceSensitiveWord(txt, matchType, REPLACE_CHAR);
    }

    /**
     * Builds the DFA model from the sensitive-word set: each word becomes a
     * chain of nested maps keyed by its characters, and the node of the last
     * character is flagged with isEnd = "1".
     *
     * @param keyWordSet the sensitive words to index
     */
    private static void addSensitiveWordToHashMap(Set<String> keyWordSet) {
        // Pre-size the root map to the number of words.
        sensitiveWordMap = new HashMap(keyWordSet.size());
        for (String key : keyWordSet) {
            // Walk/extend the trie from the root for each word. nowMap always
            // points into sensitiveWordMap, so mutations build the shared model.
            Map nowMap = sensitiveWordMap;
            for (int i = 0; i < key.length(); i++) {
                char keyChar = key.charAt(i);
                Object wordMap = nowMap.get(keyChar);
                if (wordMap != null) {
                    // Character already present: descend into the existing node.
                    nowMap = (Map) wordMap;
                } else {
                    // New character: create a node, initially not a word end.
                    Map<String, String> newWorMap = new HashMap<>();
                    newWorMap.put("isEnd", "0");
                    nowMap.put(keyChar, newWorMap);
                    nowMap = newWorMap;
                }
                // Mark the node of the word's last character as a word end.
                if (i == key.length() - 1) {
                    nowMap.put("isEnd", "1");
                }
            }
        }
    }

    /**
     * Collects every sensitive word found in {@code txt}.
     *
     * @param txt       text to scan
     * @param matchType match rule ({@link #minMatchType} or {@link #maxMatchType})
     * @return the distinct sensitive words found
     */
    public static Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> sensitiveWordList = new HashSet<String>();
        for (int i = 0; i < txt.length(); i++) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                sensitiveWordList.add(txt.substring(i, i + length));
                // Skip past the matched word (loop increment adds the final 1).
                i = i + length - 1;
            }
        }
        return sensitiveWordList;
    }

    /**
     * Replaces every sensitive word in {@code txt} with a mask of
     * {@code replaceChar} of the same length.
     *
     * @param txt         text to filter
     * @param matchType   match rule ({@link #minMatchType} or {@link #maxMatchType})
     * @param replaceChar mask character
     * @return the filtered text
     */
    public static String replaceSensitiveWord(String txt, int matchType, String replaceChar) {
        String resultTxt = txt;
        for (String word : getSensitiveWord(txt, matchType)) {
            // String.replace performs a literal (non-regex) replacement of every
            // occurrence, so words containing regex metacharacters ("1+1", "a.b")
            // cannot corrupt the match the way replaceAll would.
            resultTxt = resultTxt.replace(word, getReplaceChars(replaceChar, word.length()));
        }
        return resultTxt;
    }

    /**
     * Builds a mask string of {@code replaceChar} repeated {@code length} times.
     *
     * @param replaceChar mask character
     * @param length      number of repetitions
     * @return the mask string
     */
    private static String getReplaceChars(String replaceChar, int length) {
        StringBuilder resultReplace = new StringBuilder(replaceChar);
        for (int i = 1; i < length; i++) {
            resultReplace.append(replaceChar);
        }
        return resultReplace.toString();
    }

    /**
     * Checks whether a sensitive word starts at {@code beginIndex} in {@code txt}.
     *
     * @param txt        text to scan
     * @param beginIndex index to start matching from
     * @param matchType  match rule ({@link #minMatchType} or {@link #maxMatchType})
     * @return the length of the matched sensitive word, or 0 when none matches
     */
    public static int checkSensitiveWord(String txt, int beginIndex, int matchType) {
        Map nowMap = sensitiveWordMap;
        if (nowMap == null) {
            // Model not built yet: nothing can match.
            return 0;
        }
        // Length of the longest COMPLETE word confirmed so far. Only updated at
        // "isEnd" nodes, so a walk that continues past the last complete word and
        // then dead-ends does not over-report (e.g. dict {"ab","abcd"}, text
        // "abcx" must return 2, not 3).
        int matchLength = 0;
        // Characters walked in the trie from beginIndex.
        int charCount = 0;
        for (int i = beginIndex; i < txt.length(); i++) {
            nowMap = (Map) nowMap.get(txt.charAt(i));
            if (nowMap == null) {
                // Dead end: no sensitive word continues with this character.
                break;
            }
            charCount++;
            if ("1".equals(nowMap.get("isEnd"))) {
                matchLength = charCount;
                // Minimum match: stop at the first complete word.
                if (minMatchType == matchType) {
                    break;
                }
            }
        }
        return matchLength;
    }

    /**
     * Returns the number of top-level entries in the DFA model (distinct first
     * characters of the indexed words), or 0 when the model is not built.
     *
     * @return the model's root size
     */
    public int getWordSize() {
        if (sensitiveWordMap == null) {
            return 0;
        }
        return sensitiveWordMap.size();
    }

    /**
     * Checks whether {@code txt} contains at least one sensitive word.
     *
     * @param txt       text to scan
     * @param matchType match rule ({@link #minMatchType} or {@link #maxMatchType})
     * @return true when a sensitive word is found
     */
    public static boolean isContainSensitiveWord(String txt, int matchType) {
        for (int i = 0; i < txt.length(); i++) {
            if (checkSensitiveWord(txt, i, matchType) > 0) {
                // One hit is enough; no need to scan the rest.
                return true;
            }
        }
        return false;
    }
}
2. AOP代理类
package com.krt.common.aspect;
import com.krt.common.util.WordFilter;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.annotation.Before;
import org.aspectj.lang.annotation.Pointcut;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;
import javax.servlet.http.HttpServletRequest;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
/**
* @author: lyp
* @Date: 2021/8/12 14:51
* @Description: AOP 切面,在 insert/update 类控制器方法执行前对入参中的 String 做敏感词过滤
*/
@Aspect
@Component
public class SensitiveWord {

    @Autowired
    private WordFilter wordFilter;

    /**
     * Matches controller methods whose names contain "insert", and "update"
     * methods, under any com.krt.*.controller package.
     */
    @Pointcut("execution(* com.krt.*.controller..*Controller*.*insert*(..)) || execution(* com.krt.*.controller..*Controller*.update(..))")
    public void SensitiveWord() {
    }

    //@Around("@annotation(org.springframework.web.bind.annotation.PostMapping)||@annotation(org.springframework.web.bind.annotation.PutMapping)")

    /**
     * Filters sensitive words out of every String reachable from the advised
     * method's arguments before the method executes.
     *
     * @param point the intercepted join point
     * @return the advised method's result
     * @throws Throwable whatever the advised method throws
     */
    @Around("SensitiveWord()")
    public Object doBefore(ProceedingJoinPoint point) throws Throwable {
        Object[] args = point.getArgs();
        for (int i = 0; i < args.length; i++) {
            Object arg = args[i];
            if (arg == null) {
                // Nothing to inspect.
                continue;
            }
            if (arg instanceof String) {
                // Strings are immutable, so a top-level String argument can only
                // be filtered by replacing the array slot and proceeding with
                // the modified argument list.
                args[i] = wordFilter.replaceWords((String) arg);
            } else {
                // Recursively filter every String field reachable from the argument.
                foundString(arg.getClass(), arg);
            }
        }
        return point.proceed(args);
    }

    /**
     * Recursively replaces sensitive words in all non-final String fields of
     * {@code arg}, descending into fields whose type lives in a com.krt package
     * via their getters.
     *
     * @param clazz the declared class of {@code arg}
     * @param arg   the object to filter; ignored when null
     * @return {@code clazz}, unchanged (kept for interface compatibility)
     * @throws IllegalAccessException    when a field cannot be read or written
     * @throws InvocationTargetException when a getter throws
     */
    public Class<?> foundString(Class clazz, Object arg) throws IllegalAccessException, InvocationTargetException {
        if (arg == null) {
            // A null getter result has no fields to filter.
            return clazz;
        }
        for (Field declaredField : clazz.getDeclaredFields()) {
            Class<?> type = declaredField.getType();
            if (type == String.class && !Modifier.isFinal(declaredField.getModifiers())) {
                // Filter writable String fields; final fields cannot be reassigned.
                declaredField.setAccessible(true);
                String value = (String) declaredField.get(arg);
                declaredField.set(arg, wordFilter.replaceWords(value));
            } else if (type.getPackage() != null && type.getPackage().getName().contains("com.krt")) {
                // Project-owned nested object: descend through its getter. Exact
                // name comparison avoids matching the wrong getter (e.g. getFooBar
                // for field "foo", which a contains() check would accept).
                for (Method method : clazz.getMethods()) {
                    if (method.getName().equalsIgnoreCase("get" + declaredField.getName())) {
                        this.foundString(type, method.invoke(arg));
                        break;
                    }
                }
            }
        }
        return clazz;
    }
}
至此简要的功能已经实现