ssm框架下基于DFA算法实现敏感词过滤

上一次我们简单地介绍了一下DFA算法,原文链接:《DFA算法简单理解实现》。
这一次我们基于这个算法模型实现我们的敏感词过滤功能,本功能实现背景为ssm框架和ehcache,不多说了,上才艺

一.实现思路

首先宏观地梳理了一下项目:项目中所有会往数据库中插入数据的接口,方法名都是由insert或者update开头,且请求方式都为POST或PUT。由此我们可以有以下两种思路:

  1. 使用spring aop 代理所有的post或put请求方法,对其中传入的String参数进行校验,切入点可以定义为:

    @Around("@annotation(org.springframework.web.bind.annotation.PostMapping)||@annotation(org.springframework.web.bind.annotation.PutMapping)")
    
  2. 使用spring aop 代理所有controller层insert或update开头的方法,对其中传入的String参数进行校验,切入点可以定义为:

    // Matches controller methods whose name contains "insert" or starts with "update".
    // The trailing wildcard on "update*" is required: a bare "update" would only match
    // methods named exactly "update".
    @Pointcut("execution(* com.krt.*.controller..*Controller*.*insert*(..)) || execution(* com.krt.*.controller..*Controller*.update*(..))")
    public void SensitiveWord() {
        // Pointcut signature only; intentionally empty.
    }
    

    因为第一种方式对项目影响太广,所以暂时放弃,采用第二种方式

二.代码实现

1. 敏感词检测类

package com.krt.common.util;

import com.krt.oa.mapper.NoticeMapper;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.*;

/**
 * Sensitive-word filter backed by a character trie (the DFA model described in the article).
 *
 * <p>The dictionary is read from the Ehcache entry {@code "sensitiveWord"/"SensitiveWord"} and,
 * when the cache has no entry, from {@code NoticeMapper#listStr()}. The trie is kept in a static
 * field and rebuilt whenever the dictionary handed to the filter differs from the one it was
 * built from.
 *
 * @author lyp (created 2021/8/12 14:39)
 */
@Component
public class WordFilter {
    /**
     * Replacement emitted once for every character of a matched word.
     */
    private static final String REPLACE_CHAR = "*";

    /**
     * Match type: stop at the shortest dictionary word starting at a position.
     */
    private static final int MIN_MATCH_TYPE = 1;

    /**
     * Match type: take the longest dictionary word starting at a position.
     */
    private static final int MAX_MATCH_TYPE = 2;

    /**
     * Root of the trie; {@code null} until a dictionary has been loaded.
     * Always replaced as a whole, never mutated after publication.
     */
    private static volatile TrieNode trieRoot;

    /**
     * The word set {@link #trieRoot} was built from, used to detect dictionary changes.
     */
    private static volatile Set<String> loadedWords;

    @Autowired
    private NoticeMapper sensitiveWordMapper;

    /**
     * One trie state: outgoing transitions plus a flag marking the end of a dictionary word.
     */
    private static final class TrieNode {
        private final Map<Character, TrieNode> children = new HashMap<>();
        private boolean end;
    }

    /**
     * Masks every sensitive word in {@code text} using longest-match semantics.
     *
     * @param text text to filter; blank or {@code null} text is returned unchanged
     * @return the text with each character of every matched word replaced by {@code "*"}
     */
    public String replaceWords(String text) {
        if (StringUtils.isBlank(text)) {
            return text;
        }
        EhcacheUtil ehcacheUtil = EhcacheUtil.getInstance();

        @SuppressWarnings("unchecked")
        List<String> words = (List<String>) ehcacheUtil.get("sensitiveWord", "SensitiveWord");
        if (words == null) {
            // Cache miss: load from the database and refresh the cache.
            words = sensitiveWordMapper.listStr();
            if (words == null) {
                words = Collections.emptyList();
            }
            ehcacheUtil.put("sensitiveWord", "SensitiveWord", words);
        }
        Set<String> keyWordSet = new HashSet<>(words);
        return WordFilter.replaceSensitiveWord(keyWordSet, text, MAX_MATCH_TYPE);
    }

    /**
     * Masks sensitive words after making sure the trie reflects {@code data}.
     *
     * @param data      current dictionary
     * @param txt       text to filter
     * @param matchType {@code 1} for shortest match, any other value for longest match
     * @return the masked text
     */
    private static String replaceSensitiveWord(Set<String> data, String txt, int matchType) {
        // Rebuild on first use and whenever the dictionary content changed; building only once
        // would keep serving a stale dictionary after the cache is refreshed.
        if (trieRoot == null || !data.equals(loadedWords)) {
            addSensitiveWordToHashMap(data);
        }
        return replaceSensitiveWord(txt, matchType, REPLACE_CHAR);
    }

    /**
     * Builds the trie for {@code keyWordSet} and publishes it.
     *
     * <p>The trie is assembled in a local variable and assigned to the static field only when
     * complete, so readers never observe a partially built dictionary.
     *
     * @param keyWordSet dictionary words; {@code null} and empty entries are ignored
     */
    private static void addSensitiveWordToHashMap(Set<String> keyWordSet) {
        TrieNode root = new TrieNode();
        for (String word : keyWordSet) {
            if (word == null || word.isEmpty()) {
                continue;
            }
            TrieNode node = root;
            for (int i = 0; i < word.length(); i++) {
                Character ch = word.charAt(i);
                TrieNode next = node.children.get(ch);
                if (next == null) {
                    next = new TrieNode();
                    node.children.put(ch, next);
                }
                node = next;
            }
            // The node reached by the last character terminates a dictionary word.
            node.end = true;
        }
        trieRoot = root;
        loadedWords = new HashSet<>(keyWordSet);
    }

    /**
     * Collects the distinct sensitive words found in {@code txt}.
     *
     * <p>Scanning resumes after the end of each match, so matches never overlap.
     *
     * @param txt       text to scan; {@code null} yields an empty set
     * @param matchType {@code 1} for shortest match, any other value for longest match
     * @return the matched substrings, possibly empty
     */
    public static Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> found = new HashSet<>();
        if (txt == null) {
            return found;
        }
        int i = 0;
        while (i < txt.length()) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                found.add(txt.substring(i, i + length));
                i += length;
            } else {
                i++;
            }
        }
        return found;
    }

    /**
     * Replaces every matched word in {@code txt}, emitting {@code replaceChar} once per matched
     * character.
     *
     * <p>Replacement is positional and done in one left-to-right pass. Dictionary words are
     * therefore treated as literal text (never as regular expressions) and the result does not
     * depend on the order in which matches are processed.
     *
     * @param txt         text to filter; {@code null} is returned as {@code null}
     * @param matchType   {@code 1} for shortest match, any other value for longest match
     * @param replaceChar replacement emitted for each matched character
     * @return the masked text
     */
    public static String replaceSensitiveWord(String txt, int matchType, String replaceChar) {
        if (txt == null) {
            return null;
        }
        StringBuilder masked = new StringBuilder(txt.length());
        int i = 0;
        while (i < txt.length()) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                appendReplaceChars(masked, replaceChar, length);
                i += length;
            } else {
                masked.append(txt.charAt(i));
                i++;
            }
        }
        return masked.toString();
    }

    /**
     * Appends {@code replaceChar} to {@code target} {@code length} times.
     */
    private static void appendReplaceChars(StringBuilder target, String replaceChar, int length) {
        for (int i = 0; i < length; i++) {
            target.append(replaceChar);
        }
    }

    /**
     * Returns the length of the dictionary word that starts at {@code beginIndex}.
     *
     * @param txt        text to inspect
     * @param beginIndex index of the first character to test
     * @param matchType  {@code 1} returns the shortest word, any other value the longest
     * @return the matched word length, or {@code 0} when no dictionary word starts at
     *         {@code beginIndex}, when {@code txt} is {@code null}, or when no dictionary is loaded
     */
    public static int checkSensitiveWord(String txt, int beginIndex, int matchType) {
        TrieNode node = trieRoot;
        if (txt == null || node == null) {
            return 0;
        }
        int matchedLength = 0;
        for (int i = beginIndex; i < txt.length(); i++) {
            node = node.children.get(txt.charAt(i));
            if (node == null) {
                break;
            }
            if (node.end) {
                // Record the length only at word ends: characters walked past the last
                // complete word are a mere prefix of a longer word and must not be counted.
                matchedLength = i - beginIndex + 1;
                if (matchType == MIN_MATCH_TYPE) {
                    break;
                }
            }
        }
        return matchedLength;
    }

    /**
     * Returns the number of top-level trie entries, i.e. the number of distinct first
     * characters among the loaded words (not the number of words).
     *
     * @return the top-level entry count, or {@code 0} when no dictionary is loaded
     */
    public int getWordSize() {
        TrieNode root = trieRoot;
        return root == null ? 0 : root.children.size();
    }

    /**
     * Tells whether {@code txt} contains at least one sensitive word.
     *
     * @param txt       text to scan; {@code null} yields {@code false}
     * @param matchType {@code 1} for shortest match, any other value for longest match
     * @return {@code true} as soon as a match is found
     */
    public static boolean isContainSensitiveWord(String txt, int matchType) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            if (checkSensitiveWord(txt, i, matchType) > 0) {
                return true;
            }
        }
        return false;
    }
}

2. AOP代理类

package com.krt.common.aspect;

import com.krt.common.util.WordFilter;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.annotation.Before;
import org.aspectj.lang.annotation.Pointcut;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;

import javax.servlet.http.HttpServletRequest;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Set;

/**
 * Aspect that masks sensitive words in the arguments of controller insert/update methods
 * before the controller method runs.
 *
 * @author lyp (created 2021/8/12 14:51)
 */
@Aspect
@Component
public class SensitiveWord {

    /**
     * Package-name fragment identifying project types whose fields are scanned recursively.
     */
    private static final String PROJECT_PACKAGE = "com.krt";

    @Autowired
    private WordFilter wordFilter;

    /**
     * Matches controller methods whose name contains "insert" or starts with "update".
     * The trailing wildcard on "update*" is required: a bare "update" would only match
     * methods named exactly "update".
     */
    @Pointcut("execution(* com.krt.*.controller..*Controller*.*insert*(..)) || execution(* com.krt.*.controller..*Controller*.update*(..))")
    public void SensitiveWord() {
        // Pointcut signature only; intentionally empty.
    }

    /**
     * Filters every argument of the intercepted call, then proceeds with the filtered arguments.
     *
     * <p>Alternative pointcut covering every POST/PUT handler (rejected as too broad):
     * {@code @annotation(org.springframework.web.bind.annotation.PostMapping)
     * || @annotation(org.springframework.web.bind.annotation.PutMapping)}.
     *
     * @param point the intercepted call
     * @return whatever the intercepted method returns
     * @throws Throwable anything thrown by the intercepted method or by reflection
     */
    @Around("SensitiveWord()")
    public Object doBefore(ProceedingJoinPoint point) throws Throwable {
        Object[] args = point.getArgs();
        for (int i = 0; i < args.length; i++) {
            Object arg = args[i];
            if (arg == null) {
                continue;
            }
            if (arg instanceof String) {
                // Strings are immutable, so the argument itself has to be swapped out.
                args[i] = wordFilter.replaceWords((String) arg);
            } else {
                foundString(arg.getClass(), arg);
            }
        }
        // Pass the array explicitly so replaced String arguments reach the target method.
        return point.proceed(args);
    }

    /**
     * Masks sensitive words in every non-final, non-static {@code String} field of {@code arg},
     * recursing into fields whose declared type belongs to a {@code com.krt} package.
     *
     * @param clazz the type whose declared fields are scanned; {@code arg} must be an instance of it
     * @param arg   the object to filter in place; {@code null} is ignored
     * @return {@code clazz}, unchanged
     * @throws IllegalAccessException    if a field cannot be read or written
     * @throws InvocationTargetException declared for backward compatibility; no longer thrown
     */
    public Class<?> foundString(Class clazz, Object arg) throws IllegalAccessException, InvocationTargetException {
        // Identity-based set: guards against endless recursion on cyclic object graphs
        // without relying on the objects' own equals/hashCode.
        Set<Object> visited = Collections.newSetFromMap(new IdentityHashMap<>());
        filterObject(clazz, arg, visited);
        return clazz;
    }

    /**
     * Scans {@code type} and its project-package superclasses for fields to filter on {@code target}.
     */
    private void filterObject(Class<?> type, Object target, Set<Object> visited) throws IllegalAccessException {
        if (type == null || target == null || !visited.add(target)) {
            return;
        }
        Class<?> current = type;
        while (current != null && current != Object.class) {
            for (Field field : current.getDeclaredFields()) {
                filterField(field, target, visited);
            }
            // Inherited fields are only followed into project base classes.
            Class<?> parent = current.getSuperclass();
            current = isProjectType(parent) ? parent : null;
        }
    }

    /**
     * Filters a single field: masks a String value, or recurses into a project-typed value.
     */
    private void filterField(Field field, Object target, Set<Object> visited) throws IllegalAccessException {
        int modifiers = field.getModifiers();
        if (Modifier.isStatic(modifiers)) {
            // Static state is shared across requests and is not part of the submitted data.
            return;
        }
        Class<?> fieldType = field.getType();
        if (fieldType == String.class) {
            if (Modifier.isFinal(modifiers)) {
                return;
            }
            field.setAccessible(true);
            String value = (String) field.get(target);
            field.set(target, wordFilter.replaceWords(value));
        } else if (isProjectType(fieldType)) {
            // Read the field directly rather than guessing a getter by name.
            field.setAccessible(true);
            filterObject(fieldType, field.get(target), visited);
        }
    }

    /**
     * Tells whether {@code type} is declared in a package whose name contains {@code com.krt}.
     */
    private static boolean isProjectType(Class<?> type) {
        return type != null
                && type.getPackage() != null
                && type.getPackage().getName().contains(PROJECT_PACKAGE);
    }
}

至此简要的功能已经实现

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值