DFA算法实现过滤多家公司自定义敏感字

背景

最近通讯业务有个需求:需要让多家客户公司可以各自定义敏感词,并按各自定义的规则进行过滤。这里选择了DFA算法来实现,不过和传统的DFA写法不太一样。

模式图

这里写图片描述

直接上代码

/**
 * Multi-tenant sensitive-word filter built on one shared character trie (a DFA).
 *
 * <p>Trie layout, rooted at {@link #currentMap}: every node is a map whose
 * {@link Character} keys lead to child nodes and whose {@link String} keys (the company key
 * produced by {@code getKey}) carry a per-company marker: {@code "0"} means the node lies on
 * the path of one of that company's keywords, {@code "1"} means one of that company's
 * keywords ends at this node. Companies therefore share nodes for common prefixes while
 * keeping independent keyword sets.
 *
 * <p>Threading: lookups keep all traversal state in local variables, so concurrent lookups
 * do not interfere with each other. Child nodes are plain {@link HashMap}s, so
 * {@link #saveKeywords(int, List)} and {@link #clear()} must not run concurrently with
 * lookups or with each other.
 *
 * @author Yao Xumin (姚旭民)
 */
public class KeywordFilter {
    /** Marker value stored under a company key where one of its keywords ends. */
    private static final String TERMINAL = "1";
    /** Marker value stored under a company key on a non-final node of one of its keywords. */
    private static final String PREFIX = "0";
    /** Character that replaces each matched keyword occurrence in {@link #repword}. */
    private static final char MASK = '*';
    /** Separator used when several keywords are reported in one string. */
    private static final char SEPARATOR = ',';

    /**
     * Root node of the shared trie. The declared type is kept for source compatibility;
     * at runtime the root holds {@link Character} keys mapping to child {@link HashMap}s.
     */
    public static Map<String, HashMap> currentMap = new ConcurrentHashMap<String, HashMap>();

    /**
     * Former traversal cursor. No longer read or written: traversal state now lives in local
     * variables so that concurrent calls cannot corrupt each other. Always {@code null}.
     *
     * @deprecated retained only so existing references keep compiling
     */
    @Deprecated
    public static Map nowhash = null;

    /**
     * Former traversal scratch value. No longer read or written. Always {@code null}.
     *
     * @deprecated retained only so existing references keep compiling
     */
    @Deprecated
    public static Object wordMap;

    // Utility class: not instantiable.
    private KeywordFilter() {
    }

    /** Builds the marker key under which a company's flags are stored on trie nodes. */
    private static String getKey(int companyId) {
        return "companyId" + companyId;
    }

    /** Exposes the root with the key/value types it really holds at runtime. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private static Map<Object, Object> root() {
        return (Map) currentMap;
    }

    /** Returns the child of {@code node} reached by {@code ch}, or {@code null} if absent. */
    @SuppressWarnings("unchecked")
    private static Map<Object, Object> child(Map<Object, Object> node, char ch) {
        Object next = node.get(Character.valueOf(ch));
        return next instanceof Map ? (Map<Object, Object>) next : null;
    }

    /**
     * Length of the keyword of the given company that starts exactly at {@code begin}.
     *
     * @param txt     text to scan
     * @param key     company marker key
     * @param begin   index of the first character to match
     * @param longest {@code true} to return the longest keyword starting at {@code begin},
     *                {@code false} to stop at the shortest one
     * @return the matched length, or {@code 0} when no complete keyword starts at {@code begin}
     */
    private static int matchLength(String txt, String key, int begin, boolean longest) {
        Map<Object, Object> node = root();
        int matched = 0;
        for (int i = begin; i < txt.length(); i++) {
            node = child(node, txt.charAt(i));
            if (node == null) {
                break;
            }
            // equals() on the constant tolerates a missing marker or a non-String value.
            if (TERMINAL.equals(node.get(key))) {
                matched = i - begin + 1;
                if (!longest) {
                    break;
                }
            }
        }
        return matched;
    }

    /** Appends {@code keyword} to {@code out} unless it has already been recorded. */
    private static void recordKeyword(String keyword, Map<String, Boolean> seen, StringBuilder out) {
        if (seen.put(keyword, Boolean.TRUE) != null) {
            return;
        }
        if (out.length() > 0) {
            out.append(SEPARATOR);
        }
        out.append(keyword);
    }

    /** Removes every keyword of every company. */
    public static void clear() {
        currentMap.clear();
    }

    /**
     * Registers keywords for one company. Existing keywords (of any company) are kept.
     *
     * @param companyId company the keywords belong to
     * @param keywords  keywords to add; surrounding whitespace is trimmed, {@code null} and
     *                  blank entries are skipped; a {@code null} list is a no-op
     */
    public static void saveKeywords(int companyId, List<String> keywords) {
        if (keywords == null) {
            return;
        }
        String key = getKey(companyId);
        for (int i = 0; i < keywords.size(); i++) {
            String raw = keywords.get(i);
            if (raw == null) {
                continue;
            }
            String keyword = raw.trim();
            if (keyword.length() == 0) {
                continue;
            }
            Map<Object, Object> node = root();
            for (int j = 0; j < keyword.length(); j++) {
                char ch = keyword.charAt(j);
                Map<Object, Object> next = child(node, ch);
                if (next == null) {
                    next = new HashMap<Object, Object>();
                    node.put(Character.valueOf(ch), next);
                }
                // Mark the child (not the parent) as part of this company's path, without
                // downgrading a node that already ends one of its keywords.
                if (!next.containsKey(key)) {
                    next.put(key, PREFIX);
                }
                node = next;
            }
            node.put(key, TERMINAL);
        }
    }

    /**
     * Masks the given company's keywords in a text.
     *
     * <p>The text is scanned once from left to right. At each position the longest keyword of
     * the company starting there is replaced by a single {@code '*'}; keywords are matched
     * literally, never as regular expressions.
     *
     * @param companyId company whose keywords apply
     * @param txt       text to filter; {@code null} is treated as empty
     * @return a two-element list: index 0 is the filtered text, index 1 the distinct keywords
     *         found, comma-separated in order of first occurrence (empty string when none)
     */
    public static List<String> repword(int companyId, String txt) {
        List<String> result = new ArrayList<String>();
        String text = txt == null ? "" : txt;
        String key = getKey(companyId);
        StringBuilder filtered = new StringBuilder(text.length());
        StringBuilder found = new StringBuilder();
        Map<String, Boolean> seen = new HashMap<String, Boolean>();
        int i = 0;
        while (i < text.length()) {
            int len = matchLength(text, key, i, true);
            if (len > 0) {
                recordKeyword(text.substring(i, i + len), seen, found);
                filtered.append(MASK);
                i += len;
            } else {
                filtered.append(text.charAt(i));
                i++;
            }
        }
        result.add(filtered.toString());
        result.add(found.toString());
        return result;
    }

    /**
     * Returns the length of the shortest keyword of the company that starts at {@code begin}.
     *
     * @param txt       text to scan
     * @param companyId company whose keywords apply
     * @param begin     index at which the keyword must start
     * @return the keyword length, or {@code 0} when no complete keyword starts at {@code begin}
     *         (a text that ends in the middle of a keyword is not a match)
     */
    private static int checkKeyWords(String txt, int companyId, int begin) {
        return matchLength(txt, getKey(companyId), begin, false);
    }

    /**
     * Lists the company's keywords that occur in a text.
     *
     * <p>Unlike {@link #repword(int, String)}, this reports the shortest keyword at each
     * position, so with keywords {@code "c"} and {@code "cde"} the text {@code "cde"} yields
     * {@code "c"}.
     *
     * @param txt       text to inspect
     * @param companyId company whose keywords apply
     * @return the distinct keywords found, comma-separated in order of first occurrence, or
     *         {@code null} when the text is {@code null} or contains none
     */
    public static String getTxtKeyWords(String txt, int companyId) {
        if (txt == null) {
            return null;
        }
        StringBuilder found = new StringBuilder();
        Map<String, Boolean> seen = new HashMap<String, Boolean>();
        int i = 0;
        while (i < txt.length()) {
            int len = checkKeyWords(txt, companyId, i);
            if (len > 0) {
                recordKeyword(txt.substring(i, i + len), seen, found);
                i += len;
            } else {
                i++;
            }
        }
        return found.length() > 0 ? found.toString() : null;
    }

    /**
     * Tells whether a text contains at least one keyword of the company.
     *
     * <p>Static because the class cannot be instantiated.
     *
     * @param txt       text to inspect; {@code null} yields {@code false}
     * @param companyId company whose keywords apply
     * @return {@code true} if any keyword of the company occurs in {@code txt}
     */
    public static boolean isKeyWords(String txt, int companyId) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            if (checkKeyWords(txt, companyId, i) > 0) {
                return true;
            }
        }
        return false;
    }

    /** Small demo: registers a few keywords for company 1 and filters a sample text. */
    public static void main(String[] arg) {
        List<String> keywords = new ArrayList<String>();
        keywords.add("傻×");
        keywords.add("汉奸");
        keywords.add("草");
        keywords.add("草泥马");
        KeywordFilter.saveKeywords(1, keywords);
        String txt = "是傻×汉奸傻A傻B傻C傻D汉奸傻×草泥马";
        List<String> list = repword(1, txt);
        System.out.println("文中包含的敏感字为:" + list.get(1));
        System.out.println("原文:" + txt);
        System.out.println("敏感字过滤后:" + list.get(0));
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值