屏蔽词检测

 

DFA 字典树查找

 

using System;
using System.Collections.Generic;
using System.Linq;
using UnityEngine;

/// <summary>
///     One node of a character trie (prefix tree) used for sensitive-word detection.
///     An instance created with <c>new hashWordNode()</c> acts as the root; words are
///     registered with <see cref="AddRootNode" /> and looked up with <c>Cheak</c>.
/// </summary>
public class hashWordNode
{
    /// <summary>Child nodes, keyed by the next character of a registered word.</summary>
    public Dictionary<char, hashWordNode> childs = new Dictionary<char, hashWordNode>();

    /// <summary>True when the path from the root to this node spells a complete registered word.</summary>
    public bool isEnd;

    /// <summary>Character on the edge leading into this node; stays at its default on a root node.</summary>
    public char key;

    /// <summary>Node this one hangs under; stays null on a root node.</summary>
    public hashWordNode parent;

    /// <inheritdoc />
    public override string ToString()
    {
        return string.Format("Childs.Count: {0}, IsEnd: {1}, Key: {2}", childs.Count, isEnd, key /*, parent*/);
    }

    /// <summary>
    ///     Registers one word in the trie rooted at this node, creating missing nodes on the way
    ///     and marking the node of the last character as a word end.
    /// </summary>
    /// <param name="words">Characters of the word to register. Null or empty input is ignored.</param>
    public void AddRootNode(char[] words)
    {
        if (words == null || words.Length == 0) return;

        var node = this;
        for (var i = 0; i < words.Length; i++)
        {
            var letter = words[i];
            hashWordNode child;
            // Single lookup instead of ContainsKey + indexer.
            if (!node.childs.TryGetValue(letter, out child))
            {
                child = new hashWordNode();
                child.key = letter;
                child.parent = node;
                node.childs.Add(letter, child);
            }

            node = child;
        }

        // The node reached after the last character terminates the word.
        node.isEnd = true;
    }

    /// <summary>
    ///     Logs every node below <paramref name="node" /> in depth-first order.
    /// </summary>
    /// <param name="node">Node whose descendants are logged. Null is ignored.</param>
    public void PrintStruct(hashWordNode node)
    {
        if (node == null) return;

        foreach (var child in node.childs.Values)
        {
            Debug.Log(child);
            PrintStruct(child);
        }
    }

    /// <summary>
    ///     Looks for a registered word that starts exactly at <paramref name="index" />.
    /// </summary>
    /// <param name="words">Text to scan.</param>
    /// <param name="index">Position in <paramref name="words" /> where the word must start.</param>
    /// <returns>
    ///     The end node of the shortest registered word starting at <paramref name="index" />,
    ///     or null when no registered word starts there.
    /// </returns>
    public hashWordNode Cheak(char[] words, int index)
    {
        int matchLength;
        return FindMatch(words, index, out matchLength);
    }

    /// <summary>
    ///     Looks for a registered word that starts exactly at <paramref name="index" /> and,
    ///     on success, appends the matched text to <paramref name="word" />.
    /// </summary>
    /// <param name="words">Text to scan.</param>
    /// <param name="index">Position in <paramref name="words" /> where the word must start.</param>
    /// <param name="word">
    ///     Receives the matched characters (appended) when a word is found; left untouched otherwise,
    ///     so a failed lookup can no longer pollute the next reported word.
    /// </param>
    /// <returns>
    ///     The end node of the shortest registered word starting at <paramref name="index" />,
    ///     or null when no registered word starts there.
    /// </returns>
    public hashWordNode Cheak(char[] words, int index, ref string word)
    {
        int matchLength;
        var node = FindMatch(words, index, out matchLength);
        if (node != null)
            word += new string(words, index, matchLength);

        return node;
    }

    /// <summary>
    ///     Walks the trie along consecutive characters of <paramref name="words" /> starting at
    ///     <paramref name="index" /> and stops at the first word end or the first mismatch.
    /// </summary>
    /// <param name="words">Text to scan.</param>
    /// <param name="index">Start position of the candidate word.</param>
    /// <param name="matchLength">Number of matched characters, or 0 when nothing matched.</param>
    /// <returns>The end node of the matched word, or null.</returns>
    private hashWordNode FindMatch(char[] words, int index, out int matchLength)
    {
        matchLength = 0;
        if (words == null || index < 0) return null;

        var node = this;
        for (var i = index; i < words.Length; i++)
        {
            hashWordNode next;
            // A mismatch ends the attempt: registered words must appear as a contiguous run,
            // not as letters scattered through the text.
            if (!node.childs.TryGetValue(words[i], out next))
                return null;

            node = next;
            if (node.isEnd)
            {
                matchLength = i - index + 1;
                return node;
            }
        }

        return null;
    }

    /// <summary>
    ///     Small demo: builds a trie from a '|'-separated word list and logs every
    ///     sensitive word found in a sample input string.
    /// </summary>
    public static void Test()
    {
        var input = "cdjp  dafa"; // text to scan
        var mgck_txt = "cdjp|dafa|dfdz|falu|>jnv|MDMA|SARS|红K|.exe|c-sz"; // sensitive-word list
        // Strings are immutable, so the cleaned-up value has to be assigned back.
        mgck_txt = mgck_txt.Trim().Replace("\r\n", "");
        var sensitiveWordsArray = mgck_txt.Split('|');

        var theRootNode = new hashWordNode();
        foreach (var s in sensitiveWordsArray)
        {
            if (s.Length == 0) continue;
            theRootNode.AddRootNode(s.ToCharArray());
        }

        var charArray = input.ToCharArray();
        for (var i = 0; i < charArray.Length; i++)
        {
            var word = "";
            var node = theRootNode.Cheak(charArray, i, ref word);
            if (node == null) continue;

            Debug.Log("\n含有敏感词\t" + word);

            // Resume right after the matched word; the loop's i++ supplies the final +1.
            i += word.Length - 1;
        }
    }
}

from: https://zhuanlan.zhihu.com/p/84685657

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值