// DFA trie lookup for sensitive-word filtering (DFA 字典树查找)
using System;
using System.Collections.Generic;
using System.Linq;
using UnityEngine;
/// <summary>
/// A node of a character trie (prefix tree) used to detect sensitive words in text.
/// The root node carries no character of its own; every other node stores, in
/// <see cref="key"/>, the character that leads to it from its <see cref="parent"/>.
/// </summary>
public class hashWordNode
{
    /// <summary>Child nodes, keyed by the character that leads to each of them.</summary>
    public Dictionary<char, hashWordNode> childs = new Dictionary<char, hashWordNode>();

    /// <summary>True when the path from the root down to this node spells a complete stored word.</summary>
    public bool isEnd;

    /// <summary>The character this node stands for (left at its default value on the root).</summary>
    public char key;

    /// <summary>The node one step closer to the root; null on the root itself.</summary>
    public hashWordNode parent;

    /// <inheritdoc />
    public override string ToString()
    {
        return string.Format("Childs.Count: {0}, IsEnd: {1}, Key: {2}", childs.Count, isEnd, key /*, parent*/);
    }

    /// <summary>
    /// Inserts one word into the trie rooted at this node, creating any missing
    /// nodes along the way and marking the final node as the end of a word.
    /// </summary>
    /// <param name="words">Characters of the word to store. Null or empty input is ignored.</param>
    public void AddRootNode(char[] words)
    {
        if (words == null || words.Length == 0)
        {
            return;
        }

        var node = this;
        foreach (var letter in words)
        {
            hashWordNode child;
            // Single lookup instead of ContainsKey + indexer.
            if (!node.childs.TryGetValue(letter, out child))
            {
                child = new hashWordNode { key = letter, parent = node };
                node.childs.Add(letter, child);
            }

            node = child;
        }

        // The loop ends on the node of the last character, which terminates the word.
        node.isEnd = true;
    }

    /// <summary>
    /// Logs every node below <paramref name="node"/> in depth-first order.
    /// </summary>
    /// <param name="node">Node whose descendants are printed. Null is ignored.</param>
    public void PrintStruct(hashWordNode node)
    {
        if (node == null)
        {
            return;
        }

        foreach (var child in node.childs.Values)
        {
            Debug.Log(child);
            PrintStruct(child);
        }
    }

    /// <summary>
    /// Looks for a stored word that starts exactly at <paramref name="index"/>.
    /// </summary>
    /// <param name="words">Text to scan.</param>
    /// <param name="index">Position in <paramref name="words"/> where the word must begin.</param>
    /// <returns>
    /// The end node of the shortest stored word beginning at <paramref name="index"/>,
    /// or null when no stored word begins there.
    /// </returns>
    public hashWordNode Cheak(char[] words, int index)
    {
        int length;
        return FindMatch(words, index, out length);
    }

    /// <summary>
    /// Looks for a stored word that starts exactly at <paramref name="index"/> and,
    /// on success, appends the matched text to <paramref name="word"/>.
    /// </summary>
    /// <param name="words">Text to scan.</param>
    /// <param name="index">Position in <paramref name="words"/> where the word must begin.</param>
    /// <param name="word">
    /// Receives the matched word (appended to its current value). It is left
    /// untouched when nothing matches, so a failed lookup never leaves partial text behind.
    /// </param>
    /// <returns>
    /// The end node of the shortest stored word beginning at <paramref name="index"/>,
    /// or null when no stored word begins there.
    /// </returns>
    public hashWordNode Cheak(char[] words, int index, ref string word)
    {
        int length;
        var node = FindMatch(words, index, out length);
        if (node != null)
        {
            // The match is contiguous, so it is exactly words[index .. index + length).
            word += new string(words, index, length);
        }

        return node;
    }

    /// <summary>
    /// Shared trie walk behind both <c>Cheak</c> overloads: follows consecutive
    /// characters from <paramref name="index"/> until a word ends or a character has no child.
    /// </summary>
    /// <param name="words">Text to scan.</param>
    /// <param name="index">Position where the walk starts.</param>
    /// <param name="length">Number of characters matched; 0 when there is no match.</param>
    /// <returns>The end node of the matched word, or null when there is no match.</returns>
    private hashWordNode FindMatch(char[] words, int index, out int length)
    {
        length = 0;
        if (words == null || index < 0)
        {
            return null;
        }

        var node = this;
        for (var i = index; i < words.Length; i++)
        {
            hashWordNode next;
            if (!node.childs.TryGetValue(words[i], out next))
            {
                // No stored word continues with this character. Stop here instead of
                // skipping it, otherwise scattered letters would be reported as a word.
                return null;
            }

            node = next;
            if (node.isEnd)
            {
                length = i - index + 1;
                return node;
            }
        }

        // Ran out of text in the middle of a word.
        return null;
    }

    /// <summary>
    /// Demo: builds a trie from a '|'-separated word list and logs every
    /// sensitive word found in a sample input string.
    /// </summary>
    public static void Test()
    {
        var input = "cdjp dafa"; // text to scan
        var mgck_txt = "cdjp|dafa|dfdz|falu|>jnv|MDMA|SARS|红K|.exe|c-sz"; // sensitive-word list
        // Strings are immutable: the cleaned-up value has to be assigned back.
        mgck_txt = mgck_txt.Trim().Replace("\r\n", "");
        var sensitiveWordsArray = mgck_txt.Split('|'); // individual sensitive words
        var theRootNode = new hashWordNode();
        foreach (var s in sensitiveWordsArray)
        {
            if (s.Length == 0)
            {
                continue;
            }

            theRootNode.AddRootNode(s.ToCharArray()); // register the word in the trie
        }

        var charArray = input.ToCharArray();
        for (var i = 0; i < charArray.Length; i++)
        {
            var word = "";
            var node = theRootNode.Cheak(charArray, i, ref word);
            if (node == null)
            {
                continue;
            }

            Debug.Log("\n含有敏感词\t" + word);
            // Keep scanning for further words: jump to the last matched character,
            // the loop's own i++ then moves to the first character after the match.
            i += word.Length - 1;
        }
        //output:
    }
}