参考自https://www.jianshu.com/p/00ab90e9e7fb
他写的是 C# 版本,我们项目要用,所以翻译成了 Lua 版本。
最下面是 C# 代码,我也一并拷贝过来了。
Lua代码:
-- Sensitive-word filter backed by a trie (prefix tree); Lua port of the C#
-- reference implementation.
-- NOTE(review): `mm.class` comes from the host project and is not visible
-- here; presumably it returns the class table and invokes `Init` when an
-- instance is constructed -- confirm against the project's class system.
local class = mm.class("trie")

--- Trie node. Every node owns its own `subNodes` table; sharing a single
-- table between all nodes would collapse the trie into a flat character set.
local trieNode = {}
trieNode.__index = trieNode

--- Registers `node` as the child reached from this node via `key`.
-- @param key  one UTF-8 character (string)
-- @param node child trie node
function trieNode:AddSubNode(key, node)
  self.subNodes[key] = node
end

--- Returns the child reached via `key`, or nil when there is none.
-- @param key one UTF-8 character (string)
function trieNode:GetSubNode(key)
  return self.subNodes[key]
end

--- Creates a node, or turns `instance` into one.
-- @param instance optional table to reuse as the node
-- @return the initialised node
function trieNode:newinstance(instance)
  instance = instance or {}
  -- Per-instance state: never fall back to fields shared through __index.
  if rawget(instance, "_isEnd") == nil then
    instance._isEnd = false
  end
  if rawget(instance, "subNodes") == nil then
    instance.subNodes = {}
  end
  return setmetatable(instance, trieNode)
end

-- Decodes the Unicode code point of one UTF-8 encoded character.
-- Uses plain arithmetic so it also runs on Lua 5.1 / LuaJIT (no utf8 library,
-- no bit operators). Malformed input yields the value of its first byte.
local function decode_codepoint(ch)
  local b1, b2, b3, b4 = string.byte(ch, 1, 4)
  if b1 < 0x80 then
    return b1
  end
  if b1 >= 0xF0 then
    if b4 then
      return (b1 % 0x08) * 0x40000 + (b2 % 0x40) * 0x1000
        + (b3 % 0x40) * 0x40 + (b4 % 0x40)
    end
  elseif b1 >= 0xE0 then
    if b3 then
      return (b1 % 0x10) * 0x1000 + (b2 % 0x40) * 0x40 + (b3 % 0x40)
    end
  elseif b1 >= 0xC0 then
    if b2 then
      return (b1 % 0x20) * 0x40 + (b2 % 0x40)
    end
  end
  return b1
end

-- Returns the index of the last character of the longest sensitive word that
-- starts at chars[start], or nil when no word starts there. Symbols inside a
-- word are skipped, so "s-h-i-t" still matches the entry "shit".
local function find_match_end(filter, chars, start, count)
  local node = filter.newtrieNode
  local match_end = nil
  for pos = start, count do
    local c = chars[pos]
    if not filter:IsSymbol(c) then
      node = node:GetSubNode(c)
      if node == nil then
        break
      end
      if node._isEnd then
        match_end = pos
      end
    end
  end
  return match_end
end

--- Initialises the filter: replacement text and an empty trie root.
function class:Init()
  self.default_replacement = "*"
  self.newtrieNode = trieNode:newinstance()
end

--- Tells whether `c` is a "symbol", i.e. a character that is ignored while
-- matching: anything that is neither an ASCII letter/digit nor inside the
-- East Asian range U+2E80..U+9FFF.
-- @param c one UTF-8 character (string)
-- @return true for symbols (and for nil / empty input), false otherwise
function class:IsSymbol(c)
  if type(c) ~= "string" or c == "" then
    return true
  end
  -- Compare the decoded code point; the first byte alone is always < 0x2E80.
  local cp = decode_codepoint(c)
  local is_digit = cp >= 0x30 and cp <= 0x39
  local is_upper = cp >= 0x41 and cp <= 0x5A
  local is_lower = cp >= 0x61 and cp <= 0x7A
  if is_digit or is_upper or is_lower then
    return false
  end
  return cp < 0x2E80 or cp > 0x9FFF
end

--- Adds every word of the array `words` to the dictionary.
-- @param words array of strings; nil or empty is a no-op
function class:AddWords(words)
  if words == nil then
    return
  end
  for i = 1, #words do
    self:AddWord(words[i])
  end
end

--- Adds one sensitive word to the dictionary. Symbols inside the word are
-- ignored, so "sh.it" and "shit" produce the same entry.
-- @param word string; nil or "" is a no-op
function class:AddWord(word)
  if word == nil or word == "" then
    return
  end
  local root = self.newtrieNode
  local node = root
  local chars = self:getstrs(word)
  for i = 1, #chars do
    local c = chars[i]
    if not self:IsSymbol(c) then
      local child = node:GetSubNode(c)
      if child == nil then
        child = trieNode:newinstance()
        node:AddSubNode(c, child)
      end
      node = child
    end
  end
  -- Mark the end after the loop so a word that ends with a symbol is still
  -- terminated; a word made only of symbols adds nothing.
  if node ~= root then
    node._isEnd = true
  end
end

--- Masks every sensitive word found in `str`.
-- Each character of a matched span (including symbols embedded in it) is
-- replaced by `default_replacement`, and the longest dictionary word starting
-- at a position wins. With the words {"shit","傻逼","笨蛋"} the input
-- "你是傻逼啊shit你个小笨蛋" yields "你是**啊****你个小**".
-- @param str text to filter
-- @return the filtered text; nil / "" are returned unchanged
function class:Filter(str)
  if str == nil or str == "" then
    return str
  end
  local replacement = self.default_replacement
  local chars = self:getstrs(str)
  local count = #chars
  local out = {}
  local i = 1
  while i <= count do
    local c = chars[i]
    local match_end = nil
    -- A match may only start on a non-symbol character.
    if not self:IsSymbol(c) then
      match_end = find_match_end(self, chars, i, count)
    end
    if match_end then
      out[#out + 1] = string.rep(replacement, match_end - i + 1)
      i = match_end + 1
    else
      out[#out + 1] = c
      i = i + 1
    end
  end
  -- Collect pieces in a table: repeated `..` in a loop is quadratic.
  return table.concat(out)
end

--- Splits `str` into an array of UTF-8 characters.
-- @param str string to split
-- @return array of characters; {} for nil or ""
function class:getstrs(str)
  if str == nil or string.len(str) == 0 then
    return {}
  end
  local tab = {}
  -- One arbitrary byte followed by its continuation bytes. Unlike a pattern
  -- restricted to valid lead bytes, this never drops bytes of malformed input.
  for uchar in string.gmatch(str, ".[\128-\191]*") do
    tab[#tab + 1] = uchar
  end
  return tab
end

--- Removes every word from the dictionary.
-- (Assigning nil to `self` only rebinds the local parameter and clears nothing.)
function class:Clear()
  self.newtrieNode = trieNode:newinstance()
end

return class
使用方式
-- `trie` is an instance of the filter class above; how it is constructed is
-- not shown in this snippet.
local words = {"shit","傻逼","笨蛋"}
trie:AddWords(words)
-- Run the filter over a sample sentence and print the result.
local filtered = trie:Filter("你是傻逼啊shit你个小笨蛋")
print("str:",filtered)
输出结果如下(这是原文当时的运行结果,注意其中的 shit 并没有被替换):
str: 你是**啊shit你个小**
C#代码:
TrieNode.cs
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// One node of the sensitive-word trie.
/// </summary>
public class TrieNode
{
    /// <summary>
    /// Children of this node, keyed by the next character of a word.
    /// </summary>
    private Dictionary<char, TrieNode> children = new Dictionary<char, TrieNode>();

    /// <summary>
    /// True when this node is the last node of a keyword; false when the
    /// keyword continues. Defaults to false.
    /// </summary>
    public bool isKeyWordEnd { get; set; }

    /// <summary>
    /// Attaches <paramref name="node"/> as the child reached via <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Character leading to the child.</param>
    /// <param name="node">Child node to attach.</param>
    public void AddSubNode(char key, TrieNode node)
    {
        // Dictionary.Add throws if the key is already present.
        children.Add(key, node);
    }

    /// <summary>
    /// Looks up the child reached via <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Character leading to the child.</param>
    /// <returns>The child node, or null when there is none.</returns>
    public TrieNode GetSubNode(char key)
    {
        TrieNode child;
        if (children.TryGetValue(key, out child))
        {
            return child;
        }
        return null;
    }
}
Trie.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
/// <summary>
/// Sensitive-word filter backed by a trie of <see cref="TrieNode"/>.
/// </summary>
public class Trie
{
    // Text that replaces every sensitive word found by Filter.
    private const String DEFAULT_REPLACEMENT = "敏感词";

    // Root of the trie; it carries no character itself.
    private TrieNode rootNode = new TrieNode();

    /// <summary>
    /// Tells whether <paramref name="c"/> is a symbol, i.e. a character that
    /// is ignored while matching.
    /// </summary>
    /// <param name="c">Character to classify.</param>
    /// <returns>
    /// False for ASCII letters/digits and for characters in the East Asian
    /// range 0x2E80-0x9FFF; true for everything else.
    /// </returns>
    private bool IsSymbol(char c)
    {
        int ic = c;
        // 0x2E80-0x9FFF: East Asian script range.
        return !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) && (ic < 0x2E80 || ic > 0x9FFF);
    }

    /// <summary>
    /// Adds every word of the list to the trie.
    /// </summary>
    /// <param name="words">Words to add; null or empty is a no-op.</param>
    public void AddWords(List<string> words)
    {
        if (words == null || words.Count == 0) return;
        for (int i = 0, count = words.Count; i < count; i++)
        {
            AddWord(words[i]);
        }
    }

    /// <summary>
    /// Adds one word to the trie. Symbols inside the word are skipped.
    /// </summary>
    /// <param name="word">Word to add; null or empty is a no-op.</param>
    public void AddWord(string word)
    {
        if (string.IsNullOrEmpty(word))
            return;
        TrieNode node = rootNode;
        for (int i = 0; i < word.Length; ++i)
        {
            char c = word[i];
            if (IsSymbol(c))
            {
                continue;
            }
            TrieNode child = node.GetSubNode(c);
            if (child == null)
            {
                child = new TrieNode();
                node.AddSubNode(c, child);
            }
            node = child;
        }
        // Set the end flag after the loop: flagging only when the LAST char
        // is a non-symbol left words such as "shit!" without a terminator.
        // A word made only of symbols adds nothing.
        if (node != rootNode)
        {
            node.isKeyWordEnd = true;
        }
    }

    /// <summary>
    /// Replaces every sensitive word in <paramref name="text"/> with the
    /// default replacement. The longest word starting at a position wins,
    /// and symbols embedded in a word do not break the match.
    /// </summary>
    /// <param name="text">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned as is.</returns>
    public string Filter(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }
        StringBuilder result = new StringBuilder(text.Length);
        int begin = 0;
        while (begin < text.Length)
        {
            char first = text[begin];
            // A match can only start on a non-symbol character.
            int matchEnd = IsSymbol(first) ? -1 : FindMatchEnd(text, begin);
            if (matchEnd < 0)
            {
                // No word starts here: keep the character and retry from the
                // next one. Doing this at end of text too (instead of copying
                // the whole tail) keeps shorter words that start later from
                // slipping through.
                result.Append(first);
                ++begin;
            }
            else
            {
                result.Append(DEFAULT_REPLACEMENT);
                begin = matchEnd + 1;
            }
        }
        return result.ToString();
    }

    // Returns the index of the last character of the longest sensitive word
    // starting at text[start], or -1 when no word starts there.
    private int FindMatchEnd(string text, int start)
    {
        TrieNode node = rootNode;
        int matchEnd = -1;
        for (int pos = start; pos < text.Length; ++pos)
        {
            char c = text[pos];
            if (IsSymbol(c))
            {
                continue;
            }
            node = node.GetSubNode(c);
            if (node == null)
            {
                break;
            }
            if (node.isKeyWordEnd)
            {
                matchEnd = pos;
            }
        }
        return matchEnd;
    }
}
FilterView.cs
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
/// <summary>
/// Demo view: filters the text of an input field and shows the result.
/// </summary>
public class FilterView : MonoBehaviour
{
    // Source of the text to filter.
    public InputField inputField;
    // Label that receives the filtered text.
    public Text resultTxt;

    // Sample sensitive words loaded into the trie in Start.
    private List<string> words;
    private Trie trie;

    // Builds the word list and the trie from it.
    void Start()
    {
        words = new List<string> { "shit", "傻逼", "笨蛋" };
        trie = new Trie();
        trie.AddWords(words);
    }

    #region Event Handler
    // Filters the current input, logs the result and displays it.
    public void OnFilterBtnClickHandler()
    {
        string filtered = trie.Filter(inputField.text);
        Debug.Log("过滤结果:" + filtered);
        resultTxt.text = filtered;
    }
    #endregion
}