C#&Lua 过滤筛选脏字

15 篇文章 1 订阅
10 篇文章 1 订阅

参考自https://www.jianshu.com/p/00ab90e9e7fb

原作者写的是C#版本,我们项目要用Lua,所以我把它翻译成了Lua版本。

最下面是C#代码,我也一并拷贝过来了。

Lua代码:

--- Sensitive-word filter backed by a character trie.
-- Words are stored one UTF-8 character per trie level; Filter() masks every
-- stored word it finds in a text with the replacement string.
-- NOTE(review): `mm.class` is a project helper not visible here; it is assumed
-- to return a class table whose instances get `Init` called on them - confirm.
local class = mm.class("trie")

-- Prototype for trie nodes. `_isEnd` lives on the prototype only as the
-- default value; the child table is deliberately NOT stored here, because a
-- table on the prototype would be shared by every node.
local trieNode = {}
trieNode._isEnd = false

-- One metatable shared by all nodes (method and default lookup only).
local trieNodeMeta = { __index = trieNode }

--- Register `node` as the child reached through character `key`.
-- @param key  single UTF-8 character (string)
-- @param node trie node to attach
function trieNode:AddSubNode(key,node)
    self.subNodes[key] = node
end

--- Look up the child reached through character `key`.
-- @param key single UTF-8 character (string)
-- @return the child node, or nil when there is none
function trieNode:GetSubNode(key)
    return self.subNodes[key]
end

--- Turn `instance` (or a fresh table) into a trie node.
-- Each node receives its own `subNodes` table so children are never shared
-- between nodes.
-- @param instance optional table to initialise
-- @return the initialised node
function trieNode:newinstance(instance)
    instance = instance or {}
    if instance.subNodes == nil then
        instance.subNodes = {}
    end
    return setmetatable(instance, trieNodeMeta)
end

-- Decode the first UTF-8 character of `ch` into its Unicode code point.
-- Returns nil for an empty string; for a malformed or truncated sequence the
-- raw lead byte is returned so callers still get a number.
local function decodeCodepoint(ch)
    local b1, b2, b3, b4 = string.byte(ch, 1, 4)
    if b1 == nil then
        return nil
    end
    if b1 < 0x80 then
        return b1
    elseif b1 < 0xC0 then
        return b1 -- stray continuation byte
    elseif b1 < 0xE0 then
        if b2 then
            return (b1 - 0xC0) * 0x40 + (b2 - 0x80)
        end
    elseif b1 < 0xF0 then
        if b2 and b3 then
            return (b1 - 0xE0) * 0x1000 + (b2 - 0x80) * 0x40 + (b3 - 0x80)
        end
    else
        if b2 and b3 and b4 then
            return (b1 - 0xF0) * 0x40000 + (b2 - 0x80) * 0x1000
                + (b3 - 0x80) * 0x40 + (b4 - 0x80)
        end
    end
    return b1
end

--- Set up an empty trie and the default mask string.
function class:Init()
    self.default_replacement = "*"
    self.newtrieNode = trieNode:newinstance() -- root node (empty prefix)
end

--- Tell whether a character is a "symbol", i.e. ignored while matching.
-- A character is NOT a symbol when it is an ASCII digit or letter, or when
-- its code point lies in 0x2E80-0x9FFF (the range the C# original describes
-- as East Asian script). The code point is decoded from UTF-8; comparing only
-- the first byte could never reach that range.
-- @param c single UTF-8 character (only the first character is inspected)
-- @return true when `c` is a symbol (an empty string counts as a symbol)
function class:IsSymbol(c)
    local cp = decodeCodepoint(c)
    if cp == nil then
        return true
    end
    local isAlnum = (cp >= 0x30 and cp <= 0x39)   -- '0'-'9'
        or (cp >= 0x61 and cp <= 0x7A)            -- 'a'-'z'
        or (cp >= 0x41 and cp <= 0x5A)            -- 'A'-'Z'
    local isEastAsian = cp >= 0x2E80 and cp <= 0x9FFF
    return not (isAlnum or isEastAsian)
end

--- Add every word of a list to the trie.
-- @param words array of strings; nil or an empty list is a no-op
function class:AddWords(words)
    if words == nil then
        return
    end
    for i = 1, #words do
        self:AddWord(words[i])
    end
end

--- Add one word to the trie.
-- Symbol characters inside the word are skipped. The end-of-word flag is set
-- after the walk, so a word whose last character is a symbol is still marked.
-- @param word string; nil, "" and words made only of symbols are ignored
function class:AddWord(word)
    if word == nil or word == "" then
        return
    end
    local root = self.newtrieNode
    local node = root
    local chars = self:getstrs(word)
    for i = 1, #chars do
        local c = chars[i]
        if not self:IsSymbol(c) then
            local child = node:GetSubNode(c)
            if child == nil then
                child = trieNode:newinstance()
                node:AddSubNode(c, child)
            end
            node = child
        end
    end
    if node ~= root then
        node._isEnd = true
    end
end

--- Mask every stored word that occurs in `str`.
-- Matching skips symbol characters, so a word interrupted by symbols is still
-- caught. Each character of a matched span (skipped symbols included) is
-- replaced by `self.default_replacement`, e.g. with words {"shit","傻逼"} the
-- text "你是傻逼啊shit" becomes "你是**啊****".
-- @param str text to filter
-- @return the filtered text; nil or "" is returned unchanged
function class:Filter(str)
    if str == nil or str == "" then
        return str
    end
    local replacement = self.default_replacement
    local root = self.newtrieNode
    local chars = self:getstrs(str)
    local len = #chars
    local out = {}          -- output pieces, joined once at the end
    local node = root
    local begin = 1         -- first character of the current match attempt
    local position = 1      -- character currently being examined
    while position <= len do
        local c = chars[position]
        if self:IsSymbol(c) then
            -- Outside a match a symbol is copied through; inside a match it
            -- is skipped and stays part of the candidate span.
            if node == root then
                out[#out + 1] = c
                begin = begin + 1
            end
            position = position + 1
        else
            node = node:GetSubNode(c)
            if node == nil then
                -- No word starts at `begin`: emit that character and retry
                -- from the one right after it.
                out[#out + 1] = chars[begin]
                position = begin + 1
                begin = position
                node = root
            elseif node._isEnd then
                -- Full word found: mask the whole span begin..position.
                out[#out + 1] = string.rep(replacement, position - begin + 1)
                position = position + 1
                begin = position
                node = root
            else
                position = position + 1
            end
        end
    end
    -- Text ended in the middle of an unfinished match: keep it verbatim.
    for i = begin, len do
        out[#out + 1] = chars[i]
    end
    return table.concat(out)
end

--- Split a UTF-8 string into an array of single characters.
-- Bytes that cannot start a UTF-8 sequence are not matched by the pattern and
-- are therefore dropped.
-- NOTE(review): `%z` is the Lua 5.1/LuaJIT spelling for the NUL byte; confirm
-- the target Lua version if NUL bytes in the input matter.
-- @param str string to split
-- @return array of characters ({} for nil or "")
function class:getstrs(str)
    if str == nil or string.len(str) == 0 then return {} end
    local tab = {}
    for uchar in string.gmatch(str, "[%z\1-\127\194-\244][\128-\191]*") do
        tab[#tab+1] = uchar
    end
    return tab
end

--- Remove every stored word by replacing the root with an empty node.
-- (Assigning nil to `self` would only rebind the local parameter.)
function class:Clear()
    self.newtrieNode = trieNode:newinstance()
end

return class

使用方式

-- Usage sketch: `trie` is assumed to be an already-initialised instance of
-- the trie class; it is not created in this snippet.
local badWords = {"shit","傻逼","笨蛋"}
trie:AddWords(badWords)
-- Run a sample sentence through the filter and print the result.
local filtered = trie:Filter("你是傻逼啊shit你个小笨蛋")
print("str:",filtered)

输出结果如下

str: 你是**啊shit你个小**

 C#代码:

TrieNode.cs

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// One node of the keyword trie: a map from the next character to the child
/// node, plus a flag telling whether a keyword ends at this node.
/// </summary>
public class TrieNode
{
    /// <summary>
    /// Children of this node, keyed by the character that leads to each child.
    /// </summary>
    private Dictionary<char, TrieNode> subNodes = new Dictionary<char, TrieNode>();

    /// <summary>
    /// Backing field for <see cref="isKeyWordEnd"/>; false until a keyword
    /// is marked as ending here.
    /// </summary>
    private bool _isEnd = false;

    /// <summary>
    /// True when this node is the last node of a keyword, false otherwise.
    /// </summary>
    public bool isKeyWordEnd
    {
        get { return _isEnd; }
        set { _isEnd = value; }
    }

    /// <summary>
    /// Returns the child reached through <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Character leading to the wanted child.</param>
    /// <returns>The child node, or null when no child is stored for the key.</returns>
    public TrieNode GetSubNode(char key)
    {
        TrieNode child;
        if (subNodes.TryGetValue(key, out child))
        {
            return child;
        }
        return null;
    }

    /// <summary>
    /// Attaches <paramref name="node"/> as the child for <paramref name="key"/>.
    /// Uses Dictionary.Add, so a key that is already present is rejected with
    /// an exception rather than overwritten.
    /// </summary>
    /// <param name="key">Character leading to the child.</param>
    /// <param name="node">Child node to attach.</param>
    public void AddSubNode(char key, TrieNode node)
    {
        subNodes.Add(key, node);
    }
}
Trie.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

/// <summary>
/// Sensitive-word filter backed by a character trie. Keywords are added with
/// AddWord/AddWords; Filter replaces every keyword found in a text.
/// </summary>
public class Trie
{
    // Text substituted for each sensitive word that is found.
    private const String DEFAULT_REPLACEMENT = "敏感词";
    // Root of the trie; it stands for the empty prefix.
    private TrieNode rootNode = new TrieNode();

    /// <summary>
    /// Tells whether a character is a "symbol", i.e. ignored while matching.
    /// </summary>
    /// <param name="c">Character to classify.</param>
    /// <returns>
    /// False for ASCII digits and letters and for characters in
    /// 0x2E80-0x9FFF; true for everything else.
    /// </returns>
    private bool IsSymbol(char c)
    {
        int ic = c;
        // 0x2E80-0x9FFF: East Asian script range.
        return !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) && (ic < 0x2E80 || ic > 0x9FFF);
    }

    /// <summary>
    /// Adds every word of the list to the trie.
    /// </summary>
    /// <param name="words">Keywords to add; null or empty is a no-op.</param>
    public void AddWords(List<string> words)
    {
        if (words == null || words.Count == 0) return;
        for (int i = 0, count = words.Count; i < count; i++)
        {
            AddWord(words[i]);
        }
    }

    /// <summary>
    /// Adds one word to the trie, skipping symbol characters.
    /// </summary>
    /// <param name="word">
    /// Keyword to add; null, empty and symbol-only words are ignored.
    /// </param>
    public void AddWord(string word)
    {
        if (string.IsNullOrEmpty(word))
            return;
        TrieNode tempNode = rootNode;
        // Walk the word character by character, creating nodes as needed.
        for (int i = 0; i < word.Length; ++i)
        {
            char c = word[i];
            // Symbols are not stored in the trie.
            if (IsSymbol(c))
            {
                continue;
            }
            TrieNode node = tempNode.GetSubNode(c);

            if (node == null)
            { // No child for this character yet.
                node = new TrieNode();
                tempNode.AddSubNode(c, node);
            }

            tempNode = node;
        }

        // Mark the end of the keyword after the walk. Doing it inside the
        // loop on the last index would be skipped whenever the word ends with
        // a symbol (e.g. "shit!"), leaving the keyword unmatched forever.
        if (tempNode != rootNode)
        {
            tempNode.isKeyWordEnd = true;
        }
    }

    /// <summary>
    /// Replaces every keyword found in the text with the default replacement.
    /// Symbols between the characters of a keyword do not break the match.
    /// </summary>
    /// <param name="text">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public string Filter(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }
        String replacement = DEFAULT_REPLACEMENT;
        StringBuilder result = new StringBuilder();

        TrieNode tempNode = rootNode;
        int begin = 0; // start of the current match attempt (rollback point)
        int position = 0; // character currently being compared

        while (position < text.Length)
        {
            char c = text[position];
            // Symbols are skipped while matching.
            if (IsSymbol(c))
            {
                if (tempNode == rootNode)
                {
                    // Not inside a match: copy the symbol through.
                    result.Append(c);
                    ++begin;
                }
                ++position;
                continue;
            }

            tempNode = tempNode.GetSubNode(c);

            if (tempNode == null)
            {
                // No keyword starts at begin: emit that character...
                result.Append(text[begin]);
                // ...and retry from the character right after it.
                position = begin + 1;
                begin = position;
                // Back to the root of the trie.
                tempNode = rootNode;
            }
            else if (tempNode.isKeyWordEnd)
            {
                // Keyword found: the span begin..position becomes the replacement.
                result.Append(replacement);
                position = position + 1;
                begin = position;
                tempNode = rootNode;
            }
            else
            {
                ++position;
            }
        }

        // Text ended inside an unfinished match: keep the remainder verbatim.
        result.Append(text.Substring(begin));

        return result.ToString();
    }
}
FilterView.cs

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;

/// <summary>
/// Demo component: builds a Trie with three sample keywords and, when the
/// filter button handler runs, filters the input field's text into a label.
/// </summary>
public class FilterView : MonoBehaviour
{
    // Source of the text to filter; not assigned in this class
    // (presumably wired up in the Unity Inspector - confirm).
    public InputField inputField;
    // Label that receives the filtered text; not assigned in this class.
    public Text resultTxt;
    // Sample keyword list built in Start().
    private List<string> words;
    // Filter instance built in Start().
    private Trie trie;

    /// <summary>
    /// Creates the keyword list and loads it into a new Trie.
    /// </summary>
    void Start()
    {
        words = new List<string>();
        words.Add("shit");
        words.Add("傻逼");
        words.Add("笨蛋");

        trie = new Trie();
        trie.AddWords(words);
    }

    #region Event Handler
    /// <summary>
    /// Filters the current input text, logs the result and shows it in the label.
    /// </summary>
    public void OnFilterBtnClickHandler()
    {
        string filtered = trie.Filter(inputField.text);
        Debug.Log("过滤结果:" + filtered);
        resultTxt.text = filtered;
    }
    #endregion
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小张不爱写代码

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值