【转载】高效.NET脏字过滤算法

61 篇文章 6 订阅

BadWordsFilter.cs类

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Collections;
using System.Data;

namespace WNF
{
    public class BadWordsFilter
    {
        private HashSet<string> hash = new HashSet<string>(); //关键字
        private byte[] fastCheck = new byte[char.MaxValue];
        private byte[] fastLength = new byte[char.MaxValue];
        private BitArray charCheck = new BitArray(char.MaxValue);
        private BitArray endCheck = new BitArray(char.MaxValue);
        private int maxWordLength = 0;
        private int minWordLength = int.MaxValue;

        public BadWordsFilter()
        {

        }

        //初始化关键字
        public void Init(DataTable badwords)
        {
            for (int j = 0; j < badwords.Rows.Count; j++)
            {
                string word = badwords.Rows[j][0].ToString();
                maxWordLength = Math.Max(maxWordLength, word.Length);
                minWordLength = Math.Min(minWordLength, word.Length);

                for (int i = 0; i < 7 && i < word.Length; i++)
                {
                    fastCheck[word[i]] |= (byte)(1 << i);
                }

                for (int i = 7; i < word.Length; i++)
                {
                    fastCheck[word[i]] |= 0x80;
                }

                if (word.Length == 1)
                {
                    charCheck[word[0]] = true;
                }
                else
                {
                    fastLength[word[0]] |= (byte)(1 << (Math.Min(7, word.Length - 2)));
                    endCheck[word[word.Length - 1]] = true;

                    hash.Add(word);
                }
            }
        }

        public string Filter(string text, string mask)
        {
            throw new NotImplementedException();
        }

        //检查是否有关键字
        public bool HasBadWord(string text)
        {
            int index = 0;

            while (index < text.Length)
            {
                int count = 1;

                if (index > 0 || (fastCheck[text[index]] & 1) == 0)
                {
                    while (index < text.Length - 1 && (fastCheck[text[++index]] & 1) == 0) ;
                }

                char begin = text[index];

                if (minWordLength == 1 && charCheck[begin])
                {
                    return true;
                }

                for (int j = 1; j <= Math.Min(maxWordLength, text.Length - index - 1); j++)
                {
                    char current = text[index + j];

                    if ((fastCheck[current] & 1) == 0)
                    {
                        ++count;
                    }

                    if ((fastCheck[current] & (1 << Math.Min(j, 7))) == 0)
                    {
                        break;
                    }

                    if (j + 1 >= minWordLength)
                    {
                        if ((fastLength[begin] & (1 << Math.Min(j - 1, 7))) > 0 && endCheck[current])
                        {
                            string sub = text.Substring(index, j + 1);

                            if (hash.Contains(sub))
                            {
                                return true;
                            }
                        }
                    }
                }

                index += count;
            }

            return false;
        }
    }
}

 

 

调用示例:

                    string sql = "select keywords from tb_keyword";
                    BadWordsFilter badwordfilter = new BadWordsFilter();
                    //初始化关键字
                    badwordfilter.Init(oEtb.GetDataSet(sql).Tables[0]);
                    //检查是否有存在关键字
                    bool a = badwordfilter.HasBadWord(TextBox1.Text);
                    if (a == true)
                    {
                        Page.RegisterClientScriptBlock("a", "<script>alert('该评论含有不合法文字!')</script>");
                    }
                    else
                    {
                        PingLun();//写入评论表

                    }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值