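// Problem description: detect and identify spam content mixed into text (pornography, promotion, abuse, prohibited or illegal material) and filter or block it.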
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace FoundationHelper
{
#region 非法关键字过滤 beta 1.1
/// <summary>
/// 非法关键词过滤(自动忽略汉字数字字母间的其他字符)
/// </summary>
public class FilterWord
{
/// <summary>
/// Creates a filter without setting a dictionary path.
/// </summary>
public FilterWord()
{
}

/// <summary>
/// Creates a filter and stores the given dictionary path.
/// </summary>
/// <param name="dictionaryPath">Value to store as the dictionary path.</param>
public FilterWord(string dictionaryPath)
{
    // The property setter is a plain assignment to the backing field.
    DictionaryPath = dictionaryPath;
}
/// <summary>
/// Backing field for <see cref="DictionaryPath"/>; initialised to the empty string.
/// </summary>
private string dictionaryPath = string.Empty;

/// <summary>
/// Path of the word dictionary (lexicon).
/// </summary>
public string DictionaryPath
{
    get
    {
        return this.dictionaryPath;
    }
    set
    {
        this.dictionaryPath = value;
    }
}
/// <summary>
/// In-memory lexicon table of <see cref="WordGroup"/> entries.
/// </summary>
/// <remarks>
/// The table has char.MaxValue + 1 slots so that every char value from 0 up to and
/// including char.MaxValue (0xFFFF) is a valid index. Sizing it with char.MaxValue
/// alone would leave index 65535 out of range.
/// NOTE(review): this assumes the table is indexed by character code elsewhere in
/// the class; confirm against the dictionary loading and lookup code.
/// </remarks>
private WordGroup[] MEMORYLEXICON = new WordGroup[char.MaxValue + 1];
/// <summary>
/// Backing field for <see cref="SourctText"/>; initialised to the empty string.
/// </summary>
private string sourctText = string.Empty;

/// <summary>
/// The source text to be checked.
/// </summary>
/// <remarks>
/// The spelling "Sourct" is kept as-is because it is part of the public interface.
/// </remarks>
public string SourctText
{
    get
    {
        return this.sourctText;
    }
    set
    {
        this.sourctText = value;
    }
}
/// <summary>
/// Cursor into the source text being checked. Starts at 0.
/// NOTE(review): how it advances is not visible in this part of the file.
/// </summary>
private int cursor;

/// <summary>
/// Offset to apply after a successful match. Starts at 0.
/// NOTE(review): presumably the matched length; confirm against the matching code.
/// </summary>
private int wordlenght;

/// <summary>
/// Cursor for the word currently being checked. Starts at 0.
/// NOTE(review): exact role is not visible in this part of the file.
/// </summary>
private int nextCursor;
/// <summary>
/// Backing list for <see cref="IllegalWords"/>; starts out empty.
/// </summary>
private List<string> illegalWords = new List<string>();

/// <summary>
/// The set of illegal words that have been detected.
/// </summary>
/// <remarks>
/// Returns the internal list itself, not a copy.
/// </remarks>
public List<string> IllegalWords
{
    get
    {
        return this.illegalWords;
    }
}
/// <summary>
/// 判断是否是中文
/// </summary>
/// <param name="character"></param>
/// <re