/**
 * Banned-word filter backed by a character trie (the "DFA" approach).
 *
 * The built-in word list is compiled once into a tree of `WordNode`s in which
 * every path from the root to a node flagged `isEnd` spells one banned word.
 * `filterWord` scans a text against that tree and masks every banned word it
 * contains. Matching is case-sensitive and works on UTF-16 code units.
 *
 * Use the shared instance through `BannedWordFilter.ins`.
 */
class BannedWordFilter {
    /** Whether the trie has been built from the word list. */
    private _inited = false;
    /** Raw banned-word list; released (set to null) once the trie has been built. */
    private dirtyWordArray: Array<string> | null = ["垃圾", "辣鸡", "sb", "智障", "呵呵", "jj", "GG", "曹", "鸡儿", "尼玛", "你麻痹", "傻子", "杀", "bb", "nb", "牛逼", "日你"];
    /** Default mask substituted for each character of a banned word. */
    private repChar = '*';
    /** Root of the banned-word trie; null until built and again after `clear()`. */
    private treeRoot: WordNode | null = null;

    /** Lazily created shared instance. */
    private static _ins: BannedWordFilter;

    /**
     * Shared filter instance. It is created on first access, and the trie is
     * built on access for as long as the instance is not yet initialised.
     */
    public static get ins(): BannedWordFilter {
        if (!this._ins) {
            this._ins = new BannedWordFilter();
        }
        if (!this._ins._inited) {
            this._ins.initTreeConfs();
        }
        return this._ins;
    }

    /**
     * Releases the word list and the trie and marks the filter uninitialised.
     *
     * NOTE(review): the word list is not restored here, so a later
     * `initTreeConfs()` returns early and `filterWord` passes every text
     * through unchanged. Confirm that this is the intended lifecycle.
     */
    public clear(): void {
        this.dirtyWordArray = null;
        this.treeRoot = null;
        this._inited = false;
    }

    /**
     * Builds the trie from the word list. This is the expensive preprocessing
     * step; once it has run the raw list is dropped and the filter is flagged
     * as initialised. Does nothing when there is no word list to build from.
     */
    private initTreeConfs(): void {
        const words = this.dirtyWordArray;
        if (!words || words.length <= 0) {
            return;
        }
        const root = new WordNode();
        root.value = '';
        for (let i = 0; i < words.length; i++) {
            const word = words[i];
            if (word.length <= 0) {
                // An empty entry must not flag the root itself as a word end.
                continue;
            }
            let node = root;
            for (let j = 0; j < word.length; j++) {
                const char = word.charAt(j);
                // Reuse the existing branch for this character or grow a new one.
                node = node.getChild(char) || node.addChild(char);
            }
            // The node reached by the last character closes a banned word.
            node.isEnd = true;
        }
        this.treeRoot = root;
        this.dirtyWordArray = null;
        this._inited = true;
        console.log("treeRoot:", this.treeRoot);
    }

    /**
     * Checks a text for banned words and masks them.
     *
     * Every character that belongs to an occurrence of a banned word is
     * replaced by the mask, including occurrences that overlap each other.
     * Masking is done by position, so only the matched characters change.
     *
     * @param word Text to check. A null/undefined or empty value is returned as is.
     * @param repChar Mask used for each banned character; when omitted or empty
     *                the default `'*'` is used.
     * @returns `hasDirty` tells whether any banned word was found;
     *          `filteredWord` is the text with banned words masked.
     */
    public filterWord(word: string, repChar?: string): { hasDirty: boolean, filteredWord: string } {
        const root = this.treeRoot;
        if (word == null || word.length === 0 || !root) {
            return { hasDirty: false, filteredWord: word };
        }
        const mask = repChar ? repChar : this.repChar;
        let hasDirty = false;
        let filtered = '';
        // Exclusive index up to which characters are covered by a match found so far.
        let maskedUntil = 0;
        for (let i = 0; i < word.length; i++) {
            // Try a match from every position so that words which start inside a
            // failed prefix or overlap a previous match are still detected.
            const matchEnd = this.longestMatchEnd(word, i, root);
            if (matchEnd > maskedUntil) {
                maskedUntil = matchEnd;
            }
            if (i < maskedUntil) {
                hasDirty = true;
                filtered += mask;
            } else {
                filtered += word.charAt(i);
            }
        }
        return { hasDirty: hasDirty, filteredWord: filtered };
    }

    /**
     * Builds a mask string.
     *
     * @param repChar String to repeat.
     * @param leng Number of repetitions.
     * @returns `repChar` concatenated `leng` times ('' when `leng` is not positive).
     */
    public getReplaceStr(repChar: string, leng: number): string {
        let result = '';
        for (let i = 0; i < leng; i++) {
            result += repChar;
        }
        return result;
    }

    /**
     * Walks the trie along `text` beginning at `start`.
     *
     * @returns The exclusive end index of the longest banned word that begins
     *          at `start`, or -1 when no banned word begins there.
     */
    private longestMatchEnd(text: string, start: number, root: WordNode): number {
        let node = root;
        let matchEnd = -1;
        for (let i = start; i < text.length; i++) {
            const next = node.getChild(text.charAt(i));
            if (!next) {
                break;
            }
            node = next;
            if (node.isEnd) {
                matchEnd = i + 1;
            }
        }
        return matchEnd;
    }
}
/**
 * One node of the banned-word tree. Each node stores a single character and
 * the nodes that may follow it.
 */
class WordNode {
    /** True when this node is the last character of a banned word. */
    public isEnd = false;
    /** Node this one was added to; left unset on a node created directly with `new`. */
    public parentNode: WordNode;
    /** Child nodes, keyed by the character each child stores. */
    public children: Dictionary<string, WordNode> = new Dictionary<string, WordNode>();
    /** Character stored in this node. */
    public value: string;

    /**
     * Looks up the child stored under `name`.
     *
     * @param name Character to look up.
     * @returns Whatever the underlying dictionary yields for that key
     *          (presumably null/undefined when absent — verify against `Dictionary.get`).
     */
    public getChild(name: string): WordNode {
        const found = this.children.get(name);
        return found;
    }

    /**
     * Creates a node for `char`, links it to this node as its parent and
     * registers it among this node's children.
     *
     * @param char Character the new node stores.
     * @returns The newly created child node.
     */
    public addChild(char: string): WordNode {
        const child = new WordNode();
        child.value = char;
        child.parentNode = this;
        this.children.add(char, child);
        return child;
    }
}
// Sensitive-word filtering in games (DFA algorithm)
// Most recently recommended article published on 2024-07-17 16:17:38