java脏字过滤_脏字过滤

1.[文件]

SensitiveWordFilter.java ~ 7KB

下载(141)

package com.forgov.sharpc.infrastruture.util;

import static java.util.Collections.sort;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Filters sensitive words out of text using a character trie.
 *
 * <p>Words are registered with {@link #addFilterWord(String)} and stored with the ASCII
 * letters {@code A-Z} folded to lower case. {@link #doFilter(String, String)} finds every
 * registered word that occurs in the text and replaces each occurrence with a literal
 * replacement string, longest word first.
 *
 * <p>This class performs no synchronization; callers that share an instance between
 * threads must coordinate access themselves.
 *
 * @author loudyn
 */
public class SensitiveWordFilter {

    /** Orders words from longest to shortest so that longer words are replaced first. */
    private static final Comparator<String> WORD_COMPARATOR = new Comparator<String>() {
        @Override
        public int compare(String one, String other) {
            // Lengths are never negative, so the subtraction cannot overflow.
            return other.length() - one.length();
        }
    };

    /** Root of the trie; its own character value is a placeholder and is never matched. */
    private WordNode rootWordNode = newRootNode();

    /**
     * Registers a word to be filtered.
     *
     * @param word the word to add; {@code null} or empty input is ignored
     * @return this filter, for call chaining
     */
    public final SensitiveWordFilter addFilterWord(String word) {
        if (word == null || word.length() == 0) {
            return this;
        }
        WordNode current = rootWordNode;
        for (int i = 0; i < word.length(); i++) {
            char c = toLowerCaseAscii(word.charAt(i));
            WordNode child = searchWordNode(current, c);
            if (child == null) {
                child = new WordNode(current, c, WordNode.MIDSIDE_TYPE);
                current.children.add(child);
                // A node that ended a word and now also has a continuation is both.
                if (current.type == WordNode.END_TYPE) {
                    current.type = WordNode.BOTH_TYPE;
                }
            }
            current = child;
        }
        current.type = current.children.isEmpty() ? WordNode.END_TYPE : WordNode.BOTH_TYPE;
        return this;
    }

    /**
     * Unregisters a previously added word. Longer words sharing the same prefix stay
     * registered, and so do shorter words that are a prefix of the removed one.
     *
     * @param word the word to remove; {@code null}, empty or unknown words are ignored
     * @return this filter, for call chaining
     */
    public final SensitiveWordFilter removeFilterWord(String word) {
        if (word == null || word.length() == 0) {
            return this;
        }
        WordNode node = rootWordNode;
        for (int i = 0; i < word.length(); i++) {
            node = searchWordNode(node, toLowerCaseAscii(word.charAt(i)));
            if (node == null) {
                return this;
            }
        }
        if ((node.type & WordNode.END_TYPE) == 0) {
            // The path exists only as a prefix of longer words; nothing to remove.
            return this;
        }
        node.type = WordNode.MIDSIDE_TYPE;

        // Drop branches that no longer lead to any word so later scans do not walk them.
        while (node != rootWordNode
                && node.children.isEmpty()
                && (node.type & WordNode.END_TYPE) == 0) {
            WordNode parent = node.parent;
            parent.children.remove(node);
            if (parent.children.isEmpty() && parent.type == WordNode.BOTH_TYPE) {
                parent.type = WordNode.END_TYPE;
            }
            node = parent;
        }
        return this;
    }

    /**
     * Registers every word of the given collection.
     *
     * @param words the words to add; must not be {@code null}
     * @return this filter, for call chaining
     */
    public final SensitiveWordFilter addFilterWords(Collection<String> words) {
        for (String word : words) {
            addFilterWord(word);
        }
        return this;
    }

    /**
     * Unregisters every word of the given collection.
     *
     * @param words the words to remove; must not be {@code null}
     * @return this filter, for call chaining
     */
    public final SensitiveWordFilter removeFilterWords(Collection<String> words) {
        for (String word : words) {
            removeFilterWord(word);
        }
        return this;
    }

    /**
     * Removes all registered words.
     *
     * @return this filter, for call chaining
     */
    public final SensitiveWordFilter reset() {
        // Replacing the root is enough; the old tree becomes unreachable as a whole.
        rootWordNode = newRootNode();
        return this;
    }

    /**
     * Replaces every occurrence of a registered word in {@code text} with
     * {@code replacement}.
     *
     * <p>{@link #beforeFilter(String, String)} is consulted first; when it returns
     * {@code false} the text is returned unchanged. {@link #afterFilter(List, String, String)}
     * is always invoked afterwards, even when filtering fails. Runtime exceptions raised
     * while filtering propagate to the caller.
     *
     * @param text        the text to filter; {@code null} is returned as {@code null}
     * @param replacement the literal text substituted for each hit
     * @return the filtered text
     */
    public final String doFilter(String text, String replacement) {
        List<String> hitWords = new ArrayList<String>();
        try {
            if (!beforeFilter(text, replacement)) {
                return text;
            }
            hitWords = hit(text);
            return doInternalFilter(text, hitWords, replacement);
        } finally {
            afterFilter(hitWords, text, replacement);
        }
    }

    /**
     * Hook invoked before filtering starts. Override to veto or observe filtering.
     *
     * @param text        the text about to be filtered
     * @param replacement the replacement passed to {@link #doFilter(String, String)}
     * @return {@code true} to proceed with filtering, {@code false} to skip it
     */
    protected boolean beforeFilter(String text, String replacement) {
        // add hook by overwrite this method;
        return true;
    }

    /**
     * Hook invoked after filtering, whether or not it succeeded.
     *
     * @param hitWords    the distinct words found, longest first and lower-cased as stored;
     *                    empty when filtering was skipped or found nothing
     * @param text        the original, unfiltered text
     * @param replacement the replacement passed to {@link #doFilter(String, String)}
     */
    protected void afterFilter(List<String> hitWords, String text, String replacement) {
        // add hook by overwrite this method;
    }

    /**
     * Collects every registered word occurring in {@code text}, including overlapping and
     * nested occurrences. Each start position is scanned on its own, so a partial match
     * running into the end of the text cannot hide a shorter word that starts later.
     *
     * @param text the text to scan; may be {@code null}
     * @return a mutable list of the words found (lower-cased as stored), possibly with
     *         duplicates
     */
    private List<String> hit(String text) {
        List<String> hitWords = new ArrayList<String>();
        if (text == null || text.length() == 0) {
            return hitWords;
        }
        char[] chars = text.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = toLowerCaseAscii(chars[i]);
        }
        for (int start = 0; start < chars.length; start++) {
            WordNode node = rootWordNode;
            for (int end = start; end < chars.length; end++) {
                node = searchWordNode(node, chars[end]);
                if (node == null) {
                    break;
                }
                if ((node.type & WordNode.END_TYPE) != 0) {
                    hitWords.add(new String(chars, start, end - start + 1));
                }
            }
        }
        return hitWords;
    }

    /**
     * Replaces all occurrences of the hit words in {@code text}. As a side effect,
     * {@code hitWords} is rewritten in place to hold the distinct words, longest first.
     *
     * @param text        the text to rewrite
     * @param hitWords    the words found by {@link #hit(String)}; modified in place
     * @param replacement the literal replacement text
     * @return the text with every hit replaced
     */
    private String doInternalFilter(String text, List<String> hitWords, String replacement) {
        if (hitWords.isEmpty()) {
            return text;
        }
        List<String> distinct = new ArrayList<String>(new LinkedHashSet<String>(hitWords));
        sort(distinct, WORD_COMPARATOR);
        hitWords.clear();
        hitWords.addAll(distinct);

        // Both sides are quoted: words may contain regex metacharacters, and the
        // replacement may contain '$' or '\'.
        String literalReplacement = Matcher.quoteReplacement(replacement);
        int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
        String result = text;
        for (String foundWord : distinct) {
            result = Pattern.compile(Pattern.quote(foundWord), flags)
                    .matcher(result)
                    .replaceAll(literalReplacement);
        }
        return result;
    }

    /**
     * Finds the direct child of {@code wordNode} holding character {@code c}.
     *
     * @param wordNode the parent node
     * @param c        the character to look for
     * @return the matching child, or {@code null} when there is none
     */
    private WordNode searchWordNode(WordNode wordNode, char c) {
        for (WordNode child : wordNode.children) {
            if (child.value == c) {
                return child;
            }
        }
        return null;
    }

    /**
     * Folds the ASCII letters {@code A-Z} to lower case and leaves every other character
     * untouched.
     *
     * @param c the character to fold
     * @return the folded character
     */
    private static char toLowerCaseAscii(char c) {
        return (c >= 'A' && c <= 'Z') ? (char) (c + ('a' - 'A')) : c;
    }

    /** Creates a fresh, empty trie root. */
    private static WordNode newRootNode() {
        return new WordNode(null, 'R', WordNode.MIDSIDE_TYPE);
    }

    /**
     * A single character in the word trie.
     *
     * @author loudyn
     */
    protected static final class WordNode {

        /** The node is the last character of a registered word. */
        static final int END_TYPE = 1;

        /** The node is an inner character on the way to a longer word. */
        static final int MIDSIDE_TYPE = 1 << 1;

        /** The node ends a word and is also a prefix of a longer word. */
        static final int BOTH_TYPE = END_TYPE | MIDSIDE_TYPE;

        final char value;

        int type;

        WordNode parent;

        List<WordNode> children = new ArrayList<WordNode>(0);

        WordNode(WordNode parent, char value, int type) {
            this.parent = parent;
            this.value = value;
            this.type = type;
        }
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值