需要过滤的敏感词保存在 word.properties 文件中(每个敏感词作为一个 key,以键值对形式存储),该文件放在项目的 src 目录下。
首先定义过滤器类 KeywordFilter(KeywordFilter.java):
package com.sobinterface.util;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Keyword (forbidden word) filter backed by a character trie.
 *
 * <p>Keywords are registered with {@link #addKeywords(List)}. A text can then be
 * tested for keywords ({@link #isContentKeyWords(String)}), scanned for the
 * keywords it contains ({@link #getTxtKeyWords(String)}), or masked
 * ({@link #getFilerWords(String)}).
 *
 * <p>Matching is exact and case-sensitive; callers that want case-insensitive
 * matching must normalise both the keywords and the text themselves.
 * No synchronization is performed by this class.
 */
public class KeywordFilter {

    /** Match type 1: stop at the shortest keyword that matches at a position. */
    private static final int MATCH_SHORTEST = 1;

    /** One trie node: children keyed by the next character, plus an end-of-keyword flag. */
    private static final class Node {
        private final HashMap<Character, Node> children = new HashMap<Character, Node>();
        private boolean end;
    }

    /** Root of the trie; it never represents a keyword itself. */
    private final Node root = new Node();

    /** 1: shortest match; any other value (conventionally 2): longest match. */
    private int matchType = MATCH_SHORTEST;

    /**
     * Adds keywords to the filter. Each keyword is trimmed first; keywords that
     * are {@code null} or empty after trimming are ignored.
     *
     * @param keywords keywords to register; {@code null} is treated as an empty list
     */
    public void addKeywords(List<String> keywords) {
        if (keywords == null) {
            return;
        }
        for (String raw : keywords) {
            if (raw == null) {
                continue;
            }
            String key = raw.trim();
            Node node = root;
            for (int j = 0; j < key.length(); j++) {
                Character ch = Character.valueOf(key.charAt(j));
                Node next = node.children.get(ch);
                if (next == null) {
                    next = new Node();
                    node.children.put(ch, next);
                }
                node = next;
            }
            // An empty keyword never leaves the root, which must not be marked as a match.
            if (node != root) {
                node.end = true;
            }
        }
    }

    /**
     * Removes every registered keyword.
     */
    public void clearKeywords() {
        root.children.clear();
    }

    /**
     * Checks whether a keyword starts at position {@code begin} of {@code txt}.
     *
     * @param txt   text to inspect
     * @param begin index of the first character to match
     * @param flag  1 returns the length of the shortest keyword found; any other
     *              value returns the length of the longest keyword found
     * @return length of the matched keyword, or 0 when no keyword starts at {@code begin}
     */
    private int checkKeyWords(String txt, int begin, int flag) {
        Node node = root;
        int matched = 0;
        for (int i = begin; i < txt.length(); i++) {
            node = node.children.get(Character.valueOf(txt.charAt(i)));
            if (node == null) {
                break;
            }
            if (node.end) {
                matched = i - begin + 1;
                if (flag == MATCH_SHORTEST) {
                    break;
                }
            }
        }
        return matched;
    }

    /**
     * Returns the distinct keywords found in {@code txt}, scanning left to right
     * with the configured match type and skipping past each match.
     *
     * @param txt text to scan; {@code null} yields an empty set
     * @return the set of keywords found (never {@code null})
     */
    public Set<String> getTxtKeyWords(String txt) {
        Set<String> found = new HashSet<String>();
        if (txt == null) {
            return found;
        }
        int length = txt.length();
        int i = 0;
        while (i < length) {
            int len = checkKeyWords(txt, i, matchType);
            if (len > 0) {
                found.add(txt.substring(i, i + len));
                i += len;
            } else {
                i++;
            }
        }
        return found;
    }

    /**
     * Returns {@code txt} with every keyword found by the scan replaced by a single "*".
     *
     * <p>The keyword is replaced literally. The previous implementation used
     * {@code String.replaceAll}, which interpreted the keyword as a regular
     * expression: keywords containing characters such as {@code . + * ( [}
     * masked the wrong text or threw {@code PatternSyntaxException}.
     *
     * @param txt text to mask; {@code null} is returned unchanged
     * @return the masked text
     */
    public String getFilerWords(String txt) {
        if (txt == null) {
            return null;
        }
        int length = txt.length();
        String filtered = txt;
        int i = 0;
        while (i < length) {
            int len = checkKeyWords(txt, i, matchType);
            if (len > 0) {
                filtered = filtered.replace(txt.substring(i, i + len), "*");
                i += len;
            } else {
                i++;
            }
        }
        return filtered;
    }

    /**
     * Tells whether {@code txt} contains at least one keyword. Always uses
     * shortest matching, since only the existence of a match matters.
     *
     * @param txt text to test; {@code null} yields {@code false}
     * @return {@code true} if any keyword occurs in {@code txt}
     */
    public boolean isContentKeyWords(String txt) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            if (checkKeyWords(txt, i, MATCH_SHORTEST) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the current match type (1: shortest match, 2: longest match)
     */
    public int getMatchType() {
        return matchType;
    }

    /**
     * @param matchType 1 for shortest match; any other value selects longest match
     */
    public void setMatchType(int matchType) {
        this.matchType = matchType;
    }
}
再定义一个 MyProperties 类(MyProperties.java),用于读取 word.properties 文件中的敏感词并加载到过滤器中:
package com.sobinterface.util;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.ResourceBundle;
/**
 * Holder for the single shared {@link KeywordFilter}.
 *
 * <p>When this class is initialised, the keys of the "word" resource bundle are
 * read, lower-cased, and registered as keywords on the shared filter.
 */
public class MyProperties {

    /** Shared filter instance, populated by the static initialiser below. */
    private static KeywordFilter filter = new KeywordFilter();

    static {
        filter.addKeywords(lowerCasedKeys(ResourceBundle.getBundle("word")));
    }

    /** Not instantiable: this class only exposes the shared filter. */
    private MyProperties() {
    }

    /**
     * Collects every key of the given bundle, converted to lower case.
     *
     * @param bundle bundle whose keys are the keywords
     * @return the lower-cased keys, in enumeration order
     */
    private static List<String> lowerCasedKeys(ResourceBundle bundle) {
        List<String> lowered = new ArrayList<String>();
        for (Enumeration<String> keys = bundle.getKeys(); keys.hasMoreElements();) {
            lowered.add(keys.nextElement().toLowerCase());
        }
        return lowered;
    }

    /**
     * @return the shared keyword filter
     */
    public static KeywordFilter getFilter() {
        return filter;
    }
}
最后,在业务处理层(service)中调用过滤器,检查文本是否包含敏感词即可:
// Check whether the comment contains a forbidden (sensitive) word.
// The text is lower-cased before the check because MyProperties lower-cases
// every keyword it loads; if a keyword is found, a validation error is returned.
KeywordFilter filter = MyProperties.getFilter();
commentText = commentText.toLowerCase();
if (filter.isContentKeyWords(commentText)) {
return getMessageInfo(MessageEnum.VALIDATE_COMMENTTEXT_ERR);
}