背景
最近通讯业务有个需求:需要让多家客户公司可以各自定义敏感词,并按各自的规则进行过滤。这里选择用DFA算法来实现,不过和传统的DFA写法不太一样。
模式图
直接上代码
/**
 * Sensitive-word filter shared by several companies (tenants).
 *
 * <p>All keywords live in one character trie rooted at {@link #currentMap}.
 * Every trie node is a map holding two kinds of entries:
 * <ul>
 *   <li>{@code Character -> HashMap}: the child node reached by that character;</li>
 *   <li>{@code "companyId<N>" -> "0" | "1"}: company N has at least one keyword
 *       passing through this node; {@code "1"} means one of them ends here.</li>
 * </ul>
 * The root node carries only child entries.
 *
 * <p>Matching is leftmost-longest: at each text position the longest keyword of
 * the requested company that starts there wins; if none starts there the scan
 * moves on by one character.
 *
 * <p>Threading: lookups keep all working state in local variables and only read
 * the trie. Child nodes are plain {@code HashMap}s, so {@link #saveKeywords} and
 * {@link #clear} must not run concurrently with each other or with lookups.
 */
public class KeywordFilter {

    /** Node flag: a keyword of the company ends on this node. */
    private static final String END = "1";

    /** Node flag: a keyword of the company passes through this node without ending. */
    private static final String PASS = "0";

    /** Text that replaces each matched keyword occurrence. */
    private static final String MASK = "*";

    /** Separator used when several keywords are reported in one string. */
    private static final String SEPARATOR = ",";

    /**
     * Root of the shared trie. Although declared with {@code String} keys, the
     * entries written by this class are {@code Character -> HashMap} children.
     */
    public static Map<String, HashMap> currentMap = new ConcurrentHashMap<String, HashMap>();

    /**
     * No longer used. Formerly a scratch cursor shared by all methods, which made
     * overlapping calls corrupt each other. Kept so existing references compile.
     *
     * @deprecated never read or written by this class.
     */
    @Deprecated
    public static Map nowhash = null;

    /**
     * No longer used; see {@link #nowhash}.
     *
     * @deprecated never read or written by this class.
     */
    @Deprecated
    public static Object wordMap;

    private KeywordFilter() {
    }

    /** Builds the per-company flag key stored on trie nodes. */
    private static String getKey(int companyId) {
        return "companyId" + companyId;
    }

    /** Views the root map with the key/value types it really holds. */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static Map<Object, Object> root() {
        return (Map<Object, Object>) (Map) currentMap;
    }

    /** Returns the child of {@code node} for {@code ch}, or {@code null} if there is none. */
    @SuppressWarnings("unchecked")
    private static Map<Object, Object> child(Map<Object, Object> node, char ch) {
        Object next = node.get(Character.valueOf(ch));
        if (next instanceof Map) {
            return (Map<Object, Object>) next;
        }
        return null;
    }

    /** Rejects a {@code null} text with an explicit message. */
    private static void requireText(String txt) {
        if (txt == null) {
            throw new NullPointerException("txt must not be null");
        }
    }

    /** Removes every keyword of every company. */
    public static void clear() {
        currentMap.clear();
    }

    /**
     * Registers keywords for one company. Keywords already registered are kept.
     *
     * @param companyId company the keywords belong to
     * @param keywords  keywords to add; each is trimmed first. A {@code null}
     *                  list, {@code null} elements and blank keywords are skipped.
     */
    public static void saveKeywords(int companyId, List<String> keywords) {
        if (keywords == null) {
            return;
        }
        String key = getKey(companyId);
        for (String raw : keywords) {
            if (raw == null) {
                continue;
            }
            String keyword = raw.trim();
            if (keyword.length() == 0) {
                continue;
            }
            Map<Object, Object> node = root();
            for (int j = 0; j < keyword.length(); j++) {
                char ch = keyword.charAt(j);
                Map<Object, Object> next = child(node, ch);
                if (next == null) {
                    next = new HashMap<Object, Object>();
                    node.put(Character.valueOf(ch), next);
                }
                // Mark the node we step INTO, and never downgrade an existing
                // flag: an END set by a shorter keyword must survive.
                if (!next.containsKey(key)) {
                    next.put(key, PASS);
                }
                node = next;
            }
            node.put(key, END);
        }
    }

    /**
     * Length of the longest keyword of the company identified by {@code key}
     * that starts exactly at {@code begin}, or 0 when none does.
     */
    private static int matchLength(String txt, String key, int begin) {
        Map<Object, Object> node = root();
        int matched = 0;
        for (int i = begin; i < txt.length(); i++) {
            node = child(node, txt.charAt(i));
            if (node == null) {
                break;
            }
            Object flag = node.get(key);
            if (flag == null) {
                // Only other companies' keywords continue along this path.
                break;
            }
            if (END.equals(flag)) {
                matched = i - begin + 1;
            }
        }
        return matched;
    }

    /**
     * Scans {@code txt} once, left to right, collecting the distinct keywords of
     * the company in order of first occurrence.
     *
     * @param masked when non-null, receives the text with every match replaced
     *               by {@link #MASK}
     */
    private static List<String> scan(String txt, int companyId, StringBuilder masked) {
        String key = getKey(companyId);
        List<String> found = new ArrayList<String>();
        int length = txt.length();
        int pos = 0;
        while (pos < length) {
            int len = matchLength(txt, key, pos);
            if (len == 0) {
                if (masked != null) {
                    masked.append(txt.charAt(pos));
                }
                pos++;
                continue;
            }
            String hit = txt.substring(pos, pos + len);
            if (!found.contains(hit)) {
                found.add(hit);
            }
            if (masked != null) {
                masked.append(MASK);
            }
            pos += len;
        }
        return found;
    }

    /** Joins the words with {@link #SEPARATOR}; an empty list gives an empty string. */
    private static String join(List<String> words) {
        StringBuilder joined = new StringBuilder();
        for (String word : words) {
            if (joined.length() > 0) {
                joined.append(SEPARATOR);
            }
            joined.append(word);
        }
        return joined.toString();
    }

    /**
     * Masks the company's keywords in a text.
     *
     * @param companyId company whose keywords apply
     * @param txt       text to filter
     * @return a two-element list: index 0 is the text with every keyword
     *         occurrence replaced by a single {@code "*"}; index 1 is the
     *         distinct keywords found, comma-separated in order of first
     *         occurrence (empty string when none)
     * @throws NullPointerException if {@code txt} is {@code null}
     */
    public static List<String> repword(int companyId, String txt) {
        requireText(txt);
        StringBuilder masked = new StringBuilder(txt.length());
        List<String> found = scan(txt, companyId, masked);
        List<String> result = new ArrayList<String>();
        result.add(masked.toString());
        result.add(join(found));
        return result;
    }

    /**
     * Lists the company's keywords that occur in a text, using the same
     * leftmost-longest scan as {@link #repword}.
     *
     * @param txt       text to inspect
     * @param companyId company whose keywords apply
     * @return the distinct keywords, comma-separated in order of first
     *         occurrence, or {@code null} when the text contains none
     * @throws NullPointerException if {@code txt} is {@code null}
     */
    public static String getTxtKeyWords(String txt, int companyId) {
        requireText(txt);
        List<String> found = scan(txt, companyId, null);
        if (found.isEmpty()) {
            return null;
        }
        return join(found);
    }

    /**
     * Tells whether a text contains at least one of the company's keywords.
     * Static because the class cannot be instantiated.
     *
     * @param txt       text to inspect
     * @param companyId company whose keywords apply
     * @return {@code true} if any keyword of the company occurs in {@code txt}
     * @throws NullPointerException if {@code txt} is {@code null}
     */
    public static boolean isKeyWords(String txt, int companyId) {
        requireText(txt);
        String key = getKey(companyId);
        for (int i = 0; i < txt.length(); i++) {
            if (matchLength(txt, key, i) > 0) {
                return true;
            }
        }
        return false;
    }

    /** Small demo: registers a few keywords for company 1 and filters a sample text. */
    public static void main(String[] arg) {
        List<String> keywords = new ArrayList<String>();
        keywords.add("傻×");
        keywords.add("汉奸");
        keywords.add("草");
        keywords.add("草泥马");
        KeywordFilter.saveKeywords(1, keywords);
        String txt = "是傻×汉奸傻A傻B傻C傻D汉奸傻×草泥马";
        List<String> list = repword(1, txt);
        System.out.println("文中包含的敏感字为:" + list.get(1));
        System.out.println("原文:" + txt);
        System.out.println("敏感字过滤后:" + list.get(0));
    }
}