最近遇到了一个需求:将弹幕里面的敏感词过滤掉。
一 、DFA算法
二 、java实现
1. 初始化敏感词库:将敏感词加入到 HashMap 中;考虑到搜索效率,这里我们将敏感词库存储在 Redis 中。
public class SensitiveWordInit {
// Character encoding name. NOTE(review): presumably the charset used when reading the
// sensitive-word file -- its use is not visible in this part of the file; confirm.
private String ENCODING = "GBK";
// Root of the nested lookup map. Keys are single characters of a sensitive word and each
// value is the nested map for the next character (new nodes carry an "isEnd" entry).
// Assigned by addSensitiveWordToHashMap and returned by initKeyWord.
public HashMap sensitiveWordMap;
SensitiveWordInit(){
super();
}
/**
 * Loads the sensitive-word list and builds the lookup map from it.
 *
 * <p>Reads the word set via {@code readSensitiveWordFile()} and hands it to
 * {@code addSensitiveWordToHashMap(Set)}, which assigns {@code sensitiveWordMap}.
 * Any exception raised by either step is printed to standard error and not rethrown.
 * If no map has been created by then, an empty map is installed so that callers
 * never receive {@code null}.
 *
 * @return the map held in {@code sensitiveWordMap}; never {@code null}
 * @version 1.0
 */
public Map initKeyWord() {
    try {
        // Read the sensitive-word list.
        Set<String> keyWordSet = readSensitiveWordFile();
        // Build the nested lookup map from the words.
        addSensitiveWordToHashMap(keyWordSet);
    } catch (Exception e) {
        // Best-effort load: the failure is reported but deliberately not propagated.
        e.printStackTrace();
    }
    if (sensitiveWordMap == null) {
        // Loading failed before the map was created; fall back to an empty map
        // so callers can look words up without a null check.
        sensitiveWordMap = new HashMap();
    }
    return sensitiveWordMap;
}
/**
* 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:<br>
* @param keyWordSet 敏感词库
* @version 1.0
*/
private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
//初始化敏感词容器,减少扩容操作
sensitiveWordMap = new HashMap(keyWordSet.size());
String key = null;
Map nowMap = null;
Map<String, String> newWorMap = null;
//迭代keyWordSet
Iterator<String> iterator = keyWordSet.iterator();
while(iterator.hasNext()){
//关键字
key = iterator.next();
nowMap = sensitiveWordMap;
for(int i = 0 ; i < key.length() ; i++){
//转换成char型
char keyChar = key.charAt(i);
//获取
Object wordMap = nowMap.get(keyChar);
//如果存在该key,直接赋值
if(wordMap != null){
nowMap = (Map) wordMap;
}
else{ //不存在则,则构建一个map,同时将isEnd设置为0,因为他不是最后一个
newWorMap = new HashMap<String,String>();
//不是最后一个
newWorMap.put("isEnd", "0");
nowMap.put(keyChar, newWorMap);
now
// Character encoding name. NOTE(review): presumably the charset used when reading the
// sensitive-word file -- its use is not visible in this part of the file; confirm.
private String ENCODING = "GBK";
// Root of the nested lookup map. Keys are single characters of a sensitive word and each
// value is the nested map for the next character (new nodes carry an "isEnd" entry).
// Assigned by addSensitiveWordToHashMap and returned by initKeyWord.
public HashMap sensitiveWordMap;
SensitiveWordInit(){
super();
}
/**
 * Loads the sensitive-word list and builds the lookup map from it.
 *
 * <p>Reads the word set via {@code readSensitiveWordFile()} and hands it to
 * {@code addSensitiveWordToHashMap(Set)}, which assigns {@code sensitiveWordMap}.
 * Any exception raised by either step is printed to standard error and not rethrown.
 * If no map has been created by then, an empty map is installed so that callers
 * never receive {@code null}.
 *
 * @return the map held in {@code sensitiveWordMap}; never {@code null}
 * @version 1.0
 */
public Map initKeyWord() {
    try {
        // Read the sensitive-word list.
        Set<String> keyWordSet = readSensitiveWordFile();
        // Build the nested lookup map from the words.
        addSensitiveWordToHashMap(keyWordSet);
    } catch (Exception e) {
        // Best-effort load: the failure is reported but deliberately not propagated.
        e.printStackTrace();
    }
    if (sensitiveWordMap == null) {
        // Loading failed before the map was created; fall back to an empty map
        // so callers can look words up without a null check.
        sensitiveWordMap = new HashMap();
    }
    return sensitiveWordMap;
}
/**
* 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:<br>
* @param keyWordSet 敏感词库
* @version 1.0
*/
private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
//初始化敏感词容器,减少扩容操作
sensitiveWordMap = new HashMap(keyWordSet.size());
String key = null;
Map nowMap = null;
Map<String, String> newWorMap = null;
//迭代keyWordSet
Iterator<String> iterator = keyWordSet.iterator();
while(iterator.hasNext()){
//关键字
key = iterator.next();
nowMap = sensitiveWordMap;
for(int i = 0 ; i < key.length() ; i++){
//转换成char型
char keyChar = key.charAt(i);
//获取
Object wordMap = nowMap.get(keyChar);
//如果存在该key,直接赋值
if(wordMap != null){
nowMap = (Map) wordMap;
}
else{ //不存在则,则构建一个map,同时将isEnd设置为0,因为他不是最后一个
newWorMap = new HashMap<String,String>();
//不是最后一个
newWorMap.put("isEnd", "0");
nowMap.put(keyChar, newWorMap);
now