文章目录
基于前缀树的敏感词过滤
知识点:
- @PostConstruct
有点像静态代码块的作用:在对象构造完成(依赖注入完成)后执行,这里用来把敏感词插入前缀树。
- resources 下的文件会被加载到类路径下,通过 getClass().getClassLoader().getResourceAsStream() 可读取
- 跳过特殊字符的匹配算法。
@Component
@Slf4j
public class SensitiveFilter {
private TireNode root = new TireNode();
private static final String REPLACE_WORD = "***";
@PostConstruct
public void init() {
//加载类路径下的敏感词文件,加入到前缀树
try(
final InputStream is = this.getClass().getClassLoader().getResourceAsStream("sensitive-words.txt");
BufferedReader br = new BufferedReader(new InputStreamReader(is));
) {
String keyWord;
while ((keyWord = br.readLine()) != null) {
this.addKeyWord(keyWord);
}
} catch (IOException e) {
log.error("加载错误:{}", e.getMessage());
e.printStackTrace();
}
}
public String filter(String text) {
if (StringUtils.isBlank(text)) return "";
final char[] chs = text.toCharArray();
TireNode cur = root;
int end = 0, start = 0;
StringBuilder sb = new StringBuilder();
while (end < chs.length) {
if (isSymbol(chs[end])) {
//当前字符是特殊字符,选择跳过检查
if (cur == root) {
//当前前缀树指针没动,需要把两个指针全部右移一位
start++;
}
//TODO:通过注释这句话可以让特殊字符不显示
sb.append(chs[end]);
//无论怎么样,end都会+1
end++;
} else {
cur = cur.getNode(chs[end]);
if (cur == null) {
//若当前字符在前缀树找不到一条路径,添加开始字符到res
//abc babc 若start指向b,end指向a,那么不符合前缀树路径abc,start就会右移一位,同时end紧跟着右移一位
sb.append(chs[start]);
end = ++start;
cur = root;
} else if (cur.isKeyWordEnd()) {
//当前start到end的位置满足一个敏感字符
sb.append(REPLACE_WORD);
//start和end到达下一个位置
start = ++end;
cur = root;
} else {
//还没跑完前缀树一条路线
end++;
}
}
}
//将最后一批字符加入res
// 如abc abcae 此时start = d, end = e
sb.append(text.substring(start));
return sb.toString();
}
//返回是否为特殊字符
//后两个范围是东亚字符
private boolean isSymbol(char ch) {
return !CharUtils.isAscii(ch) && (ch < 0x2E80 || ch > 0x9FFF);
}
/**
*将keyword加入到前缀树中
*/
private void addKeyWord(String keyWord) {
final char[] chars = keyWord.toCharArray();
TireNode node = null, cur = root;
for (char ch : chars) {
if ((node = cur.getNode(ch)) == null) {
node = new TireNode();
//新的节点放到下轮位置
cur.addNode(ch, node);
}
cur = node;
}
if (node != null) {
node.setKeyWordEnd(true);
}
}
private class TireNode {
@Getter
@Setter
private boolean isKeyWordEnd = false;
private Map<Character, TireNode> nexts = new HashMap<>();
public void addNode(char c, TireNode node) {
nexts.put(c,node);
}
public TireNode getNode(char c) {
return nexts.get(c);
}
}
}