一、DFA
DFA全称为:Deterministic Finite Automaton,即确定有穷自动机。
通过敏感词list来构造map树(字典树):每个字符作为key,对应的value是下一层的子map;非叶子节点的isend为"0",叶子节点map只包含键值对isend=1。
结构展示(以敏感词"ab"、"cd"为例):
{isend=0, a={isend=0, b={isend=1}}, c={isend=0, d={isend=1}}}
java实现如下:
/**
 * Builds the keyword map tree (a trie stored as nested maps) and returns its root.
 *
 * <p>Every character of a keyword becomes a key whose value is the child map. Each node
 * carries an {@code "isend"} entry: {@code "1"} on the node that ends a keyword,
 * {@code "0"} otherwise. {@code null} and empty keywords are skipped: a {@code null}
 * would throw on {@code split}, and {@code "".split("")} yields a single empty string
 * that would otherwise be registered as a keyword.
 *
 * @param list keywords to index; may be {@code null} or empty
 * @return the root map; an empty map when {@code list} is {@code null} or empty
 */
private Map<String, Object> getPreMap(List<String> list) {
    Map<String, Object> root = new HashMap<>();
    if (list == null || list.isEmpty()) {
        return root;
    }
    root.put("isend", "0");
    for (String keyword : list) {
        if (keyword == null || keyword.isEmpty()) {
            continue;
        }
        // split("") breaks the keyword into one-character strings used as map keys.
        composePreMap(root, keyword.split(""));
    }
    return root;
}
/**
 * Inserts one keyword, given as an array of one-character strings, into the map tree.
 *
 * <p>Walking stops as soon as the current node is already flagged {@code "isend" = "1"}:
 * a shorter keyword that is a prefix of this one is already stored. When this keyword
 * ends on an existing node, that node's subtree is discarded and the node is flagged as
 * an end node, so only the shortest keyword along a path is kept.
 *
 * @param root  root map of the tree; modified in place
 * @param chars the keyword split into one-character strings
 */
@SuppressWarnings("unchecked")
private void composePreMap(Map<String, Object> root, String[] chars) {
    final int lastIndex = chars.length - 1;
    Map<String, Object> node = root;
    int pos = 0;
    while (pos < chars.length && !"1".equals(node.get("isend"))) {
        final String key = chars[pos];
        final boolean terminal = (pos == lastIndex);
        final Object existing = node.get(key);
        if (existing == null) {
            // No branch for this character yet: create it and descend into it.
            Map<String, Object> created = new HashMap<String, Object>();
            if (terminal) {
                created.put("isend", "1");
            } else {
                created.put("isend", "0");
            }
            node.put(key, created);
            node = created;
        } else {
            node = (Map<String, Object>) existing;
            if (terminal) {
                // The keyword ends here: drop the longer keywords below this node.
                node.clear();
                node.put("isend", "1");
            }
        }
        pos++;
    }
}
二、将map树放入Redis
1.引入依赖
<dependencies>
<!-- Spring Data Redis starter: supplies the org.springframework.data.redis classes (StringRedisTemplate, serializers) used below. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- JSON starter: Jackson2JsonRedisSerializer needs ObjectMapper (see note 2 below). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-json</artifactId>
</dependency>
<!-- Test starter: test scope only. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
2.创建自己的StringRedisTemplate,避免影响系统默认的StringRedisTemplate
/**
 * Declares two {@link StringRedisTemplate} beans: a dedicated one whose values and hash
 * values are serialized as JSON, and a primary one left with its default settings so that
 * existing injection points are not affected.
 */
@Configuration
public class MyRedisTemplate {

    /** Creates a fresh JSON serializer for arbitrary objects. */
    private static Jackson2JsonRedisSerializer<Object> newJsonSerializer() {
        return new Jackson2JsonRedisSerializer<Object>(Object.class);
    }

    /**
     * Template used for the keyword cache; values and hash values go through Jackson.
     *
     * @param fc connection factory supplied by the container
     * @return the JSON-enabled template, registered as {@code myStringRedisTemplate}
     */
    @Bean("myStringRedisTemplate")
    public StringRedisTemplate myStringRedisTemplate(RedisConnectionFactory fc) {
        StringRedisTemplate jsonTemplate = new StringRedisTemplate(fc);
        jsonTemplate.setValueSerializer(newJsonSerializer());
        jsonTemplate.setHashValueSerializer(newJsonSerializer());
        return jsonTemplate;
    }

    /**
     * Plain template with no serializer overrides, marked primary so unqualified
     * injections of {@link StringRedisTemplate} resolve to it.
     *
     * @param redisConnectionFactory connection factory supplied by the container
     * @return the default-configured template
     */
    @Primary
    @Bean
    public StringRedisTemplate stringRedisTemplate(RedisConnectionFactory redisConnectionFactory) {
        StringRedisTemplate defaultTemplate = new StringRedisTemplate();
        defaultTemplate.setConnectionFactory(redisConnectionFactory);
        return defaultTemplate;
    }
}
3.使用自己的StringRedisTemplate来存储和读取缓存
package com.test.redis;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.data.redis.serializer.Jackson2JsonRedisSerializer;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Sensitive-word check backed by a keyword map tree cached in a Redis hash.
 *
 * <p>The tree is built at startup by {@code getPreMap} (the builder shown in section one,
 * which must be a member of this class together with {@code composePreMap}) and stored
 * under a single Redis hash; each lookup reads that hash back and walks it.
 *
 * <p>Created: 2021/8/8 15:36
 */
@Component
public class KeywordCacheUtil {

    /** Redis hash key under which the root level of the keyword map tree is stored. */
    private static final String KEYWORD_HASH_KEY = "VirtualCcyKeywordUtil";

    @Autowired
    @Qualifier("myStringRedisTemplate")
    private StringRedisTemplate myStringRedisTemplate;

    /**
     * Tells whether the given text contains at least one cached sensitive word.
     *
     * @param additional text to inspect; {@code null} or empty text contains no keyword
     * @return {@code true} if a keyword starts at any position of the text
     */
    public boolean containKeywords(String additional) {
        if (additional == null || additional.isEmpty()) {
            return false;
        }
        // Load the root level of the tree: every hash field is a first character
        // (or "isend"), every value the deserialized subtree.
        Map<Object, Object> preMap = myStringRedisTemplate.opsForHash().entries(KEYWORD_HASH_KEY);
        String[] add = additional.split("");
        for (int i = 0; i < add.length; i++) {
            if (ifexist(add, i, preMap)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Walks the tree from position {@code i} and reports whether a keyword starts there.
     *
     * @param add    the text split into one-character strings
     * @param i      index of the first character to match
     * @param preMap root of the keyword map tree
     * @return {@code true} as soon as a node flagged {@code "isend" = "1"} is reached
     */
    private boolean ifexist(String[] add, int i, Map<?, ?> preMap) {
        Map<?, ?> node = preMap;
        while (i < add.length) {
            Object child = node.get(add[i]);
            if (!(child instanceof Map)) {
                // No branch for this character (or not a subtree): no keyword here.
                return false;
            }
            node = (Map<?, ?>) child;
            if ("1".equals(node.get("isend"))) {
                return true;
            }
            i++;
        }
        return false;
    }

    /**
     * Preloads the keyword cache after the bean has been constructed: rebuilds the map
     * tree, removes the previous hash and writes the new one.
     */
    @PostConstruct
    private void reload() {
        // Sample keyword list.
        List<String> list = new ArrayList<>();
        list.add("cd");
        Map<String, Object> preMap = getPreMap(list);
        preMap.forEach((k, v) -> System.out.println(k + ":" + v));
        myStringRedisTemplate.delete(KEYWORD_HASH_KEY);
        myStringRedisTemplate.opsForHash().putAll(KEYWORD_HASH_KEY, preMap);
    }
}
注意点:
1、StringRedisTemplate默认的序列化器StringRedisSerializer只能处理String,在处理对象(如嵌套的Map)的时候会类型转换错误;需要手工将它的value序列化器和hashValue序列化器指定为Jackson2JsonRedisSerializer(序列化和反序列化都由它完成)。
2、Jackson2JsonRedisSerializer需要引用ObjectMapper类,所以需要引入spring-boot-starter-json依赖。
3、配置自己的StringRedisTemplate,避免影响项目中已有的StringRedisTemplate的使用。
三、优化补充:
1、适用
词库量较大的时候,避免每个词都去和整个库进行匹配,影响效率。
2、字典树优化
可以参照下列优化项,将算法对象化(将属性和方法都封装到对象里面)。
/** One node of the trie: outgoing edges keyed by character, plus an end-of-word flag. */
class TrieNode {
    /** Child nodes keyed by the next UTF-16 character. */
    final Map<Character, TrieNode> children = new HashMap<>();
    /** {@code true} when the path from the root to this node spells an inserted word. */
    boolean isEndOfWord = false;
}

/**
 * Character trie over a word list with forward-maximum-match segmentation.
 */
class Trie {
    private final TrieNode root;

    public Trie() {
        root = new TrieNode();
    }

    /**
     * Adds a word to the trie.
     *
     * @param word the word to add; {@code null} and empty words are ignored
     */
    public void insert(String word) {
        if (word == null || word.isEmpty()) {
            return;
        }
        TrieNode node = root;
        for (int i = 0; i < word.length(); i++) {
            // computeIfAbsent only allocates a node when the edge is really missing.
            node = node.children.computeIfAbsent(word.charAt(i), key -> new TrieNode());
        }
        node.isEndOfWord = true;
    }

    /**
     * Splits {@code text} from left to right into segments. At each position the longest
     * inserted word starting there becomes one segment; when no word starts there, the
     * single character at that position becomes a segment of its own.
     *
     * <p>An unmatched supplementary character (a surrogate pair) is kept together as one
     * segment instead of being cut into two invalid halves.
     *
     * @param text the text to segment; {@code null} is treated as empty
     * @return the segments in order; concatenated they reproduce {@code text}
     */
    public List<String> searchAll(String text) {
        List<String> result = new ArrayList<>();
        if (text == null) {
            return result;
        }
        int length = text.length();
        int i = 0;
        while (i < length) {
            int longestMatch = longestMatchEnd(text, i);
            int next = longestMatch >= 0
                    ? longestMatch + 1
                    : i + Character.charCount(text.codePointAt(i));
            result.add(text.substring(i, next));
            i = next;
        }
        return result;
    }

    /** Returns the index of the last character of the longest word starting at {@code start}, or -1. */
    private int longestMatchEnd(String text, int start) {
        TrieNode node = root;
        int longestMatch = -1;
        for (int j = start; j < text.length(); j++) {
            node = node.children.get(text.charAt(j));
            if (node == null) {
                break;
            }
            if (node.isEndOfWord) {
                longestMatch = j;
            }
        }
        return longestMatch;
    }
}
参考&鸣谢
nowcoder.com