// DFA (deterministic finite automaton) algorithm for sensitive-word filtering
package com.maphao.manage.util;
import lombok.extern.slf4j.Slf4j;
import java.util.*;
/**
 * Sensitive-word filter based on a DFA-style prefix tree (trie).
 *
 * <p>The dictionary is kept twice: as a flat set of words ({@code words}) and as a
 * prefix tree ({@code wordsTree}) in which every node is one automaton state. Text is
 * scanned by trying, at every start position, to walk the tree character by character.
 *
 * <p>All state is static and mutable, and no synchronization is performed.
 *
 * @author fxp
 * @since 2024/3/11
 */
@Slf4j
public class MySensitiveWordUtil {

    /** Every word currently registered in the dictionary. */
    private static final Set<String> words = new HashSet<>();

    /** Root state of the prefix tree built from the registered words. */
    private static TrieNode wordsTree = new TrieNode();

    /**
     * One state of the automaton: the outgoing transitions keyed by character, plus a
     * flag telling whether the path from the root to this node spells a complete word.
     */
    private static final class TrieNode {
        private final Map<Character, TrieNode> children = new HashMap<>();
        private boolean end;
    }

    /** Placeholder for dictionary initialisation; intentionally does nothing yet. */
    private static void init() {
    }

    /**
     * Rebuilds the prefix tree from scratch using the registered word set.
     *
     * <p>The new tree is assembled completely before it replaces the old one, so a scan
     * never walks a half-built tree.
     */
    private static void refreshTree() {
        TrieNode root = new TrieNode();
        for (String word : words) {
            if (word != null && !word.isEmpty()) {
                insert(root, word);
            }
        }
        wordsTree = root;
    }

    /**
     * Adds the given words to the current prefix tree and registers them in the
     * dictionary. Words that are already present are left untouched.
     *
     * @param words words to add; {@code null}, as well as {@code null} or empty
     *              elements, are ignored
     */
    private static void setTree(Set<String> words) {
        if (words == null) {
            return;
        }
        for (String word : words) {
            if (word == null || word.isEmpty()) {
                continue;
            }
            insert(wordsTree, word);
            addWord(word);
        }
    }

    /**
     * Inserts a single non-empty word below {@code root}, creating missing nodes and
     * marking the node of the last character as a word end.
     */
    private static void insert(TrieNode root, String word) {
        TrieNode node = root;
        for (int i = 0; i < word.length(); i++) {
            Character key = word.charAt(i);
            TrieNode next = node.children.get(key);
            if (next == null) {
                next = new TrieNode();
                node.children.put(key, next);
            }
            node = next;
        }
        node.end = true;
    }

    /** Registers a word in the dictionary set (the tree is not touched). */
    private static void addWord(String word) {
        words.add(word);
    }

    /** Unregisters a word from the dictionary set (the tree is not touched). */
    private static void removeWord(String word) {
        words.remove(word);
    }

    /**
     * Removes the given words from the dictionary and rebuilds the whole prefix tree.
     *
     * @param words words to remove; {@code null} is ignored
     */
    private static void removeWords(Set<String> words) {
        if (words == null) {
            return;
        }
        // Iterate over a snapshot so that passing the dictionary set itself is safe.
        for (String word : new ArrayList<>(words)) {
            removeWord(word);
        }
        refreshTree();
    }

    /**
     * Removes the given words without rebuilding the tree: the end flag of each word's
     * last node is cleared in place and the word is unregistered from the dictionary.
     *
     * <p>Nodes are not pruned, so longer words sharing the same prefix keep matching.
     * Unregistering the word keeps a later {@link #refreshTree()} from restoring it.
     *
     * @param words words to remove; {@code null}, as well as {@code null} or empty
     *              elements, are ignored
     */
    private static void removeWordsUpdate1(Set<String> words) {
        if (words == null) {
            return;
        }
        // Iterate over a snapshot so that passing the dictionary set itself is safe.
        for (String word : new ArrayList<>(words)) {
            if (word == null || word.isEmpty()) {
                continue;
            }
            TrieNode node = wordsTree;
            for (int i = 0; i < word.length() && node != null; i++) {
                node = node.children.get(word.charAt(i));
            }
            if (node != null) {
                node.end = false;
            }
            removeWord(word);
        }
    }

    /**
     * Computes the length of a dictionary word that starts exactly at {@code start}.
     *
     * @param text    text being scanned
     * @param start   index of the first character to match
     * @param longest {@code true} to return the longest matching word, {@code false} to
     *                stop at the first (shortest) one
     * @return number of matched characters, or {@code 0} when no word starts there
     */
    private static int matchLength(String text, int start, boolean longest) {
        TrieNode node = wordsTree;
        int matched = 0;
        for (int i = start; i < text.length(); i++) {
            node = node.children.get(text.charAt(i));
            if (node == null) {
                break;
            }
            if (node.end) {
                matched = i - start + 1;
                if (!longest) {
                    break;
                }
            }
        }
        return matched;
    }

    /**
     * Tells whether the text contains at least one dictionary word as a contiguous
     * substring.
     *
     * @param text text to inspect; {@code null} is treated as containing nothing
     * @return {@code true} if any registered word occurs in {@code text}
     */
    private static Boolean contains(String text) {
        if (text == null) {
            return false;
        }
        // Every position is tried as a potential word start, so a failed partial match
        // can neither leak state into the next attempt nor hide a later word.
        for (int start = 0; start < text.length(); start++) {
            if (matchLength(text, start, false) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Masks every dictionary word found in the text, one {@code symbol} per masked
     * character. At each position the longest matching word wins; scanning resumes
     * right after the masked span.
     *
     * @param text   text to filter; {@code null} or empty text is returned unchanged
     * @param symbol replacement character
     * @return the filtered text
     */
    private static String containsAndReplace(String text, char symbol) {
        if (text == null || text.isEmpty()) {
            return text;
        }
        StringBuilder filtered = new StringBuilder(text.length());
        int pos = 0;
        while (pos < text.length()) {
            int length = matchLength(text, pos, true);
            if (length == 0) {
                filtered.append(text.charAt(pos));
                pos++;
            } else {
                for (int k = 0; k < length; k++) {
                    filtered.append(symbol);
                }
                pos += length;
            }
        }
        return filtered.toString();
    }

    /** Small demonstration of building the dictionary, scanning and masking text. */
    public static void main(String[] args) {
        Set<String> sensitiveWordSet = new HashSet<>();
        sensitiveWordSet.add("成人电影");
        sensitiveWordSet.add("成人影像");
        sensitiveWordSet.add("成人");
        sensitiveWordSet.add("静静");
        sensitiveWordSet.add("静一静");
        setTree(sensitiveWordSet);
        System.out.println(contains("成功"));
        System.out.println(contains("静一一经"));
        System.out.println(contains("电影好看"));
        System.out.println(contains("看电影要静静好吗"));
        log.info("{}", containsAndReplace("成人了就爱看成人电影和成人影视,看的时候需要安静一下,才能静静看,可以静一静",'*'));
        Set<String> set = new HashSet<>();
        set.add("成人");
        // removeWords(set) would give the same result by rebuilding the whole tree.
        removeWordsUpdate1(set);
        log.info("{}", containsAndReplace("成人了就爱看成人电影和成人影视,看的时候需要安静一下,才能静静看,可以静一静",'*'));
    }
}