1.前缀树
名称:Trie、字典树、查找树;特点:查找效率高,但消耗内存大;应用:字符串检索、词频统计、字符串排序等。
2.敏感词过滤器
步骤:(1) 定义前缀树;(2) 根据敏感词初始化前缀树;(3) 编写过滤敏感词的方法。
3.创建保存敏感词的txt文件
在类路径(resources 目录)下创建 sensitive-words.txt,每行写一个敏感词;先写入少量敏感词用于测试即可。
4.创建一个过滤敏感词的工具类
/**
 * Sensitive-word filter backed by a trie (prefix tree).
 *
 * <p>The trie is populated once in {@link #init()} from the classpath resource
 * {@code sensitive-words.txt} (one keyword per line). {@link #filter(String)} then scans a
 * text and replaces every occurrence of a keyword with {@code ***}. Characters classified
 * as symbols by {@link #isSymbol(char)} are skipped while matching, so a keyword that has
 * symbols interleaved between its characters is still detected.
 */
@Component
public class SensitiveFilter {

    private static final Logger logger = LoggerFactory.getLogger(SensitiveFilter.class);

    /** Text substituted for every matched sensitive word. */
    private static final String REPLACEMENT = "***";

    /** Classpath resource that lists the sensitive words, one per line. */
    private static final String KEYWORD_RESOURCE = "sensitive-words.txt";

    /** Root of the trie; it carries no character itself. */
    private final TrieNode rootNode = new TrieNode();

    /**
     * Loads the keyword file and builds the trie.
     *
     * <p>Failures (missing resource or read error) are logged and leave the trie with
     * whatever keywords were added before the failure.
     */
    @PostConstruct
    public void init() {
        try (InputStream is = this.getClass().getClassLoader().getResourceAsStream(KEYWORD_RESOURCE)) {
            // getResourceAsStream returns null (it does not throw) when the resource is absent.
            if (is == null) {
                logger.error("加载敏感词文件失败!" + KEYWORD_RESOURCE + " not found on classpath");
                return;
            }
            // Decode explicitly as UTF-8 rather than relying on the platform default charset.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
                String keyword;
                while ((keyword = reader.readLine()) != null) {
                    // Surrounding whitespace would become trie nodes that filter() can never
                    // reach (whitespace is skipped as a symbol), so strip it here.
                    String trimmed = keyword.trim();
                    if (!trimmed.isEmpty()) {
                        this.addKeyword(trimmed);
                    }
                }
            }
        } catch (IOException e) {
            // Pass the exception itself so the stack trace is not lost.
            logger.error("加载敏感词文件失败!" + e.getMessage(), e);
        }
    }

    /**
     * Inserts one keyword into the trie, creating child nodes as needed and marking the
     * node of the last character as the end of a keyword.
     *
     * @param keyword the sensitive word to add; an empty string adds nothing
     */
    private void addKeyword(String keyword) {
        TrieNode tempNode = rootNode;
        for (int i = 0; i < keyword.length(); i++) {
            char c = keyword.charAt(i);
            TrieNode subNode = tempNode.getSubNode(c);
            if (subNode == null) {
                subNode = new TrieNode();
                tempNode.addSubNode(c, subNode);
            }
            tempNode = subNode;
        }
        // After the loop tempNode is the node of the last character (or the root if empty).
        if (tempNode != rootNode) {
            tempNode.setKeywordEnd(true);
        }
    }

    /**
     * Replaces every sensitive word found in {@code text} with {@code ***}.
     *
     * <p>Three cursors are used: {@code tempNode} walks the trie, {@code begin} marks the
     * start of the current match candidate, and {@code position} is the character being
     * examined. The shortest keyword starting at {@code begin} wins.
     *
     * @param text the text to filter
     * @return the filtered text, or {@code null} when {@code StringUtils.isBlank(text)} is true
     */
    public String filter(String text) {
        if (StringUtils.isBlank(text)) {
            return null;
        }

        TrieNode tempNode = rootNode;
        int begin = 0;
        int position = 0;
        StringBuilder sb = new StringBuilder();

        // Iterate on begin, not position: when position runs off the end in the middle of a
        // partial match, the characters after begin must still be re-examined as possible
        // starts of a (shorter) keyword.
        while (begin < text.length()) {
            if (position >= text.length()) {
                // The candidate starting at begin never completed a keyword.
                sb.append(text.charAt(begin));
                position = ++begin;
                tempNode = rootNode;
                continue;
            }

            char c = text.charAt(position);

            if (isSymbol(c)) {
                // A symbol outside any candidate is copied through; a symbol inside a
                // candidate is skipped so it cannot break up a keyword.
                if (tempNode == rootNode) {
                    sb.append(c);
                    begin++;
                }
                position++;
                continue;
            }

            tempNode = tempNode.getSubNode(c);
            if (tempNode == null) {
                // No keyword starts at begin: emit that character and restart one further on.
                sb.append(text.charAt(begin));
                position = ++begin;
                tempNode = rootNode;
            } else if (tempNode.isKeywordEnd()) {
                // text[begin..position] is a keyword (possibly with skipped symbols inside).
                sb.append(REPLACEMENT);
                begin = ++position;
                tempNode = rootNode;
            } else {
                // Still a proper prefix of some keyword: keep extending the candidate.
                position++;
            }
        }
        return sb.toString();
    }

    /**
     * Tells whether {@code c} is treated as a symbol, i.e. it is neither an ASCII letter or
     * digit nor inside the range 0x2E80..0x9FFF (which spans the CJK radicals through the
     * CJK unified ideographs).
     *
     * @param c the character to classify
     * @return {@code true} if the character should be skipped during matching
     */
    private boolean isSymbol(char c) {
        return !CharUtils.isAsciiAlphanumeric(c) && (c < 0x2E80 || c > 0x9FFF);
    }

    /** A trie node: children keyed by character plus an end-of-keyword flag. */
    private static class TrieNode {

        /** True when the path from the root to this node spells a complete keyword. */
        private boolean isKeywordEnd = false;

        /** Child nodes keyed by the next character. */
        private final Map<Character, TrieNode> subNodes = new HashMap<>();

        public boolean isKeywordEnd() {
            return isKeywordEnd;
        }

        public void setKeywordEnd(boolean keywordEnd) {
            isKeywordEnd = keywordEnd;
        }

        /** Registers {@code node} as the child reached by character {@code c}. */
        public void addSubNode(Character c, TrieNode node) {
            subNodes.put(c, node);
        }

        /** Returns the child reached by character {@code c}, or {@code null} if there is none. */
        public TrieNode getSubNode(Character c) {
            return subNodes.get(c);
        }
    }
}
5.创建测试方法
/**
 * Spring Boot test that feeds sample sentences through the injected
 * {@code SensitiveFilter} and prints each filtered result to standard output.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
@ContextConfiguration(classes = CommunityApplication.class)
public class SensitiveTests {

    @Autowired
    private SensitiveFilter sensitiveFilter;

    /**
     * Filters a plain sentence and then the same sentence with symbols interleaved
     * between the characters, printing the output of each in that order.
     */
    @Test
    public void testSensitiveFilter() {
        String[] samples = {
            "这里可以赌博,可以嫖娼, 可以吸毒, 可以开票, 可以吹牛逼!!!",
            "这里可以✳赌✳博✳,可以✳嫖✳娼✳, 可以✳吸✳毒✳, 可以✳开✳票✳, 可以✳吹✳牛✳逼✳!!!"
        };
        for (String sample : samples) {
            String filtered = sensitiveFilter.filter(sample);
            System.out.println(filtered);
        }
    }
}
6.测试结果