关键词匹配工具类:
/**
 * Keyword matching utility backed by a character trie.
 *
 * <p>Typical usage: build a trie once from a keyword set with
 * {@link #buildTrie(Set)}, then scan any number of texts with
 * {@link #getKeywordSetWithTree(String, TrieNode)} (which keywords occur) or
 * {@link #getKeywordPosWithTree(String, TrieNode)} (where they occur, e.g. for
 * highlighting).
 */
public class WordMatchTreeUtils {

    /**
     * Builds a trie (dictionary) from a set of keywords.
     *
     * @param kset keywords to index; {@code null} is treated as an empty set,
     *             and {@code null} or empty keywords are skipped
     * @return the root node of the trie (never {@code null})
     */
    public static TrieNode buildTrie(Set<String> kset) {
        TrieNode root = new TrieNode();
        if (kset == null) {
            return root;
        }
        for (String keyword : kset) {
            if (keyword == null || keyword.isEmpty()) {
                // An empty keyword would match everywhere with zero length; ignore it.
                continue;
            }
            TrieNode scan = root;
            for (int i = 0; i < keyword.length(); i++) {
                char c = keyword.charAt(i);
                TrieNode child = scan.getChildren().get(c);
                if (child == null) {
                    // No node for this character yet: create it.
                    child = new TrieNode();
                    scan.getChildren().put(c, child);
                }
                scan = child;
            }
            // The node reached by the last character marks the end of a keyword.
            scan.setEndOfWord(true);
        }
        return root;
    }

    /**
     * Returns the distinct keywords of the trie that occur in {@code str}.
     *
     * @param str  text to scan; {@code null} or empty yields an empty set
     * @param root trie root produced by {@link #buildTrie(Set)}
     * @return a mutable set of matched keywords (never {@code null})
     */
    public static Set<String> getKeywordSetWithTree(String str, TrieNode root) {
        Set<String> keyWordSet = new HashSet<>();
        // The position list is empty (never null) when str is null/empty,
        // so str is only dereferenced when there is at least one match.
        for (Pair<Integer, Integer> pair : getKeywordPosWithTree(str, root)) {
            // Positions are inclusive on both ends, substring's end is exclusive.
            keyWordSet.add(str.substring(pair.getKey(), pair.getValue() + 1));
        }
        return keyWordSet;
    }

    /**
     * Finds every occurrence of every keyword in {@code str}, including
     * overlapping and nested matches.
     *
     * @param str  text to scan; {@code null} or empty yields an empty list
     * @param root trie root produced by {@link #buildTrie(Set)}; {@code null}
     *             yields an empty list
     * @return list of {@code (start, end)} index pairs, both inclusive, ordered
     *         by start index and then by end index
     */
    public static List<Pair<Integer, Integer>> getKeywordPosWithTree(String str, TrieNode root) {
        List<Pair<Integer, Integer>> positions = new ArrayList<>();
        if (str == null || str.isEmpty() || root == null) {
            return positions;
        }
        // Try to match a keyword starting at every index of the text.
        for (int i = 0; i < str.length(); i++) {
            TrieNode scan = root;
            for (int j = i; j < str.length(); j++) {
                scan = scan.getChildren().get(str.charAt(j));
                if (scan == null) {
                    // No keyword continues with this character.
                    break;
                }
                if (scan.isEndOfWord()) {
                    // Keep walking: a longer keyword may share this prefix.
                    positions.add(new Pair<Integer, Integer>(i, j));
                }
            }
        }
        return positions;
    }

    /** A node of the keyword trie. */
    public static class TrieNode {
        /** Whether the path from the root to this node spells a complete keyword. */
        private boolean endOfWord;
        /** Child nodes keyed by the next character. */
        private Map<Character, TrieNode> children;

        /** Creates a non-terminal node with no children. */
        public TrieNode() {
            endOfWord = false;
            children = new HashMap<>();
        }

        public boolean isEndOfWord() {
            return endOfWord;
        }

        public void setEndOfWord(boolean endOfWord) {
            this.endOfWord = endOfWord;
        }

        public Map<Character, TrieNode> getChildren() {
            return children;
        }

        public void setChildren(Map<Character, TrieNode> children) {
            this.children = children;
        }
    }

    /**
     * Minimal key/value holder; used here for inclusive {@code (start, end)}
     * match positions.
     */
    public static class Pair<K, V> {
        private K key;
        private V value;

        public Pair() {
        }

        public Pair(K key, V value) {
            this.key = key;
            this.value = value;
        }

        public K getKey() {
            return this.key;
        }

        public V getValue() {
            return this.value;
        }
    }
}
结果测试:
// Demo: builds a trie from a comma-separated keyword list and prints the
// keywords that occur in the sample text.
public static void main(String[] args){
    String text = "电脑是个好东西,小星星";
    String rawKeywords = "电脑,星星,好好";
    // Split the keyword list and de-duplicate it before indexing.
    Set<String> keywordSet = new HashSet<>(Arrays.asList(rawKeywords.split(",")));
    WordMatchTreeUtils.TrieNode trie = WordMatchTreeUtils.buildTrie(keywordSet);
    Set<String> hitWords = WordMatchTreeUtils.getKeywordSetWithTree(text, trie);
    System.out.println(hitWords);
}
输出结果为:[电脑, 星星](hitWords 是 HashSet,元素的打印顺序不保证固定)
包含关键词:返回的 hitWords 即为文本中命中的关键词集合,hitWords 非空表示文本包含关键词
排除关键词:返回的 hitWords 为空,即表示文本中不包含任何关键词
要高亮显示直接在文本中将命中的关键字替换为<font color='#7FFF00'>电脑</font>, <font color='#7FFF00'>星星</font>即可