搜索引擎在展示结果时,会对与用户输入相匹配的内容做配色处理;这种把匹配内容与普通文本区分开来的效果就是高亮显示。
这样做的好处:
·视觉上便于用户快速定位与搜索内容相对应的文本块;
·界面展示更友好;
Lucene 提供了 highlighter 模块来实现这种效果。
highlighter对查询关键字高亮处理:
import com.example.springboot.entity.Patent;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Full-text search over the patent Lucene index with HTML highlighting of the
 * matched query terms in the {@code name} and {@code abstract} fields.
 *
 * @author dell
 * @date 2022-04-05 15:16
 */
public class DataBaseSearcher {

    /** File-system location of the Lucene index directory. */
    public static final String INDEXPATH = "D:\\lucene-4.6.0\\indexAndDocs\\index";

    /** Maximum number of hits fetched from the index per search. */
    private static final int MAX_HITS = 1000;

    /** Index field holding the patent title. */
    private static final String FIELD_NAME = "name";

    /** Index field holding the patent abstract. */
    private static final String FIELD_ABSTRACT = "abstract";

    /**
     * Runs {@code queryString} against the {@code name} and {@code abstract} fields
     * (terms are combined with AND by default) and converts every hit into a
     * {@link Patent}.
     *
     * <p>The patent's name and content are HTML strings: matched terms are wrapped in
     * {@code <span style="color:yellow;">}; when nothing in a field matched, the plain
     * stored value is wrapped in a bare {@code <span>} instead.
     *
     * <p>Failures (unreadable index, unparsable query, invalid token offsets) are
     * reported on stderr and the hits collected so far are returned, so callers never
     * see a checked exception.
     *
     * @param queryString query in Lucene classic query-parser syntax; {@code null} or
     *                    blank yields an empty result
     * @return the matching patents in the order returned by the searcher; never {@code null}
     */
    public static List<Patent> searchData(String queryString) {
        List<Patent> patentList = new ArrayList<>();
        // A null query would make the parser throw an unchecked NullPointerException.
        if (queryString == null || queryString.trim().isEmpty()) {
            return patentList;
        }

        // try-with-resources: the reader and the directory hold OS file handles and
        // must be released after every search (reader first, then directory).
        try (Directory dir = FSDirectory.open(new File(INDEXPATH));
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);

            // Search both fields with a single query string.
            String[] fields = {FIELD_NAME, FIELD_ABSTRACT};
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_46, fields, analyzer);
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);
            Query query = parser.parse(queryString);

            QueryScorer scorer = new QueryScorer(query, FIELD_ABSTRACT);
            SimpleHTMLFormatter formatter =
                    new SimpleHTMLFormatter("<span style=\"color:yellow;\">", "</span>");
            Highlighter highlighter = new Highlighter(formatter, scorer);

            TopDocs hits = searcher.search(query, MAX_HITS);
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);

                Patent patent = new Patent();
                patent.setId(doc.get("id"));
                patent.setName(highlightField(
                        highlighter, scorer, reader, scoreDoc.doc, doc, FIELD_NAME, analyzer));
                // The abstract always goes into the content property, highlighted or not.
                patent.setContent(highlightField(
                        highlighter, scorer, reader, scoreDoc.doc, doc, FIELD_ABSTRACT, analyzer));
                patent.setType(doc.get("type"));
                patent.setSqrq(doc.get("sqrq"));
                patent.setGkr(doc.get("gkr"));
                patent.setGkh(doc.get("gkh"));
                patent.setZflh(doc.get("zflh"));
                patent.setFlh(doc.get("flh"));
                patent.setFmr(doc.get("fmr"));
                patent.setZldljg(doc.get("zldljg"));
                patent.setDlr(doc.get("dlr"));
                patent.setSsdm(doc.get("ssdm"));
                patent.setFlzt(doc.get("flzt"));
                patent.setUrl(doc.get("url"));
                patent.setField(doc.get("field"));
                // Relevance score scaled by 1000 and truncated to an int.
                patent.setWeight((int) (1000 * (scoreDoc.score)));
                patentList.add(patent);
            }
        } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
            // Best effort: report the failure and return whatever was collected.
            e.printStackTrace();
        }
        return patentList;
    }

    /**
     * Returns the stored value of {@code field} as an HTML string with the query
     * terms highlighted, or the plain value wrapped in {@code <span>} when the
     * highlighter found nothing to mark. A missing or empty stored value yields
     * {@code "<span></span>"}.
     *
     * <p>The fragment size is set to the length of the field's own text so that the
     * fragment is sized to the whole value rather than to another field's length.
     *
     * <p>NOTE(review): the stored text is inserted into HTML without escaping —
     * confirm the indexed data is trusted or escape it before rendering.
     */
    private static String highlightField(Highlighter highlighter, QueryScorer scorer,
                                         IndexReader reader, int docId, Document doc,
                                         String field, Analyzer analyzer)
            throws IOException, InvalidTokenOffsetsException {
        String raw = doc.get(field);
        if (raw == null || raw.isEmpty()) {
            // Nothing to analyze; asking TokenSources for a missing field would throw.
            return "<span></span>";
        }
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, raw.length()));
        TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, field, analyzer);
        String highlighted = highlighter.getBestFragment(tokenStream, raw);
        return highlighted != null ? highlighted : "<span>" + raw + "</span>";
    }
}