//... Above, create documents with two fields, one with term vectors (tv) and one without (notv)
// Search the "notv" field for "million", then for every hit print the scored,
// highlighted fragments of both the "notv" and the "tv" field.
IndexSearcher searcher = new IndexSearcher(directory);
QueryParser parser = new QueryParser("notv", analyzer);
Query query = parser.parse("million");
TopDocs hits = searcher.search(query, 10);
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
// Fields to highlight, in output order: first the one without term vectors, then the one with.
String[] fields = {"notv", "tv"};
// Bound the loop by the number of hits actually returned: the search asks for
// up to 10 documents but may match fewer, and indexing past the end of
// scoreDocs would throw ArrayIndexOutOfBoundsException.
for (int i = 0; i < hits.scoreDocs.length; i++) {
    int id = hits.scoreDocs[i].doc;
    Document doc = searcher.doc(id);
    for (String field : fields) {
        String text = doc.get(field);
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, field, analyzer);
        // Alternative returning ready-joined text: highlighter.getBestFragments(tokenStream, text, 3, "...");
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        for (int j = 0; j < frag.length; j++) {
            // Only print fragments that exist and scored above zero.
            if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                System.out.println((frag[j].toString()));
            }
        }
    }
    System.out.println("-------------");
}
以上代码需要导入 Lucene 的 highlight 包(org.apache.lucene.search.highlight)。
高亮显示的同时还能对文本进行摘要提取:提取出的摘要片段会自动带上高亮标记。