package cn.lucene.highlighter;
import java.io.File;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import cn.lucene.utils.LuceneUtil;
/**
 * Demonstrates keyword highlighting with the Lucene highlighter package.
 *
 * <p>{@link #create()} builds an index from a single text file and
 * {@link #query()} searches that index and prints every hit with the matching
 * terms of the "content" field wrapped in an HTML {@code <font>} tag.
 */
public class HighLighterDemo {
    /** Text file that {@link #create()} turns into a document. */
    String filePath = "F:\\workspace\\Lucene\\resource\\小笑话.txt";
    /** Directory in which the index is written and from which it is searched. */
    String indexPath = "F:\\workspace\\Lucene\\indexPath";
    /** Analyzer shared by indexing, query parsing and highlighting. */
    Analyzer analyzer = new MMAnalyzer();

    /** Fragment size (in characters) of the highlighted summary and of the plain fallback summary. */
    private static final int FRAGMENT_SIZE = 20;

    /**
     * Creates the index at {@link #indexPath} (the {@code true} flag asks the
     * writer to create a new index) and adds the document built from
     * {@link #filePath}.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void create() throws Exception {
        Directory directory = FSDirectory.getDirectory(indexPath);
        IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, MaxFieldLength.LIMITED);
        try {
            Document document = LuceneUtil.fileToDocument(new File(filePath));
            indexWriter.addDocument(document);
        } finally {
            // Close the writer even when building or adding the document fails.
            indexWriter.close();
        }
    }

    /**
     * Searches the "name" and "content" fields for a fixed query string and
     * prints up to 100 hits, replacing each hit's "content" value with a
     * highlighted fragment.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void query() throws Exception {
        String queryString = "房间";
        IndexSearcher searcher = new IndexSearcher(indexPath);
        try {
            String[] fields = {"name", "content"};
            QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
            Query query = queryParser.parse(queryString);
            Filter filter = null; // no filtering
            TopDocs topDocs = searcher.search(query, filter, 100);
            System.out.println("docunent的数量:【" + topDocs.totalHits + "】");

            // Prepare the highlighter: matched terms are wrapped in a red <font> tag.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer fragmentScorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
            // Limit the size (in characters) of the fragment that is shown.
            Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
            highlighter.setTextFragmenter(fragmenter);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                String content = doc.get("content");
                if (content == null) {
                    // Nothing stored under "content" for this hit, so there is nothing to highlight.
                    LuceneUtil.printDocument(doc);
                    continue;
                }
                // Highlighted fragment of the "content" field.
                String hc = highlighter.getBestFragment(analyzer, "content", content);
                if (hc == null) {
                    // The query term does not occur in the content (the hit may have
                    // come from the "name" field). Show the beginning of the original
                    // text instead of overwriting the field with null.
                    hc = content.substring(0, Math.min(FRAGMENT_SIZE, content.length()));
                }
                // Replace the content field's value in the retrieved document.
                doc.getField("content").setValue(hc);
                LuceneUtil.printDocument(doc);
            }
        } finally {
            // Close the searcher on every path, including parse and search failures.
            searcher.close();
        }
    }
}
工具类 LuceneUtil(位于 cn.lucene.utils 包,独立的源文件):
package cn.lucene.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
/**
 * Static helpers for turning a text file into a Lucene {@link Document} and
 * for printing a document to standard output.
 */
public class LuceneUtil {

    /**
     * Builds a document for the given file with two stored, analyzed fields:
     * "name" (the file name) and "content" (the text returned by
     * {@link #getFileContent(File)}).
     *
     * @param file the file to convert
     * @return a new document describing the file
     * @throws RuntimeException if the file cannot be read
     */
    public static Document fileToDocument(File file) {
        Document document = new Document();
        document.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        document.add(new Field("content", getFileContent(file), Store.YES, Index.ANALYZED));
        return document;
    }

    /**
     * Reads the whole file as text and returns its lines concatenated.
     *
     * <p>Line terminators are not kept: each line is appended directly after
     * the previous one. No charset is passed to the reader, so the platform
     * default charset is used for decoding.
     *
     * @param file the file to read
     * @return the concatenated lines of the file; empty if the file has no lines
     * @throws RuntimeException wrapping any exception raised while opening or reading the file
     */
    public static String getFileContent(File file) {
        StringBuilder text = new StringBuilder();
        try {
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    text.append(line);
                }
            } finally {
                // Closing the reader also closes the underlying file stream, so the
                // file handle is released on both the success and the failure path.
                reader.close();
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return text.toString();
    }

    /**
     * Prints the "name" and "content" values of the document to standard
     * output, framed by start/end marker lines.
     *
     * @param document the document to print
     */
    public static void printDocument(Document document) {
        System.out.println("-------------start-----------------");
        System.out.println("name:" + document.get("name"));
        System.out.println("content:" + document.get("content"));
        System.out.println("-------------end-----------------");
    }
}