package lucene;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.RAMDirectory;

import junit.framework.TestCase;

/**
 * Demonstrates highlighting of query matches with the Lucene highlighter.
 *
 * <p>Each test run indexes the sample sentences in {@link #words} into an
 * in-memory {@link RAMDirectory}, searches them for {@link #queryString} and
 * prints the highlighted fragments of every hit to standard output.
 *
 * <p>NOTE(review): the test only prints its results and asserts nothing, so it
 * fails only when an exception is thrown. Consider adding assertions on the
 * hit count and on the highlighted text.
 */
public class WordsHighlighterTest extends TestCase {

    /** Reader over the index built in {@link #setUp()}; used to rewrite the query. */
    private IndexReader reader;

    /** In-memory directory holding the index for the current test. */
    RAMDirectory ramDirectory;

    /** Name of the single field every sample document is indexed under. */
    final private static String FIELD_NAME = "contents";

    /** Query text that is parsed, searched for and highlighted. */
    final private static String queryString = "索引";

    /** Sample sentences; each one becomes its own document in the index. */
    String[] words = {
        "1:索引内容结构:Document,以及包含于Document的多个Field索",
        "2:索引内容优先性调整因子,boost(可对整个Document或Field指定).",
        "3:索引的写入IndexWriter,索引的写入目标Directory,实现包括FsDirectory跟RamDirectory等",
        "4:索引创建速度的调整"
    };

    /**
     * Builds a fresh in-memory index containing one document per entry of
     * {@link #words} and opens {@link #reader} on it.
     *
     * @throws Exception if the index cannot be written or opened
     */
    protected void setUp() throws Exception {
        super.setUp();
        ramDirectory = new RAMDirectory();
        IndexWriter indexWriter = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
        try {
            for (int i = 0; i < words.length; i++) {
                addDoc(indexWriter, words[i]);
            }
            indexWriter.optimize();
        } finally {
            // Always release the writer, even when adding a document fails.
            indexWriter.close();
        }
        reader = IndexReader.open(ramDirectory);
    }

    /**
     * Adds a single-field document holding {@code s} to the index.
     *
     * <p>The text is stored, tokenized and indexed with term vectors under
     * {@link #FIELD_NAME}.
     *
     * @param indexWriter writer the document is added to
     * @param s text of the document
     * @throws IOException if the writer fails to add the document
     */
    private void addDoc(IndexWriter indexWriter, String s) throws IOException {
        Document doc = new Document();
        doc.add(new Field(FIELD_NAME, s, Field.Store.YES, Field.Index.TOKENIZED,
                Field.TermVector.YES));
        indexWriter.addDocument(doc);
    }

    /**
     * Searches the index for {@link #queryString} and prints, for every hit,
     * the stored text with matches wrapped in a red {@code <font>} tag.
     *
     * @throws Exception if parsing, searching or highlighting fails
     */
    public void testSimpleWords() throws Exception {
        Analyzer analyzer = new StandardAnalyzer();

        Query query = new QueryParser(FIELD_NAME, analyzer).parse(queryString);
        // Rewrite against the reader before the query is handed to QueryScorer.
        query = query.rewrite(reader);
        System.out.println("Searching for: " + query.toString(FIELD_NAME));

        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<font color=\"red\">", "</font>"),
                new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(20));

        Searcher searcher = new IndexSearcher(ramDirectory);
        try {
            Hits hits = searcher.search(query);
            for (int i = 0; i < hits.length(); i++) {
                String text = hits.doc(i).get(FIELD_NAME);
                // Re-analyze the stored text so the highlighter can locate the matching tokens.
                TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
                String result = highlighter.getBestFragments(tokenStream, text, 4, "");
                System.out.println("\t" + result);
            }
        } finally {
            searcher.close();
        }
    }

    /**
     * Releases the reader and the in-memory directory created by {@link #setUp()}.
     *
     * @throws Exception if closing a resource fails
     */
    protected void tearDown() throws Exception {
        try {
            if (reader != null) {
                reader.close();
                reader = null;
            }
            if (ramDirectory != null) {
                ramDirectory.close();
                ramDirectory = null;
            }
        } finally {
            super.tearDown();
        }
    }
}