Lucene高亮器

最新推荐文章于 2024-08-12 09:00:35 发布

码农wind

最新推荐文章于 2024-08-12 09:00:35 发布

阅读量910

点赞数

分类专栏： Lucene 文章标签： Lucene

Lucene 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

package cn.lucene.highlighter;

import java.io.File;

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import cn.lucene.utils.LuceneUtil;

/**
 * JUnit-driven demo of Lucene's highlighter: {@link #create()} builds an index
 * from a single text file and {@link #query()} searches it, replacing each
 * hit's "content" value with a highlighted fragment before printing it.
 *
 * <p>{@code query()} reads the index written by {@code create()}, so
 * {@code create()} has to be run first.
 */
public class HighLighterDemo {

	/** Size passed to {@link SimpleFragmenter}; also used for the fallback excerpt. */
	private static final int FRAGMENT_SIZE = 20;

	String filePath = "F:\\workspace\\Lucene\\resource\\小笑话.txt";
	String indexPath = "F:\\workspace\\Lucene\\indexPath";
	
	Analyzer analyzer = new MMAnalyzer();

	/**
	 * Creates the index at {@code indexPath} and adds one document built from
	 * the file at {@code filePath}.
	 *
	 * @throws Exception if the index cannot be opened or written
	 */
	@Test
	public void create() throws Exception{
		Directory directory = FSDirectory.getDirectory(indexPath);
		try {
			IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, MaxFieldLength.LIMITED);
			try {
				Document document = LuceneUtil.fileToDocument(new File(filePath));
				indexWriter.addDocument(document);
			} finally {
				// Close even when adding the document fails, so the writer is not left open.
				indexWriter.close();
			}
		} finally {
			// The writer was handed an externally opened Directory, so it is closed here.
			directory.close();
		}
	}

	/**
	 * Searches the "name" and "content" fields for a fixed query string and
	 * prints every hit with its "content" replaced by a highlighted fragment.
	 *
	 * @throws Exception if the index cannot be read or the query cannot be parsed
	 */
	@Test
	public void query() throws Exception{
		String queryString = "房间";
		IndexSearcher searcher = new IndexSearcher(indexPath);
		try {
			String[] fields = {"name", "content"};
			QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
			Query query = queryParser.parse(queryString);
			Filter filter = null;
			TopDocs topDocs = searcher.search(query, filter, 100);
			System.out.println("docunent的数量：【" + topDocs.totalHits + "】");

			// Set up the highlighter: matched terms are wrapped in a red <font> tag.
			Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
			Scorer fragmentScorer = new QueryScorer(query);
			Highlighter highlighter = new Highlighter(formatter, fragmentScorer);

			// Limit the size of the displayed fragment.
			Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
			highlighter.setTextFragmenter(fragmenter);

			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				Document doc = searcher.doc(scoreDoc.doc);
				String content = doc.get("content");
				if (content != null) {
					// Highlighted fragment of the "content" field.
					String hc = highlighter.getBestFragment(analyzer, "content", content);
					if (hc == null) {
						// No query term occurs in "content" (the hit came from "name"),
						// so show a leading excerpt instead of overwriting the field with null.
						hc = content.length() > FRAGMENT_SIZE
								? content.substring(0, FRAGMENT_SIZE)
								: content;
					}
					// Replace the document's "content" value with the fragment for display.
					doc.getField("content").setValue(hc);
				}
				LuceneUtil.printDocument(doc);
			}
		} finally {
			// Release the index files held by the searcher, even if the search fails.
			searcher.close();
		}
	}
}

工具类：

package cn.lucene.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;

/**
 * Static helpers for turning a text file into a Lucene {@link Document} and
 * for printing a document's "name" and "content" fields.
 */
public class LuceneUtil {

	/**
	 * Builds a document from a file, decoding it with the platform default charset.
	 *
	 * @param file the file to convert
	 * @return a document with stored, analyzed "name" and "content" fields
	 * @throws RuntimeException if the file cannot be read
	 */
	public static Document fileToDocument(File file){
		return fileToDocument(file, Charset.defaultCharset());
	}

	/**
	 * Builds a document from a file, decoding it with the given charset.
	 *
	 * @param file    the file to convert
	 * @param charset the charset used to decode the file's bytes
	 * @return a document whose "name" is the file name and whose "content" is
	 *         the file's text, both stored and analyzed
	 * @throws RuntimeException if the file cannot be read
	 */
	public static Document fileToDocument(File file, Charset charset){
		Document document = new Document();
		document.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
		document.add(new Field("content", getFileContent(file, charset), Store.YES, Index.ANALYZED));
		return document;
	}

	/**
	 * Reads a file as text using the platform default charset.
	 *
	 * @param file the file to read
	 * @return the file's lines concatenated without line terminators
	 * @throws RuntimeException if the file cannot be read
	 */
	public static String getFileContent(File file) {
		return getFileContent(file, Charset.defaultCharset());
	}

	/**
	 * Reads a file as text using the given charset.
	 *
	 * <p>Lines are appended back to back: the line terminators consumed by
	 * {@code readLine()} are not re-inserted.
	 *
	 * @param file    the file to read
	 * @param charset the charset used to decode the file's bytes
	 * @return the file's lines concatenated without line terminators
	 * @throws RuntimeException wrapping the {@link IOException} if the file
	 *         cannot be opened or read
	 */
	public static String getFileContent(File file, Charset charset) {
		StringBuilder sb = new StringBuilder();
		try {
			FileInputStream in = new FileInputStream(file);
			try {
				BufferedReader br = new BufferedReader(new InputStreamReader(in, charset));
				String line;
				while ((line = br.readLine()) != null) {
					sb.append(line);
				}
			} finally {
				// Closing the underlying stream releases the file handle on every path.
				in.close();
			}
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
		return sb.toString();
	}
	
	/**
	 * Prints the document's "name" and "content" values to standard output,
	 * framed by start/end marker lines.
	 *
	 * @param document the document to print
	 */
	public static void printDocument(Document document){
		System.out.println("-------------start-----------------");
		System.out.println("name:" + document.get("name"));
		System.out.println("content:" + document.get("content"));
		System.out.println("-------------end-----------------");
	}
}