【Lucene】三个高亮显示模块的简单示例-Highlighter

Lucene 针对高亮显示功能提供了两种实现方式,分别是 Highlighter 和 FastVectorHighlighter。

这里的三个示例都使用 Highlighter。

示例代码:

package com.tan.code;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Three small demonstrations of Lucene's {@link Highlighter}:
 * <ol>
 * <li>{@link #highlighter()} - highlight a fixed term in an in-memory text
 * using the highlighter's default formatter;</li>
 * <li>{@link #highlighter_CSS(String)} - highlight a parsed query in the same
 * text using a custom HTML/CSS formatter;</li>
 * <li>{@link #highlighter_SR(String, String)} - highlight the hits of a search
 * against an existing on-disk index.</li>
 * </ol>
 * Every method prints its result to standard output.
 */
public class HighlighterTest {

	// Text to be highlighted (the content is purely fictional).
	private final String text = "China has lots of people,most of them are very poor.China is very big.China become strong now,but the poor people is also poor than other controry";

	/**
	 * Highlights the term {@code china} in the sample text and prints the
	 * best fragment, using the highlighter's default formatter.
	 *
	 * @throws IOException
	 *             if tokenizing the text fails
	 * @throws InvalidTokenOffsetsException
	 *             if a token offset lies outside the text
	 */
	public void highlighter() throws IOException, InvalidTokenOffsetsException {

		// The term is lower-case because the text is tokenized with
		// SimpleAnalyzer below; see the Lucene documentation for its
		// lower-casing behaviour.
		TermQuery termQuery = new TermQuery(new Term("field", "china"));
		TokenStream tokenStream = new SimpleAnalyzer(Version.LUCENE_43)
				.tokenStream("field", new StringReader(text));

		QueryScorer queryScorer = new QueryScorer(termQuery);
		Highlighter highlighter = new Highlighter(queryScorer);
		highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
		System.out.println(highlighter.getBestFragment(tokenStream, text));
	}

	/**
	 * Highlights the matches of {@code searchText} in the sample text with a
	 * CSS-styled {@code <span>} and prints up to four fragments joined by
	 * {@code "..."}.
	 *
	 * @param searchText
	 *            query string, parsed against the field {@code "field"}
	 * @throws ParseException
	 *             if {@code searchText} is not a valid query
	 * @throws IOException
	 *             if tokenizing the text fails
	 * @throws InvalidTokenOffsetsException
	 *             if a token offset lies outside the text
	 */
	public void highlighter_CSS(String searchText) throws ParseException,
			IOException, InvalidTokenOffsetsException {

		// One analyzer serves both the query parser and the text tokenization.
		SimpleAnalyzer analyzer = new SimpleAnalyzer(Version.LUCENE_43);

		// Build the query.
		QueryParser queryParser = new QueryParser(Version.LUCENE_43, "field",
				analyzer);
		Query query = queryParser.parse(searchText);

		// Custom tags wrapped around each highlighted term. The CSS property
		// is "background"; the earlier spelling "backgroud" is not a valid
		// property and left the span unstyled.
		SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(
				"<span style=\"background:red\">", "</span>");

		// Tokenize the text to be highlighted.
		TokenStream tokenStream = analyzer.tokenStream("field",
				new StringReader(text));

		// Score fragments by the query terms of the field "field".
		QueryScorer queryScorer = new QueryScorer(query, "field");

		Highlighter highlighter = new Highlighter(htmlFormatter, queryScorer);
		highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));

		System.out.println(highlighter.getBestFragments(tokenStream, text, 4,
				"..."));
	}

	/**
	 * Searches the on-disk index at {@code E://MyIndex} and prints, for every
	 * hit, the best highlighted fragment of the stored value of {@code field}.
	 * <p>
	 * The same {@code field} is used for parsing the query, scoring fragments
	 * and fetching the text to highlight, so the highlighted terms always
	 * belong to the field that was searched. The index reader and directory
	 * are closed before the method returns, also when an exception is thrown.
	 *
	 * @param field
	 *            name of the field to search and highlight; its value is read
	 *            back from the stored document
	 * @param searchText
	 *            query string, parsed with {@code IKAnalyzer}
	 * @throws IOException
	 *             if the index cannot be opened or read
	 * @throws ParseException
	 *             if {@code searchText} is not a valid query
	 * @throws InvalidTokenOffsetsException
	 *             if a token offset lies outside the stored text
	 */
	public void highlighter_SR(String field, String searchText)
			throws IOException, ParseException, InvalidTokenOffsetsException {

		// For brevity this example reuses an index built in an earlier
		// experiment.
		Directory directory = new SimpleFSDirectory(new File("E://MyIndex"));
		try {
			IndexReader reader = DirectoryReader.open(directory);
			try {
				IndexSearcher searcher = new IndexSearcher(reader);

				// Created once and shared by the parser and by every per-hit
				// token stream instead of being rebuilt inside the loop.
				IKAnalyzer analyzer = new IKAnalyzer(true);

				QueryParser parser = new QueryParser(Version.LUCENE_43, field,
						analyzer);
				Query query = parser.parse(searchText);

				// Collect at most 10000 matching documents.
				TopDocs topDocs = searcher.search(query, 10000);
				ScoreDoc[] hits = topDocs.scoreDocs;

				System.out.println("本次命中数据:" + hits.length);

				QueryScorer scorer = new QueryScorer(query, field);
				Highlighter highlighter = new Highlighter(scorer);
				highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

				for (ScoreDoc hit : hits) {
					Document document = searcher.doc(hit.doc);
					String storedText = document.get(field);
					TokenStream tokenStream = TokenSources.getAnyTokenStream(
							reader, hit.doc, field, document, analyzer);
					System.out.println(highlighter.getBestFragment(tokenStream,
							storedText));
				}
			} finally {
				reader.close();
			}
		} finally {
			directory.close();
		}
	}
}
测试代码:

	/**
	 * Runs the three highlighting demos one after another; each demo prints
	 * its own result to standard output.
	 */
	@Test
	public void test() throws IOException, InvalidTokenOffsetsException, ParseException {
		final HighlighterTest demo = new HighlighterTest();

		// Demo 1: fixed term query against the in-memory sample text.
		demo.highlighter();

		// Demo 2: CSS-formatted highlighting, once per keyword, in this order.
		final String[] keywords = { "china", "poor" };
		for (String keyword : keywords) {
			demo.highlighter_CSS(keyword);
		}

		// Demo 3: highlight search hits of the "content" field; this uses the
		// index that highlighter_SR opens.
		demo.highlighter_SR("content", "床前明月光");
	}

测试结果:

<B>China</B> has lots of people,most of them are very poor。<B>China</B> is very big.<B>China</B> become strong now,but the poor people is also poor than other controry
<span style="backgroud:red">China</span> has lots of people,most of them are very poor。<span style="backgroud:red">China</span> is very big.<span style="backgroud:red">China</span> become strong now,but the poor people is also poor than other controry
China has lots of people,most of them are very <span style="backgroud:red">poor</span>。China is very big.China become strong now,but the <span style="backgroud:red">poor</span> people is also <span style="backgroud:red">poor</span> than other controry
本次命中数据:1
<B>床</B><B>前</B><B>明月光</B>,疑是地上霜

【本例参考《Lucene In Action》】

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
Lucene是一个开源的全文搜索引擎库,用于实现文本索引和搜索功能。下面是一个Lucene的使用示例:

1. 创建索引:

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// 创建索引
public class Indexer {
    public static void main(String[] args) {
        // 索引存储路径
        String indexPath = "path_to_index_directory";
        try {
            // 创建分词器
            Analyzer analyzer = new StandardAnalyzer();
            // 创建索引配置
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
            // 创建索引目录
            Directory directory = FSDirectory.open(Paths.get(indexPath));
            // 创建索引写入器
            IndexWriter indexWriter = new IndexWriter(directory, config);
            // 创建文档
            Document document = new Document();
            document.add(new Field("content", "Hello world!", TextField.TYPE_STORED));
            // 将文档添加到索引中
            indexWriter.addDocument(document);
            // 提交索引
            indexWriter.commit();
            // 关闭索引写入器
            indexWriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
```

2. 搜索索引:

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// 搜索索引
public class Searcher {
    public static void main(String[] args) {
        // 索引目录路径
        String indexPath = "path_to_index_directory";
        try {
            // 创建分词器
            Analyzer analyzer = new StandardAnalyzer();
            // 创建索引目录
            Directory directory = FSDirectory.open(Paths.get(indexPath));
            // 创建索引读取器
            IndexReader indexReader = DirectoryReader.open(directory);
            // 创建索引搜索器
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // 创建查询解析器
            QueryParser parser = new QueryParser("content", analyzer);
            // 创建查询
            Query query = parser.parse("Hello");
            // 执行查询,获取前n个结果
            TopDocs topDocs = indexSearcher.search(query, 10);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            // 遍历结果
            for (ScoreDoc scoreDoc : scoreDocs) {
                int docId = scoreDoc.doc;
                Document document = indexSearcher.doc(docId);
                System.out.println("Content: " + document.get("content"));
            }
            // 关闭索引读取器
            indexReader.close();
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }
}
```

以上示例演示了如何使用Lucene创建索引并进行搜索。在创建索引时,需要定义分词器、索引配置、文档字段等。在搜索索引时,需要创建查询解析器、执行查询并获取结果。你可以根据自己的需求进行更多的定制和优化。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值