使用lucene将索引结果进行高亮显示

最新推荐文章于 2021-02-19 13:28:32 发布

a13272899370

最新推荐文章于 2021-02-19 13:28:32 发布

阅读量651

点赞数

分类专栏： lucene 文章标签： lucene file exception query path date

本文链接：https://blog.csdn.net/a13272899370/article/details/6683198

版权

lucene 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

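加入jar包（原文此处的截图已丢失，以下列表根据下方代码的 import 整理）：lucene-core 3.0.x、lucene-highlighter 3.0.x（高亮器的 QueryScorer 可能还需要 lucene-memory）、paoding-analysis、commons-io、junit 3.x。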

代码：

package cn.com.leadfar.test;

import java.io.File;

import java.io.FileNotFoundException;

import java.io.FileReader;

import java.io.IOException;

import java.text.SimpleDateFormat;

import java.util.Collection;

import java.util.Date;

import junit.framework.TestCase;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.commons.io.FileUtils;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriter.MaxFieldLength;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.highlight.Highlighter;

import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;

import org.apache.lucene.search.highlight.QueryScorer;

import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.store.SimpleFSDirectory;

import org.apache.lucene.util.Version;

public class BuildIndex extends TestCase {

public void testBuildIndex() throws Exception{

//指定索引文件存放位置

File file = new File("E:\\lucene\\buildIndex");

//要加入索引库的文件

File file2 = new File("E:\\OA");

//指定索引文件存放的目录

Directory dir = new SimpleFSDirectory(file);

//指定分词器

//Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

//改为paoding分词器

Analyzer analyzer = new PaodingAnalyzer();

//创建IndexWriter对象

/**

* 参数1：指定文件索引存放位置

* 参数2：指定分词器

* 参数3：优化索引字段最大长度

IndexWriter indexWriter = new IndexWriter(dir,analyzer,MaxFieldLength.LIMITED);

indexDocs(file2,indexWriter);

//作一些优化

indexWriter.optimize();

//关闭IndexWriter对象

indexWriter.close();

}

private void indexDocs(File f,IndexWriter indexWriter) throws Exception{

Collection<File> files = FileUtils.listFiles(f,new String[]{"rar"},true);

for(File file:files){

//每个文件创建一个Document对象

Document doc = createDocument(file);

//加入Document

indexWriter.addDocument(doc);

}

public Document createDocument(File f) throws FileNotFoundException{

SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");

//创建文档对象

Document doc = new Document();

//搜索f目录下的所有包含java的文件

/**

* 第一个参数：指定一个搜索目录

*第二个参数：指定文件名包含的字符

*第三个参数：为true表示搜索子目录

//创建一个索引字段，

/**

* 第一个参数:表示索引字段名

* 第二个参数:表示指定要搜索的字符串

* 第三个参数：表示是否添加到索引文件中

* 第四个参数:表示是否作为索引字段

Field field = new Field("path",f.getPath(),Field.Store.YES,Field.Index.ANALYZED);

doc.add(field);

FileReader fileReader = new FileReader(f);

/**

* 将文件内容作为索引

* 默认：

* 1、不添加到索引文件

* 2、将此字段作为索引字段

Field field2 = new Field("content",fileReader);

doc.add(field2);

Date date = new Date(f.lastModified());

//将最后更新时间作为索引字段

Field field3 = new Field("lastUpdateTime",simpleDateFormat.format(date),Field.Store.YES,Field.Index.ANALYZED);

doc.add(field3);

return doc;

}

public void testSearchIndex() throws IOException, ParseException, InvalidTokenOffsetsException{

File indexDir =new File("E:\\lucene\\buildIndex");

//指定分词器

//Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

//改为paoding分词器

Analyzer analyzer = new PaodingAnalyzer();

//指定搜索的目录

Directory dir = FSDirectory.open(indexDir);

//luncene搜索功能接口

/**

* 参数1：指定搜索的索引文件存放的目录

* 参数2：为true表示不允许修改，即只读

IndexSearcher indexSrarcher = new IndexSearcher(dir,true);

//根据指定的字段查询

/**

* 参数一：指定luncene版本

* 参数二：表示指定查询的字段

* 参数三：表示指定分词器

QueryParser parser = new QueryParser(Version.LUCENE_30,"path", analyzer);

//输入查询的字符串

Query query = parser.parse("实体");

//搜索到得记录数

ScoreDoc[] hits = indexSrarcher.search(query, null, 1000).scoreDocs;

System.out.println("查询到得记录数为【"+hits.length+"】");

for(ScoreDoc scoreDoc:hits){

//通过文档标识搜索

Document doc =indexSrarcher.doc(scoreDoc.doc);

//准备高亮显示

SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");

//创建高亮对象

Highlighter highlighter = new Highlighter(formatter,new QueryScorer(query));

String text = highlighter.getBestFragment(analyzer,"path", doc.get("path"));

System.out.println("高亮显示的标题为"+text);

//System.out.println("标题为-【"+doc.get("path")+"】最后更新时间为【"+doc.get("lastUpdateTime")+"】");

//System.out.println("内容为-【"+doc.get("content"));

}

a13272899370

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录