使用 Lucene 对搜索结果进行高亮显示

 

  加入jar包:

 

 

代码:

 

package cn.com.leadfar.test;

 

import java.io.File;

import java.io.FileNotFoundException;

import java.io.FileReader;

import java.io.IOException;

import java.text.SimpleDateFormat;

import java.util.Collection;

import java.util.Date;

 

import junit.framework.TestCase;

 

import net.paoding.analysis.analyzer.PaodingAnalyzer;

 

import org.apache.commons.io.FileUtils;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriter.MaxFieldLength;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.highlight.Highlighter;

import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;

import org.apache.lucene.search.highlight.QueryScorer;

import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.store.SimpleFSDirectory;

import org.apache.lucene.util.Version;

 

public class BuildIndex extends TestCase {

                  

                   public void testBuildIndex() throws Exception{

                            //指定索引文件存放位置

                            File file  = new File("E:\\lucene\\buildIndex");

                            //要加入索引库的文件

                            File file2  = new File("E:\\OA");

                            //指定索引文件存放的目录

                            Directory dir = new SimpleFSDirectory(file);

                  

                            //指定分词器

                            //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

                            //改为paoding分词器

                            Analyzer analyzer = new PaodingAnalyzer(); 

                            //创建IndexWriter对象

                            /**

                             * 参数1:指定文件索引存放位置

                             * 参数2:指定分词器

                             * 参数3:优化索引字段最大长度

                             */

                            IndexWriter indexWriter = new IndexWriter(dir,analyzer,MaxFieldLength.LIMITED);

        

                           

                                     indexDocs(file2,indexWriter);

                                     //作一些优化

                                     indexWriter.optimize();

                                     //关闭IndexWriter对象

                                     indexWriter.close();

                   }

                  

                  

                   private void indexDocs(File f,IndexWriter indexWriter) throws Exception{

                           

                            Collection<File> files = FileUtils.listFiles(f,new String[]{"rar"},true);

                            for(File file:files){

                                     //每个文件创建一个Document对象

                                     Document doc =  createDocument(file);

                                    

                                     //加入Document

                                     indexWriter.addDocument(doc);

                            }

                           

                  

                   }

 

                            public Document createDocument(File f) throws FileNotFoundException{

                           

                                     SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");

                                    

                                     //创建文档对象

                                     Document doc = new Document();

                                    

                                     //搜索f目录下的所有包含java的文件

                                     /**

                                      * 第一个参数:指定一个搜索目录

                                      *第二个参数:指定文件名包含的字符

                                      *第三个参数:为true表示搜索子目录

                                      */

                           

                                    

                                     //创建一个索引字段,

                                     /**

                                      * 第一个参数:表示索引字段名

                                      * 第二个参数:表示指定要搜索的字符串

                                      * 第三个参数:表示是否添加到索引文件中

                                      * 第四个参数:表示是否作为索引字段

                                      */

                                               Field field = new Field("path",f.getPath(),Field.Store.YES,Field.Index.ANALYZED);

                                              

                                               doc.add(field);

                                               FileReader fileReader = new FileReader(f);

                                              

                                               /**

                                                * 将文件内容作为索引

                                                * 默认:

                                                * 1、不添加到索引文件

                                                * 2、将此字段作为索引字段

                                                */

                                               Field  field2 = new Field("content",fileReader);

                                              

                                               doc.add(field2);

                                                                

                                               Date date = new Date(f.lastModified());

                                               //将最后更新时间作为索引字段

                                               Field  field3 = new Field("lastUpdateTime",simpleDateFormat.format(date),Field.Store.YES,Field.Index.ANALYZED);

                                               doc.add(field3);

                           

                                    

                                    

                                     return doc;

                            }

                           

                           

                            public void testSearchIndex() throws IOException, ParseException, InvalidTokenOffsetsException{

                                     File indexDir =new File("E:\\lucene\\buildIndex");

                                     //指定分词器

                                     //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

                                     //改为paoding分词器

                                     Analyzer analyzer = new PaodingAnalyzer(); 

                                     //指定搜索的目录

                                     Directory dir = FSDirectory.open(indexDir);

                                     //luncene搜索功能接口

                                     /**

                                      * 参数1:指定搜索的索引文件存放的目录

                                      * 参数2:为true表示不允许修改,即只读

                                      */

                                     IndexSearcher  indexSrarcher = new IndexSearcher(dir,true);

                                     //根据指定的字段查询

                                     /**

                                      * 参数一:指定luncene版本

                                       * 参数二:表示指定查询的字段

                                      * 参数三:表示指定分词器

                                      */

                                     QueryParser parser = new QueryParser(Version.LUCENE_30,"path", analyzer);

                                     //输入查询的字符串

                                     Query query = parser.parse("实体");

                            //搜索到得记录数

                            ScoreDoc[] hits = indexSrarcher.search(query, null, 1000).scoreDocs;

                            System.out.println("查询到得记录数为【"+hits.length+"】");

                           

                            for(ScoreDoc scoreDoc:hits){

                                     //通过文档标识搜索

                                     Document doc =indexSrarcher.doc(scoreDoc.doc);

                                     //准备高亮显示

                                     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");

                                    

                                     //创建高亮对象

                                     Highlighter highlighter = new Highlighter(formatter,new QueryScorer(query));

                            String text =    highlighter.getBestFragment(analyzer,"path", doc.get("path"));

                                    

                           

                            System.out.println("高亮显示的标题为"+text);

                            //System.out.println("标题为-【"+doc.get("path")+"】最后更新时间为【"+doc.get("lastUpdateTime")+"】");

                                    

                                     //System.out.println("内容为-【"+doc.get("content"));

                                    

                            }

                           

                           

                            }

                           

                  

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值