Lucene的配置与使用

最近在写一个项目的时候用到了Lucene全文检索,为避免以后忘记,现将配置方案记录如下

        1.导入Lucene所使用的jar包,如下图

                       

        解释下每个包的作用:IKAnalyzer2012_u6.jar,这是一个中文分词包,具有对中文进行分词的功能

                                               lucene-analyzers-3.6.0.jar,这是lucene用于分词的包,

                                               lucene-core-3.6.0.jar,这是lucene的核心包,提供lucene的核心功能

                                               lucene-highlighter-3.6.0.jar,这是lucene用于高亮显示所搜索的关键词的包,

                                               lucene-memory-3.6.0.jar,这是lucene的内存索引包(MemoryIndex),高亮包lucene-highlighter依赖它


         2.导入配置文件,在src目录下导入,在这里我导入到我所建立的source folder中,

            即config,该目录和src是同级的

                             

               解释下上面的三个配置文件:

                           1.ext.dic:用于存储扩展分词的词典,比如用户根据需要定义的一些词,比如人名

                           2.stopword.dic:用于存储停用词的词典,比如"的","和","了","都",这些助词、连词、副词等虚词,不能作为搜索的关键词

                           3.IKAnalyzer.cfg.xml:用于加载上面两个词典的配置文件

                 下面三张图,介绍了这三个配置文件的内容

                             

                          

                         这是ext.dic的配置,配置了三个分词,比如程序员,牛X,搞基

                          

                       

                                     这是stopword.dic的配置,你可以加入中文的停用词,比如"都",“了”,“和”等

                     

             

                            这是IKAnalyzer.cfg.xml配置,这里加载了ext.dic,stopword.dic配置文件

                           

             3.接下来准备三个工具类,这三个工具类需要自己来写

                                        1.Configuration.java,用于打开和索引库的连接和取得分词器,下面贴出代码,可以直接使用

                                           

package com.itheima.elec.util;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Holds the shared Lucene index directory and analyzer used by the utility classes.
 *
 * <p>Both objects are created once when the class is loaded. If either one cannot be
 * created, class initialization fails with an {@link ExceptionInInitializerError}
 * carrying the original cause, instead of silently leaving the fields {@code null}.
 */
public class Configuration {

	/** Location of the index on disk. */
	private static final Directory directory;
	/** Analyzer used when indexing documents and parsing queries. */
	private static final Analyzer analyzer;

	static {
		try {
			// The index is stored on the local file system under D:/indexDir/.
			directory = FSDirectory.open(new File("D:/indexDir/"));
			// Dictionary-based word segmentation provided by IK Analyzer.
			analyzer = new IKAnalyzer();
		} catch (Exception e) {
			// Fail fast: without a directory and an analyzer nothing in this package can work,
			// and a later NullPointerException would hide the real cause.
			throw new ExceptionInInitializerError(e);
		}
	}

	/**
	 * @return the shared index directory
	 */
	public static Directory getDirectory() {
		return directory;
	}

	/**
	 * @return the shared analyzer
	 */
	public static Analyzer getAnalyzer() {
		return analyzer;
	}

}

                       2.FileUploadDocument.java,因为lucene是使用Document对象来存储的,所以需要将javaBean对象转换成Document(lucene中的)才能存储到索引库中

                         这里使用ElecFileUpload这个对象举例,请根据你的具体对象来写这个工具类,这里提供两个方法,javaBean对象转成Document对象,Document对象

                    转换成javaBean对象

                       

package com.itheima.elec.util;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.util.NumericUtils;

import com.itheima.elec.domain.ElecFileUpload;

/**
 * Converts between the {@link ElecFileUpload} bean and the Lucene {@link Document}
 * that represents it in the index.
 */
public class FileUploadDocument {

	/**
	 * Converts an {@link ElecFileUpload} into a Lucene {@link Document}.
	 *
	 * <p>The primary key is stored prefix-coded and not analyzed, so it can be matched
	 * exactly. {@code fileName} and {@code comment} are analyzed for full-text search;
	 * {@code projId} and {@code belongTo} are indexed verbatim for exact term queries.
	 * A {@code null} text property is stored as an empty string, because the Lucene
	 * {@code Field} constructor rejects {@code null} values.
	 *
	 * @param elecFileUpload the bean to index
	 * @return a document containing all five fields, every one of them stored
	 */
	public static Document FileUploadToDocument(ElecFileUpload elecFileUpload){
		Document document = new Document();
		String seqId = NumericUtils.intToPrefixCoded(elecFileUpload.getSeqID());
		// Primary key
		document.add(new Field("seqId", seqId, Store.YES, Index.NOT_ANALYZED));
		// File name
		document.add(new Field("fileName", nullToEmpty(elecFileUpload.getFileName()), Store.YES, Index.ANALYZED));
		// File description
		document.add(new Field("comment", nullToEmpty(elecFileUpload.getComment()), Store.YES, Index.ANALYZED));
		// Owning unit
		document.add(new Field("projId", nullToEmpty(elecFileUpload.getProjID()), Store.YES, Index.NOT_ANALYZED));
		// Drawing category
		document.add(new Field("belongTo", nullToEmpty(elecFileUpload.getBelongTo()), Store.YES, Index.NOT_ANALYZED));
		return document;
	}

	/**
	 * Converts a Lucene {@link Document} back into an {@link ElecFileUpload}.
	 *
	 * @param document a document that contains a prefix-coded {@code seqId} field
	 * @return a new bean populated from the stored field values
	 */
	public static ElecFileUpload documentToFileUpload(Document document){
		ElecFileUpload elecFileUpload = new ElecFileUpload();
		Integer seqId = NumericUtils.prefixCodedToInt(document.get("seqId"));
		// Primary key
		elecFileUpload.setSeqID(seqId);
		// File name
		elecFileUpload.setFileName(document.get("fileName"));
		// File description
		elecFileUpload.setComment(document.get("comment"));
		// Owning unit
		elecFileUpload.setProjID(document.get("projId"));
		// Drawing category
		elecFileUpload.setBelongTo(document.get("belongTo"));
		return elecFileUpload;
	}

	/** Returns the empty string for {@code null}, otherwise the value itself. */
	private static String nullToEmpty(String value) {
		return value == null ? "" : value;
	}
}

                                     3. LuceneUtils.java 这个类,它是核心工具类,支持对Lucene索引库的CRUD操作


package com.itheima.elec.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;

import com.itheima.elec.domain.ElecFileUpload;

/**
 * Helper methods for adding to, searching and deleting from the Lucene index
 * described by {@link Configuration}.
 *
 * <p>All methods report failures by printing the stack trace and do not propagate
 * exceptions thrown while accessing the index.
 */
public class LuceneUtils {

	/** Maximum number of hits returned by a single search. */
	private static final int MAX_HITS = 100;

	/** Size of a highlighted fragment, also used to truncate text that has no highlight. */
	private static final int FRAGMENT_SIZE = 20;

	/**
	 * Adds one record to the index.
	 *
	 * @param elecFileUpload the record to index
	 */
	 public static void addIndex(ElecFileUpload elecFileUpload){
		// Convert the bean into a Lucene document first.
		Document document = FileUploadDocument.FileUploadToDocument(elecFileUpload);

		IndexWriter indexWriter = null;
		try {
			IndexWriterConfig indexWriterConfig = new IndexWriterConfig(
					Version.LUCENE_36, Configuration.getAnalyzer());
			indexWriter = new IndexWriter(
					Configuration.getDirectory(), indexWriterConfig);
			indexWriter.addDocument(document);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Always close the writer, even on failure, so the index write lock is released.
			closeWriter(indexWriter);
		}
	 }

	/**
	 * Searches the index and returns the matching records with highlighted
	 * {@code fileName} and {@code comment} values.
	 *
	 * <p>Every non-blank argument becomes a mandatory (AND) clause. At most
	 * {@value #MAX_HITS} hits are returned. If an error occurs, the records collected
	 * so far are returned.
	 *
	 * @param projID      exact value of the {@code projId} field, or blank to ignore
	 * @param belongTo    exact value of the {@code belongTo} field, or blank to ignore
	 * @param queryString query text parsed against {@code fileName} and {@code comment},
	 *                    or blank to ignore
	 * @return the matching records; never {@code null}
	 */
	public static List<ElecFileUpload> searchIndexByCondition(String projID,
			String belongTo, String queryString) {

		List<ElecFileUpload> list = new ArrayList<ElecFileUpload>();
		IndexReader indexReader = null;
		IndexSearcher indexSearcher = null;
		try {
			indexReader = IndexReader.open(Configuration.getDirectory());
			indexSearcher = new IndexSearcher(indexReader);

			BooleanQuery booleanQuery = buildQuery(projID, belongTo, queryString);

			TopDocs topDocs = indexSearcher.search(booleanQuery, MAX_HITS);
			System.out.println("查询的总记录数:"+topDocs.totalHits);
			ScoreDoc[] scoreDocs = topDocs.scoreDocs;

			Highlighter highlighter = createHighlighter(booleanQuery);

			if (scoreDocs != null) {
				for (ScoreDoc scoreDoc : scoreDocs) {
					System.out.println("相关都得分:"+scoreDoc.score);
					// The internal document number is needed to load the stored document.
					Document document = indexSearcher.doc(scoreDoc.doc);

					// The highlighter handles one field per call.
					applyHighlight(highlighter, document, "fileName");
					applyHighlight(highlighter, document, "comment");

					list.add(FileUploadDocument.documentToFileUpload(document));
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeSearch(indexSearcher, indexReader);
		}
		return list;
	}

	/**
	 * Deletes the document with the given primary key from the index.
	 *
	 * @param seqID primary key of the record to remove
	 */
	public static void deleteIndex(int seqID) {
		// The key was indexed prefix-coded, so the term must be encoded the same way.
		Term term = new Term("seqId", NumericUtils.intToPrefixCoded(seqID));

		IndexWriter indexWriter = null;
		try {
			IndexWriterConfig indexWriterConfig = new IndexWriterConfig(
					Version.LUCENE_36, Configuration.getAnalyzer());
			indexWriter = new IndexWriter(Configuration.getDirectory(), indexWriterConfig);
			indexWriter.deleteDocuments(term);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Always close the writer, even on failure, so the index write lock is released.
			closeWriter(indexWriter);
		}
	}

	/** Builds the boolean query, adding one mandatory clause per non-blank condition. */
	private static BooleanQuery buildQuery(String projID, String belongTo,
			String queryString) throws Exception {
		BooleanQuery booleanQuery = new BooleanQuery();
		// Owning unit: exact term match. Occur.MUST behaves like AND in SQL.
		if (StringUtils.isNotBlank(projID)) {
			booleanQuery.add(new TermQuery(new Term("projId", projID)), Occur.MUST);
		}
		// Category: exact term match.
		if (StringUtils.isNotBlank(belongTo)) {
			booleanQuery.add(new TermQuery(new Term("belongTo", belongTo)), Occur.MUST);
		}
		// Free text: parsed with the shared analyzer against file name and description.
		if (StringUtils.isNotBlank(queryString)) {
			QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,
					new String[]{"fileName","comment"}, Configuration.getAnalyzer());
			Query textQuery = queryParser.parse(queryString);
			booleanQuery.add(textQuery, Occur.MUST);
		}
		return booleanQuery;
	}

	/** Creates a highlighter that wraps terms matched by the query in red bold HTML markup. */
	private static Highlighter createHighlighter(Query query) {
		Formatter formatter = new SimpleHTMLFormatter("<font color='red'><b>","</b></font>");
		Scorer scorer = new QueryScorer(query);
		Highlighter highlighter = new Highlighter(formatter, scorer);
		Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
		highlighter.setTextFragmenter(fragmenter);
		return highlighter;
	}

	/**
	 * Replaces the value of one stored field with its best highlighted fragment. When
	 * nothing in the field is highlighted, the original text is kept, truncated to
	 * {@link #FRAGMENT_SIZE} characters. A document without the field is left unchanged.
	 */
	private static void applyHighlight(Highlighter highlighter, Document document,
			String fieldName) throws Exception {
		String original = document.get(fieldName);
		if (original == null) {
			return;
		}
		String text = highlighter.getBestFragment(Configuration.getAnalyzer(), fieldName, original);
		if (StringUtils.isBlank(text)) {
			text = original;
			// The length check uses this field's own text; checking another field's
			// length could truncate beyond the end of a shorter string.
			if (text.length() > FRAGMENT_SIZE) {
				text = text.substring(0, FRAGMENT_SIZE);
			}
		}
		document.getField(fieldName).setValue(text);
	}

	/** Closes the writer if it was opened, printing any failure. */
	private static void closeWriter(IndexWriter indexWriter) {
		if (indexWriter == null) {
			return;
		}
		try {
			indexWriter.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Closes the searcher and then its reader if they were opened, printing any failure. */
	private static void closeSearch(IndexSearcher indexSearcher, IndexReader indexReader) {
		if (indexSearcher != null) {
			try {
				indexSearcher.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
		// A searcher built from an existing reader does not close that reader itself.
		if (indexReader != null) {
			try {
				indexReader.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

}

                               这里写了向索引库添加数据的方法和查询索引库(同时给查询的关键词加上高亮,所以代码较多),以及删除索引库的方法

                              注意两个参数Index和Store(即Field.Index和Field.Store),Store:表示是否将数据存储到索引库的数据区域

                                  Index:表示是否将数据更新到索引库的目录区域


                                     

                                    

                 4.在需要应用Lucene的地方来应用Lucene,举例

                             

                                 

                                   同步数据库数据到索引库中

                               

/**
 * Searches the Lucene index with the conditions carried by the given bean, then loads
 * the matching records from the database and copies the highlighted file name and
 * comment from the index hit onto them.
 *
 * <p>An index hit whose {@code seqID} no longer has a database row is skipped.
 *
 * @param elecFileUpload carries the search conditions: {@code projID}, {@code belongTo}
 *                       and the free-text {@code queryString}
 * @return the database records for all hits, in the order the index returned them;
 *         never {@code null}
 */
public List<ElecFileUpload> findElecFileUploadByLuceneCondition(
			ElecFileUpload elecFileUpload) {

		// Result list
		List<ElecFileUpload> list = new ArrayList<ElecFileUpload>();
		// Owning unit
		String projID = elecFileUpload.getProjID();
		// Category
		String belongTo = elecFileUpload.getBelongTo();
		// Free-text condition
		String queryString = elecFileUpload.getQueryString();

		// Query the index first; the hits carry the primary key and the highlighted text.
		List<ElecFileUpload> elecFileUploadList = LuceneUtils.searchIndexByCondition(projID,belongTo,queryString);
		if (elecFileUploadList == null || elecFileUploadList.isEmpty()) {
			return list;
		}

		for (ElecFileUpload fileUpload : elecFileUploadList) {
			// Look up the database record by the primary key found in the index.
			int seqID = fileUpload.getSeqID();
			String condition = " and o.seqID=?";
			Object[] param = {seqID};

			List<ElecFileUpload> uploads = elecFileUploadDao.findElecFileUploadByCondtionalWithSql(condition, param, null);
			if (uploads == null || uploads.isEmpty()) {
				// The index and the database are out of sync for this id; skip the stale hit
				// rather than failing the whole search.
				continue;
			}

			// Replace file name and comment with the highlighted versions from the index.
			ElecFileUpload upload = uploads.get(0);
			upload.setFileName(fileUpload.getFileName());
			upload.setComment(fileUpload.getComment());

			list.addAll(uploads);
		}

		return list;
	}

                                 使用Lucene查询数据

                        










                          

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值