lucene3.x的简单使用梳理

最新推荐文章于 2022-09-08 11:20:07 发布

代码老中医

最新推荐文章于 2022-09-08 11:20:07 发布

阅读量581

点赞数 1

分类专栏： lucene 文章标签： lucene 全文检索文档搜索

本文链接：https://blog.csdn.net/PYXLY1314/article/details/52803796

版权

lucene 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

在看本文前，初学者需要先去了解一下全文检索的基本原理：将原始的文档数据（或者自己业务系统中的各种业务数据）通过lucene的API进行转换，生成对应的索引文件（这里的索引文件其实可以理解成系统文件，它只能被lucene解析），至于文件的格式和命名等，在网上都有介绍，有兴趣的同学可以深入研究一下；生成索引以后我们就可以通过lucene的API进行搜索操作了：调用API后，lucene会根据索引文件的路径查找到索引文件，然后解析它们，做一系列的筛选、匹配、打分等操作，最终返回与搜索关键字相关的结果。

1、lucene开发涉及到的jar包

lucene-analyzers-3.6.0.jar,lucene-core-3.6.1.jar,lucene-core-3.6.1-javadoc.jar

2、生成索引的代码，简单示例：（其中有一个需要注意的点，就是每次建索引前都先删除原先的索引）

       /**
        * Filesystem path built as {@code /www/luceneIndex/public} + {@link File#separator}
        * + {@code indexs}. The index-building code below reads
        * {@code LuceneTimeTask.FILE_PATH_LINUX} as the directory it opens with
        * {@code FSDirectory.open} — presumably this constant; confirm the owning class.
        */
       public static final String FILE_PATH_LINUX = "/www/luceneIndex/public"+File.separator+"indexs";
	/**
	 * Builds the Lucene index entries for courses.
	 * <p>
	 * The course name and tag are indexed as analyzed fields (boosted 5 and 4
	 * respectively); the remaining values are stored as non-analyzed fields.
	 * When the index directory is empty a new index is created; otherwise
	 * {@code deleteIndex(2, dir)} is called first and the writer is opened on the
	 * existing index. If the service returns no courses, nothing is written and
	 * the existing index is left untouched.
	 * <p>
	 * Failures are reported on stdout/stderr and are not propagated to the caller.
	 *
	 * @param vo criteria passed to {@code courseService.getCourseByBetweenTime}
	 *           (start/end time range)
	 */
	@SuppressWarnings("deprecation")
	private void createCourseLucene(LuceneVO vo){
		IndexWriter writer = null;
		Directory dir = null;
		try {
			String pathFile = LuceneTimeTask.FILE_PATH_LINUX;
			File file = new File(pathFile);
			if(!file.exists()){
				file.mkdirs();
			}
			dir = FSDirectory.open(file);
			List<Course> list = courseService.getCourseByBetweenTime(vo);
			if(null == list || list.isEmpty()){
				// Nothing to index. Previously a null list fell through to the loop
				// below and failed with a NullPointerException.
				return;
			}

			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
			if(file.isDirectory() && file.listFiles().length == 0){
				// Empty directory: create a brand-new index.
				writer = new IndexWriter(dir, analyzer,
						true, IndexWriter.MaxFieldLength.UNLIMITED);
			} else {
				// Existing index: drop the previous entries before appending.
				// NOTE(review): the meaning of the literal 2 is defined by deleteIndex,
				// which is not visible here.
				deleteIndex(2,dir);
				writer = new IndexWriter(dir, analyzer,
						false, IndexWriter.MaxFieldLength.UNLIMITED);
			}

			SimpleDateFormat monthDay = new SimpleDateFormat("MM-dd");
			SimpleDateFormat fullTimestamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
			for(Course course : list) {
				Document doc = new Document();

				// Searchable fields; a name match is weighted above a tag match.
				Field nameField = new Field("courseName", course.getName(), Store.YES,Index.ANALYZED);
				Field tagField = new Field("courseTag", course.getTag(), Store.YES,Index.ANALYZED);
				nameField.setBoost(5f);
				tagField.setBoost(4f);
				doc.add(nameField);
				doc.add(tagField);

				// NOTE(review): this uses Constant.LUCENE_COURSE while the search code
				// compares against Constants.LUCENE_COURSE — confirm both hold the same value.
				doc.add(new Field("luceneType",Constant.LUCENE_COURSE, Store.YES,Index.ANALYZED));

				// Stored values used when rendering search results.
				doc.add(new Field("courseId", course.getId()+"", Store.YES,Index.NOT_ANALYZED));
				doc.add(new Field("courseClickNum", course.getHits()+"", Store.YES,Index.NOT_ANALYZED));
				doc.add(new Field("courseCreateTime", monthDay.format(course.getCreatetime()), Store.YES,Index.NOT_ANALYZED));
				doc.add(new Field("courseCreateTimeNum", course.getCreatetime().getTime()+"", Store.YES,Index.NOT_ANALYZED));
				doc.add(new Field("courseSname", course.getSphotoname(), Store.YES,Index.NOT_ANALYZED));
				doc.add(new Field("courseIntroduce", course.getCourseDetail().getIntroduction(), Store.YES,Index.NOT_ANALYZED));
				doc.add(new Field("courseCreateTime_YMD", fullTimestamp.format(course.getCreatetime()), Store.YES,Index.NOT_ANALYZED));
				writer.addDocument(doc);
			}

		} catch (Exception e) {
			System.out.println("生成课程索引失败！");
			e.printStackTrace();
		}finally{
			if(null!=writer){
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			// Closing the writer does not close the Directory; release it explicitly.
			if(null!=dir){
				try {
					dir.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

3、针对以上的索引文件，来完成快速搜索，简单代码如下：

/**
	 * Runs a full-text search for {@code vo.getSearchStr()} across the equipment,
	 * news, course, chapter and datum fields, renders the current page of hits as
	 * HTML list items with the matched terms wrapped in {@code <b>} tags, and writes
	 * the result to the response as JSON.
	 * <p>
	 * When the search string is null or empty no search is performed and the JSON
	 * carries an empty {@code sb}. Exceptions raised while searching are printed
	 * and the response is still written with whatever was rendered so far.
	 */
	public void listmodel(){
		StringBuffer sb = new StringBuffer();

		IndexReader reader=null;
		try {
			pgResult = pgResult == null ? new PageResult<LuceneVO>()
					: pgResult;
			vo = vo == null ? new LuceneVO() : vo;

			if(null!=vo.getSearchStr() && !"".equals(vo.getSearchStr())){
				Directory dir= init();
				reader=IndexReader.open(dir);
				int termNo = getTermNo();

				IndexSearcher searcher=new IndexSearcher(reader);

				Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_36);
				String[] serchArray = new String[]{"equipName","equipFlag","equipIntroduce","newsTitle","newsTag","newsDescription","courseName","courseTag",
						"chapterName","chapterTeacherName","datumName","datumTag"};
				// Every field is optional: a hit in any one of them matches the document.
				Occur[] occArray = new Occur[serchArray.length];
				for (int i = 0; i < occArray.length; i++) {
					occArray[i] = BooleanClause.Occur.SHOULD;
				}
				Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, vo.getSearchStr(), serchArray,occArray, analyzer);
				TopDocs topdocs = searcher.search(query, (termNo+1)*Constants.TERM_NUM);
				int totalRecord = topdocs.totalHits;
				ScoreDoc[] scoreDocs = topdocs.scoreDocs;

				// The search returns at most (termNo+1)*TERM_NUM hits, so scoreDocs can be
				// shorter than totalHits; clamp the page window to what was fetched and
				// keep the start index non-negative.
				int begin = Math.max(0, pgResult.getPageSize() * (pgResult.getCurrentPage() - 1));
				int end = Math.min(Math.min(begin + pgResult.getPageSize(), totalRecord), scoreDocs.length);

				SimpleHTMLFormatter shf = new SimpleHTMLFormatter("<b>","</b>");
				Highlighter hl = new Highlighter(shf,new QueryScorer(query));
				List<LuceneVO> relist = new ArrayList<LuceneVO>();
				String contextPath = ServletActionContext.getRequest().getContextPath();

				// NOTE(review): stored values are written into the markup unescaped, and the
				// highlighted description (containing <b> tags) ends up inside a title attribute.
				for(int i=begin; i < end; i++) {
					Document document = searcher.doc(scoreDocs[i].doc);
					String type = document.get("luceneType");
					if(type.equals(Constants.LUCENE_COURSE)){
						String courseName = "<span class='color-ff0'>[网校]</span>"+document.get("courseName");
						String label = highlightOrOriginal(analyzer, hl, "courseName", courseName);
						String title = highlightOrOriginal(analyzer, hl, "courseIntroduce", document.get("courseIntroduce"));
						appendResultItem(sb,
								contextPath+"/course_sub/teaching/"+document.get("courseId")+".html",
								title, label, document.get("courseCreateTime"));

					}else if(type.equals(Constants.LUCENE_EQUIP)){
						String equipName = "<span class='color-ff0'>[装备]</span>"+document.get("equipName");
						String label = highlightOrOriginal(analyzer, hl, "equipName", equipName);
						String title = highlightOrOriginal(analyzer, hl, "equipIntroduce", document.get("equipIntroduce"));
						appendResultItem(sb,
								contextPath+"/equip_sub/detail/"+document.get("equipClassCode")+"_"+document.get("equipId")+".html",
								title, label, document.get("equipCreateTime"));

					}else if(type.equals(Constants.LUCENE_CHAPTER)){
						String chapterName = "<span class='color-ff0'>[网校]</span>"+document.get("chapterName");
						String label = highlightOrOriginal(analyzer, hl, "chapterName", chapterName);
						String title = highlightOrOriginal(analyzer, hl, "chapterTeacherName", document.get("chapterTeacherName"));
						appendResultItem(sb,
								contextPath+"/course_sub/teaching/"+document.get("courseId")+"_"+document.get("chapterId")+".html",
								title, label, document.get("chapterCreateTime"));

					}else if(type.equals(Constants.LUCENE_NEWS)){
						String newsTitle = "<span class='color-ff0'>[资讯]</span>"+document.get("newsTitle");
						String label = highlightOrOriginal(analyzer, hl, "newsTitle", newsTitle);
						String title = highlightOrOriginal(analyzer, hl, "newsDescription", document.get("newsDescription"));
						appendResultItem(sb,
								contextPath+"/news_sub/items/"+document.get("newsId")+"_"+document.get("newsClassCode")+".html",
								title, label, document.get("newsCreateTime"));

					}else{
						// Any other type is rendered as a datum (document) entry, without a title.
						String datumName = "<span class='color-ff0'>[文档]</span>"+document.get("datumName");
						String label = highlightOrOriginal(analyzer, hl, "datumName", datumName);
						appendResultItem(sb,
								contextPath+"/docview_sub/"+document.get("datumId")+".html",
								null, label, document.get("datumCreateTime"));
					}
				}

				// relist is never populated here; the rendered HTML in sb carries the results.
				pgResult.setList(relist);
				pgResult.setTotalRecord(totalRecord);

			}

		} catch (Exception e) {
			e.printStackTrace();
		}finally{
			if(null!=reader){
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}

		Map<String, Object> remap = new HashMap<String, Object>();
		remap.put("sb", sb);
		remap.put("currentPage", pgResult.getCurrentPage());
		remap.put("totalPage", pgResult.getTotalPage());
		remap.put("searchStr", vo.getSearchStr());
		remap.put("searchType", vo.getSearchType());

		String restr = JsonHelper.getGson().toJson(remap);

		this.writeTextToResponse(restr);
	}

	/**
	 * Returns {@code text} with the query terms highlighted, or {@code text} itself
	 * when the highlighter finds nothing to mark; a null {@code text} (field not
	 * stored on the document) yields an empty string instead of failing.
	 *
	 * @throws Exception whatever the analyzer or highlighter raises; declared broadly
	 *                   because the caller already handles {@code Exception}
	 */
	private String highlightOrOriginal(Analyzer analyzer, Highlighter highlighter,
			String fieldName, String text) throws Exception {
		if (text == null) {
			return "";
		}
		TokenStream tokens = analyzer.tokenStream(fieldName, new StringReader(text));
		String fragment = highlighter.getBestFragment(tokens, text);
		return null != fragment ? fragment : text;
	}

	/**
	 * Appends one {@code <li>} search-result entry to {@code sb}; the {@code title}
	 * attribute is omitted when {@code title} is null.
	 */
	private void appendResultItem(StringBuffer sb, String href, String title,
			String label, String time) {
		sb.append("<li><a name='listlink' href='").append(href).append("'");
		if (title != null) {
			sb.append(" title='").append(title).append("'");
		}
		sb.append(" target='_blank' >").append(label)
		.append(" </a><span>")
		.append(time)
		.append("</span></li>");
	}

至于复杂的搜索操作，以后有时间了再深入研究。

代码老中医

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene3.x的简单使用梳理

在看本文前，初学者需要先去了解一下全文检索的基本原理：将原始的文档数据（或者自己业务系统中的各种业务数据）通过lucene的API进行转换，生成对应的索引文件（这里的索引文件其实可以理解成系统文件，它只能被lucene解析），至于文件的格式和命名等，在网上都有介绍，有兴趣的同学可以深入研究一下；生成索引以后我们就可以通过lucene的API进行搜索操作了：调用API后，lucene会根据索引
复制链接

扫一扫

专栏目录