Lucene搜索引擎入门以及分页

1、生成索引
目的:索引数据目录,在指定目录生成索引文件

首先导入maven依赖

<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-core</artifactId>
  <version>5.3.1</version>
</dependency>
<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-queryparser</artifactId>
  <version>5.3.1</version>
</dependency>
<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-analyzers-common</artifactId>
  <version>5.3.1</version>
</dependency>

生成索引的具体代码:

package com.zking.lunece;

import java.io.IOException;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.zking.dao.BlogDao;
import com.zking.util.PropertiesUtil;

/**

  • 构建Lucene索引

  • 1、构建索引

  • 2、读取索引文件,获取命中片段

  • 3、使得命中片段高亮显示

  • @author LJ

  • @Date 2018年12月11日

  • @Time 下午4:47:49
    */
    public class IndexStarter {

    private static BlogDao blogDao = new BlogDao();

    public static void main(String[] args) {
    //索引输出流配置对象
    IndexWriterConfig conf = new IndexWriterConfig(new SmartChineseAnalyzer());
    Directory d;
    IndexWriter indexWriter = null;
    try {
    //获取索引文件存放地址对象
    d = FSDirectory.open(Paths.get(PropertiesUtil.getValue(“indexPath”)));
    indexWriter = new IndexWriter(d, conf);

     	try {
     		//为数据库中所有数据构建索引
     		List<Map<String, Object>> list = blogDao.list(null, null);
     		for (Map<String, Object> map : list) {
     			Document doc = new Document();
     			//Field.Store.YES是否存储到硬盘
     			doc.add(new StringField("id", (String) map.get("id"), Field.Store.YES));
     			//TextField用于对一句话分词处理
     			doc.add(new TextField("title", (String) map.get("title"), Field.Store.YES));
     			doc.add(new StringField("url", (String) map.get("url"), Field.Store.YES));
     			
     			indexWriter.addDocument(doc);
     		}
     	} catch (InstantiationException e) {
     		e.printStackTrace();
     	} catch (IllegalAccessException e) {
     		e.printStackTrace();
     	} catch (SQLException e) {
     		e.printStackTrace();
     	}
     } catch (IOException e) {
     	e.printStackTrace();
     }finally {
     	if(indexWriter != null) {
     		try {
     			indexWriter.close();//关闭索引输出流
     		} catch (IOException e) {
     			e.printStackTrace();
     		}
     	}
     }
    

    }
    }
lucene.properties文件(至少需要包含 indexPath 属性,指向索引文件的存放目录,例如 indexPath=D:/lucene/index):

properties工具类:

package com.zking.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**

  • properties工具类
  • @author user

*/
public class PropertiesUtil {

    /**
     * Looks up a value from {@code /lucene.properties} on the classpath.
     *
     * @param key property key to look up
     * @return the value for {@code key}, or {@code null} when the key is
     *         absent, the properties file is missing from the classpath,
     *         or the file cannot be read
     */
    public static String getValue(String key) {
        Properties prop = new Properties();
        // Load via the Class object directly - no need to instantiate the
        // utility class. try-with-resources closes the stream (the original
        // leaked it).
        try (InputStream in = PropertiesUtil.class.getResourceAsStream("/lucene.properties")) {
            if (in == null) {
                // Resource missing: the original would have thrown an NPE from
                // Properties.load(null); report and return null instead.
                System.err.println("lucene.properties not found on classpath");
                return null;
            }
            prop.load(in);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return prop.getProperty(key);
    }

}
2、使用索引(进行分页)
从索引文件中拿数据

(1)获取输入流(通过dirReader)

(2)获取索引搜索对象(通过输入流来拿)

(3)获取查询对象(通过查询解析器来获取,解析器是通过分词器获取)

(4)获取包含关键字排前面的文档对象集合

(5)可以获取对应文档的内容

分页目前找到2种解决方案,推荐用第1种,它在大数据量的情况下也能支撑(不需要一次取回前面所有页的数据再丢弃)

第1种:用searchAfter()方法,先生成一个ScoreDoc后用searchAfter方法(我先贴一下两者的区别部分)

第2种:比如查20-30条的记录,先查出前30条,后面for循环从i = 20开始就行

第1种的所有代码:

package com.zking.web;

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.struts2.ServletActionContext;

import com.zking.dao.BlogDao;
import com.zking.util.PageBean;
import com.zking.util.PropertiesUtil;
import com.zking.util.StringUtils;

public class BlogAction {
    private String title;
    private BlogDao blogDao = new BlogDao();

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Lists blogs. Without a search title the database is paged directly;
     * with a title the Lucene index is searched page-by-page via
     * {@link IndexSearcher#searchAfter}, which scales to deep pages.
     *
     * @return the "blogList" result name
     */
    public String execute() {
        try {
            HttpServletRequest request = ServletActionContext.getRequest();
            PageBean pageBean = new PageBean();
            pageBean.setRequest(request);

            if (StringUtils.isBlank(title)) {
                // No keyword: plain database paging.
                List<Map<String, Object>> blogList = this.blogDao.list(title, pageBean);
                request.setAttribute("blogList", blogList);
            } else {
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                // The reader is obtained from the index directory, not constructed directly.
                IndexReader indexReader = DirectoryReader
                        .open(FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath"))));
                try {
                    IndexSearcher searcher = new IndexSearcher(indexReader);
                    // Parse the phrase against the "title" field with the same
                    // analyzer that was used to build the index.
                    Query query = new QueryParser("title", analyzer).parse(title);

                    // Anchor for searchAfter: the last hit of the previous page,
                    // or null on the first page.
                    ScoreDoc sd;
                    if (pageBean.getPage() == 1) {
                        sd = null;
                    } else {
                        // Fetch every hit up to the start of this page; only the
                        // final one (previous page's last hit) is kept as the anchor.
                        TopDocs td = searcher.search(query, pageBean.getStartIndex());
                        sd = td.scoreDocs[pageBean.getStartIndex() - 1];
                    }

                    // One page of hits after the anchor. Page size comes from
                    // PageBean - the original hard-coded 10, ignoring the
                    // "rows" request parameter.
                    TopDocs topDocs = searcher.searchAfter(sd, query, pageBean.getRows());

                    // Highlight matched keywords in red/bold in the title.
                    QueryScorer queryScorer = new QueryScorer(query);
                    Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'><b>", "</b></span>");
                    Highlighter highlighter = new Highlighter(formatter, queryScorer);

                    List<Map<String, Object>> blogList = new ArrayList<>();
                    pageBean.setTotal(topDocs.totalHits);
                    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                        Map<String, Object> map = new HashMap<>();
                        Document doc = searcher.doc(scoreDoc.doc);

                        map.put("id", doc.get("id"));

                        String highlighterTitle = doc.get("title");
                        if (StringUtils.isNotBlank(highlighterTitle)) {
                            highlighterTitle = highlighter.getBestFragment(analyzer, "title", highlighterTitle);
                        }
                        map.put("title", highlighterTitle);
                        map.put("url", doc.get("url"));
                        blogList.add(map);
                    }
                    request.setAttribute("blogList", blogList);
                } finally {
                    // The original never closed the reader, leaking file handles
                    // on every search request.
                    indexReader.close();
                }
            }
            request.setAttribute("pageBean", pageBean);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "blogList";
    }

}
第2种的所有代码:

package com.zking.web;

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.struts2.ServletActionContext;

import com.zking.dao.BlogDao;
import com.zking.util.PageBean;
import com.zking.util.PropertiesUtil;
import com.zking.util.StringUtils;

public class BlogAction {
    private String title;
    private BlogDao blogDao = new BlogDao();

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Lists blogs. Without a search title the database is paged directly;
     * with a title the Lucene index is over-fetched up to the end of the
     * current page and only this page's slice is returned (simple, but
     * fetches page*rows hits - fine for small result sets).
     *
     * @return the "blogList" result name
     */
    public String execute() {
        try {
            HttpServletRequest request = ServletActionContext.getRequest();
            PageBean pageBean = new PageBean();
            pageBean.setRequest(request);

            if (StringUtils.isBlank(title)) {
                // No keyword: plain database paging.
                List<Map<String, Object>> blogList = this.blogDao.list(title, pageBean);
                request.setAttribute("blogList", blogList);
            } else {
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                IndexReader indexReader = DirectoryReader
                        .open(FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath"))));
                try {
                    IndexSearcher searcher = new IndexSearcher(indexReader);
                    // Parse the phrase against the "title" field with the same
                    // analyzer used at indexing time.
                    Query query = new QueryParser("title", analyzer).parse(title);
                    // Over-fetch: retrieve every hit up to the end of the current
                    // page, then keep only this page's slice below.
                    TopDocs topDocs = searcher.search(query, pageBean.getPage() * pageBean.getRows());

                    // Highlight matched keywords in red/bold in the title.
                    QueryScorer queryScorer = new QueryScorer(query);
                    Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'><b>", "</b></span>");
                    Highlighter highlighter = new Highlighter(formatter, queryScorer);

                    List<Map<String, Object>> blogList = new ArrayList<>();
                    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                    pageBean.setTotal(topDocs.totalHits);

                    // Slice out just the current page; Math.min guards the last,
                    // possibly partial, page.
                    int end = Math.min(pageBean.getStartIndex() + pageBean.getRows(), scoreDocs.length);
                    for (int i = pageBean.getStartIndex(); i < end; i++) {
                        Map<String, Object> map = new HashMap<>();
                        Document doc = indexReader.document(scoreDocs[i].doc);

                        map.put("id", doc.get("id"));

                        String highlighterTitle = doc.get("title");
                        if (StringUtils.isNotBlank(highlighterTitle)) {
                            highlighterTitle = highlighter.getBestFragment(analyzer, "title", highlighterTitle);
                        }
                        map.put("title", highlighterTitle);
                        map.put("url", doc.get("url"));
                        blogList.add(map);
                    }
                    request.setAttribute("blogList", blogList);
                } finally {
                    // The original never closed the reader, leaking file handles
                    // on every search request.
                    indexReader.close();
                }
            }
            request.setAttribute("pageBean", pageBean);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "blogList";
    }

}
pageBean帮助类:

package com.zking.util;

import java.util.Map;

import javax.servlet.http.HttpServletRequest;

/**

  • 分页工具类

*/
public class PageBean {

    private int page = 1;       // current page number (1-based)

    private int rows = 10;      // page size

    private int total = 0;      // total record count

    private boolean pagination = true; // whether paging is enabled

    // parameters of the previous query, kept so links can reproduce it
    private Map<String, String[]> paramMap;
    // URL of the previous query
    private String url;

    /**
     * Populates this bean from the request's "page", "rows" and "pagination"
     * parameters, and remembers the request URL and parameter map.
     *
     * @param request the current HTTP request
     */
    public void setRequest(HttpServletRequest request) {
        String page = request.getParameter("page");
        String rows = request.getParameter("rows");
        String pagination = request.getParameter("pagination");
        this.setPage(page);
        this.setRows(rows);
        this.setPagination(pagination);
        this.setUrl(request.getRequestURL().toString());
        this.setParamMap(request.getParameterMap());
    }

    public PageBean() {
        super();
    }

    public Map<String, String[]> getParamMap() {
        return paramMap;
    }

    public void setParamMap(Map<String, String[]> paramMap) {
        this.paramMap = paramMap;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public int getPage() {
        return page;
    }

    public void setPage(int page) {
        this.page = page;
    }

    /**
     * Parses the page number from a request parameter. Malformed or
     * non-positive values are ignored (the original threw an uncaught
     * NumberFormatException on garbage input, and a negative page made
     * {@link #getStartIndex()} negative).
     */
    public void setPage(String page) {
        if (StringUtils.isNotBlank(page)) {
            try {
                int parsed = Integer.parseInt(page);
                if (parsed > 0) {
                    this.page = parsed;
                }
            } catch (NumberFormatException e) {
                // keep the default page on bad input
            }
        }
    }

    public int getRows() {
        return rows;
    }

    /**
     * Parses the page size from a request parameter. Malformed or
     * non-positive values are ignored - a zero page size would cause a
     * divide-by-zero in {@link #getMaxPage()}.
     */
    public void setRows(String rows) {
        if (StringUtils.isNotBlank(rows)) {
            try {
                int parsed = Integer.parseInt(rows);
                if (parsed > 0) {
                    this.rows = parsed;
                }
            } catch (NumberFormatException e) {
                // keep the default page size on bad input
            }
        }
    }

    public int getTotal() {
        return total;
    }

    public void setTotal(int total) {
        this.total = total;
    }

    /** Parses the total from a string; malformed values are ignored. */
    public void setTotal(String total) {
        if (StringUtils.isNotBlank(total)) {
            try {
                this.total = Integer.parseInt(total);
            } catch (NumberFormatException e) {
                // keep the previous total on bad input
            }
        }
    }

    public boolean isPagination() {
        return pagination;
    }

    public void setPagination(boolean pagination) {
        this.pagination = pagination;
    }

    /** Paging stays on unless the parameter is exactly "false". */
    public void setPagination(String pagination) {
        if ("false".equals(pagination)) {
            this.pagination = false;
        }
    }

    /**
     * Last page number: total/rows, rounded up.
     *
     * @return the maximum page (0 when there are no records)
     */
    public int getMaxPage() {
        int max = this.total / this.rows;
        if (this.total % this.rows != 0) {
            max++;
        }
        return max;
    }

    /**
     * Next page number, clamped to the last page.
     *
     * @return page + 1, or the max page when already on it
     */
    public int getNextPage() {
        int nextPage = this.page + 1;
        if (nextPage > this.getMaxPage()) {
            nextPage = this.getMaxPage();
        }
        return nextPage;
    }

    /**
     * Previous page number, clamped to 1.
     *
     * @return page - 1, or 1 when already on the first page
     */
    public int getPreviousPage() {
        int previousPage = this.page - 1;
        if (previousPage < 1) {
            previousPage = 1;
        }
        return previousPage;
    }

    /**
     * Zero-based index of the first record on the current page.
     *
     * @return (page - 1) * rows
     */
    public int getStartIndex() {
        return (this.page - 1) * this.rows;
    }

    @Override
    public String toString() {
        return "PageBean [page=" + page + ", rows=" + rows + ", total=" + total + ", pagination=" + pagination + "]";
    }

}
运行在搜索框输入“使用方法”后的效果:

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值