PS:当在搜索框检索“是”时,Lucene会根据存入的记录建立一张词汇表(词汇表按照所选分词器的规则对内容进行分词,一条内容可能被拆成一个或多个词汇,词汇表右侧保存着该词汇所在原始记录的编号)
创建索引库,document是原始记录表
/** * 创建索引库 * 将Aritcle对象放入索引库中的原始记录表中,从而形成词汇表 */ @Test public void createIndexDB() throws Exception{ //创建Article对象 Article article = new Article(1,"培训","传智是一家IT培训机构"); //创建Document对象 Document document = new Document(); //将Article对象中的三个属性值分别绑定到Document对象中 /* *参数一:document对象中的属性名叫xid,article对象中的属性名叫id,项目中提倡相同 *参数二:document对象中的属性xid的值,与article对象中相同 *参数三:是否将xid属性值存入由原始记录表中转存入词汇表 * Store.YES表示该属性值会存入词汇表 * Store.NO表示该属性值不会存入词汇表 * 项目中提倡非id值都存入词汇表 *参数四:是否将xid属性值进行分词算法 * Index.ANALYZED表示该属性值会进行词汇拆分 * Index.NOT_ANALYZED表示该属性值不会进行词汇拆分 * 项目中提倡非id值都进行词汇拆分 * 目前将分词理解为分汇拆分,目前认为一个汉字一个分词拆分 */ document.add(new Field("xid",article.getId().toString(),Store.YES,Index.ANALYZED)); document.add(new Field("xtitle",article.getTitle(),Store.YES,Index.ANALYZED)); document.add(new Field("xcontent",article.getContent(),Store.YES,Index.ANALYZED)); Directory directory = FSDirectory.open(new File("E:/IndexDBDBDB")); Version version = Version.LUCENE_30; Analyzer analyzer = new StandardAnalyzer(version); //这种分词模型是按照单个字进行分词的 MaxFieldLength maxFieldLength = MaxFieldLength.LIMITED; //如果有多余2万个字内容,只会以前1万个为准 //创建IndexWriter字符流对象 /* * 参数一:lucene索引库最终应对于硬盘中的目录,例如:E:/IndexDBDBDB * 参数二:采用什么策略将文本拆分,一个策略就是一个具体的实现类 * 参数三:最多将文本拆分出多少词汇,LIMITED表示1万个,即只取前1万个词汇,如果不足1W个词汇个,以实际为准 */ IndexWriter indexWriter = new IndexWriter(directory,analyzer,maxFieldLength); //将document对象写入lucene索引库 indexWriter.addDocument(document); //关闭IndexWriter字符流对象 indexWriter.close(); }
PS:创建好的文件
/** * 根据关键字从索引库中搜索符合条件的内容 */ @Test public void findIndexDB() throws Exception{ //准备工作 String keywords = "培训"; List<Article> articleList = new ArrayList<Article>(); Directory directory = FSDirectory.open(new File("D:/All_Files/IndexDBDBDB")); Version version = Version.LUCENE_30; Analyzer analyzer = new StandardAnalyzer(version); MaxFieldLength maxFieldLength = MaxFieldLength.LIMITED; //创建IndexSearcher字符流对象 IndexSearcher indexSearcher = new IndexSearcher(directory); //创建查询解析器对象 /* * 参数一:使用分词器的版本,提倡使用该jar包中的最高版本 * 参数二:争对document对象中的哪个属性进行搜索 */ QueryParser queryParser = new QueryParser(version,"xcontent",analyzer); //创建对象对象封装查询关键字 Query query = queryParser.parse(keywords); //根据关键字,去索引库中的词汇表搜索 /* * 参数一:表示封装关键字查询对象,其它QueryParser表示查询解析器 * 参数二:MAX_RECORD表示如果根据关键字搜索出来的内容较多,只取前MAX_RECORD个内容 * 不足MAX_RECORD个数的话,以实际为准 */ int MAX_RECORD = 100; TopDocs topDocs = indexSearcher.search(query,MAX_RECORD); //TopDocs就是分好词右边的记录 //迭代词汇表中符合条件的编号 for(int i=0;i<topDocs.scoreDocs.length;i++){ //取出封装编号和分数的ScoreDoc对象 ScoreDoc scoreDoc = topDocs.scoreDocs[i]; //取出每一个编号,例如:0,1,2 int no = scoreDoc.doc; //根据编号去索引库中的原始记录表中查询对应的document对象 Document document = indexSearcher.doc(no); //获取document对象中的三个属性值 String xid = document.get("xid"); String xtitle = document.get("xtitle"); String xcontent = document.get("xcontent"); //封装到artilce对象中 Article article = new Article(Integer.parseInt(xid),xtitle,xcontent); //将article对象加入到list集合中 articleList.add(article); } //迭代结果集 for(Article a:articleList){ System.out.println(a); } }
PS : 输出,目前标准格式只是能对 中文进行识别
编号:1
标题:培训
内容:传智是一家IT培训机构
PS : LuceneUtil的重构
package cn.itcast.javaee.lucene.util;

import java.io.File;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.Locale;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import cn.itcast.javaee.lucene.entity.Article;

/**
 * Utility class holding the shared Lucene settings (directory, version,
 * analyzer, max field length) and converting between JavaBeans and Documents.
 * @author AdminTC
 */
public class LuceneUtil {

	private static Directory directory;
	private static Version version;
	private static Analyzer analyzer;
	private static MaxFieldLength maxFieldLength;

	static {
		try {
			directory = FSDirectory.open(new File("D:/All_Files/IndexDBDBDB"));
			version = Version.LUCENE_30;
			analyzer = new StandardAnalyzer(version);
			maxFieldLength = MaxFieldLength.LIMITED;
		} catch (Exception e) {
			// The cause is preserved; class initialization fails with it attached
			throw new RuntimeException(e);
		}
	}

	public static Directory getDirectory() {
		return directory;
	}

	public static Version getVersion() {
		return version;
	}

	public static Analyzer getAnalyzer() {
		return analyzer;
	}

	public static MaxFieldLength getMaxFieldLength() {
		return maxFieldLength;
	}

	// Utility class: must not be instantiated from outside
	private LuceneUtil() {}

	/**
	 * Converts a JavaBean into a Document. Every declared instance field is
	 * read through its public getXxx() method and added as a stored, analyzed
	 * Field whose name equals the bean property name.
	 * Static and compiler-generated fields are skipped (they have no getter),
	 * and properties whose value is null are left out of the document.
	 *
	 * @param obj the bean to convert
	 * @return a Document with one field per non-null bean property
	 * @throws Exception if a getter is missing or cannot be invoked
	 */
	public static Document javabean2document(Object obj) throws Exception {
		Document document = new Document();
		Class<?> clazz = obj.getClass();
		for (java.lang.reflect.Field reflectField : clazz.getDeclaredFields()) {
			if (isNotBeanProperty(reflectField)) {
				continue;
			}
			// Property name, e.g. id / title / content
			String name = reflectField.getName();
			// Build the getter name, e.g. getId / getTitle / getContent.
			// Locale.ENGLISH keeps "id" -> "Id" even under a Turkish default locale.
			String methodName = "get" + name.substring(0,1).toUpperCase(Locale.ENGLISH) + name.substring(1);
			Method method = clazz.getMethod(methodName);
			Object value = method.invoke(obj);
			if (value == null) {
				// Nothing to index for an unset property
				continue;
			}
			document.add(new Field(name,value.toString(),Store.YES,Index.ANALYZED));
		}
		return document;
	}

	/**
	 * Converts a Document back into a JavaBean of the given class. Each
	 * declared instance field of the class is populated (via BeanUtils, i.e.
	 * through its setXxx() method) from the document field with the same name.
	 * Fields that are absent from the document keep the bean's default value.
	 *
	 * @param document the stored document
	 * @param clazz the bean class; needs an accessible no-arg constructor
	 * @return a new, populated instance of clazz
	 * @throws Exception if the bean cannot be created or a property cannot be set
	 */
	public static Object document2javabean(Document document,Class<?> clazz) throws Exception {
		Object obj = clazz.newInstance();
		for (java.lang.reflect.Field reflectField : clazz.getDeclaredFields()) {
			if (isNotBeanProperty(reflectField)) {
				continue;
			}
			String name = reflectField.getName(); // id / title / content
			String value = document.get(name);    // null when the document has no such field
			if (value != null) {
				BeanUtils.setProperty(obj,name,value);
			}
		}
		return obj;
	}

	// True for fields that do not represent a bean property (static or compiler-generated).
	private static boolean isNotBeanProperty(java.lang.reflect.Field reflectField) {
		return reflectField.isSynthetic() || Modifier.isStatic(reflectField.getModifiers());
	}

	// Manual round-trip check
	public static void main(String[] args) throws Exception {
		Article aritcle = new Article(1,"培训","传智是一家培训机构");
		Document document = LuceneUtil.javabean2document(aritcle);
		System.out.println("---------------------------------------");
		Article article = (Article) LuceneUtil.document2javabean(document,Article.class);
		System.out.println(article);
	}
}
PS: 12_Lucene索引库查询的过程(写代码时参考)
PS: 对Lucene索引创建与查询代码的第二次改造(使用LuceneUtil重构)
package cn.itcast.javaee.lucene.secondapp;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * FirstApp refactored on top of LuceneUtil.
 * @author AdminTC
 */
public class SecondApp {

	/**
	 * Adds one article to the index.
	 *
	 * @throws Exception if the bean cannot be converted or the index cannot be written
	 */
	@Test
	public void createIndexDB() throws Exception {
		Article article = new Article(1,"培训","传智是一家it培训机构");
		//Article article = new Article(2,"培训","北大是一家it培训机构");
		//Article article = new Article(3,"培训","中大是一家it培训机构");
		Document document = LuceneUtil.javabean2document(article);
		IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory(),LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength());
		try {
			indexWriter.addDocument(document);
		} finally {
			// Close even on failure so write.lock is released
			indexWriter.close();
		}
	}

	/**
	 * Searches the content field for a keyword and prints the matching articles.
	 *
	 * @throws Exception if the keyword cannot be parsed or the index cannot be read
	 */
	@Test
	public void findIndexDB() throws Exception {
		String keywords = "培训";
		List<Article> articleList = new ArrayList<Article>();
		QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
		Query query = queryParser.parse(keywords);
		IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
		try {
			TopDocs topDocs = indexSearcher.search(query,100);
			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				Document document = indexSearcher.doc(scoreDoc.doc);
				articleList.add((Article) LuceneUtil.document2javabean(document,Article.class));
			}
		} finally {
			indexSearcher.close();
		}
		for (Article a : articleList) {
			System.out.println( a );
		}
	}
}
PS: 每执行一次创建索引库(写入),就会生成一个 .cfs 文件;本次共执行了3次,因此生成了3个 .cfs 文件
输出:...................................
编号:1 标题:培训 内容:传智是一家it培训机构 编号:1 标题:培训 内容:传智是一家it培训机构 编号:1 标题:培训 内容:传智是一家it培训机构
Lucene DAO层的CRUD
package cn.itcast.javaee.lucene.curd;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Create / read / update / delete operations on the index.
 * Every writer and searcher is closed in a finally block so that a failure
 * does not leave write.lock or open index files behind.
 * @author AdminTC
 */
public class ArticleDao {

	// Opens a writer on the shared index using the LuceneUtil settings.
	private static IndexWriter openWriter() throws Exception {
		return new IndexWriter(LuceneUtil.getDirectory(),LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength());
	}

	/** Adds a single article to the index. */
	@Test
	public void add() throws Exception {
		Article article = new Article(1,"培训","传智是一家java培训机构");
		Document document = LuceneUtil.javabean2document(article);
		IndexWriter indexWriter = openWriter();
		try {
			indexWriter.addDocument(document); // core call
		} finally {
			indexWriter.close();
		}
	}

	/** Adds seven articles with one writer, in id order 1..7. */
	@Test
	public void addAll() throws Exception {
		Article[] articles = {
			new Article(1,"培训","传智是一家java培训机构"),
			new Article(2,"培训","传智是一家net培训机构"),
			new Article(3,"培训","传智是一家php培训机构"),
			new Article(4,"培训","传智是一家ios培训机构"),
			new Article(5,"培训","传智是一家ui培训机构"),
			new Article(6,"培训","传智是一家c++培训机构"),
			new Article(7,"培训","传智是一家seo培训机构"),
		};
		IndexWriter indexWriter = openWriter();
		try {
			for (Article article : articles) {
				indexWriter.addDocument(LuceneUtil.javabean2document(article));
			}
		} finally {
			indexWriter.close();
		}
	}

	/**
	 * Replaces the document whose id is 1.
	 * updateDocument first deletes the document(s) containing the term and
	 * then adds the new document.
	 */
	@Test
	public void update() throws Exception {
		Article newArticle = new Article(1,"培训","传智是一家JAVA培训机构");
		Document document = LuceneUtil.javabean2document(newArticle);
		IndexWriter indexWriter = openWriter();
		try {
			/*
			 * Argument 1: the term identifying the document(s) to replace --
			 *             field "id" with value "1"
			 * Argument 2: the new document
			 */
			indexWriter.updateDocument(new Term("id","1"),document); // core call
		} finally {
			indexWriter.close();
		}
	}

	/** Deletes the document(s) whose id field contains the term "2". */
	@Test
	public void delete() throws Exception {
		IndexWriter indexWriter = openWriter();
		try {
			indexWriter.deleteDocuments(new Term("id","2")); // core call
		} finally {
			indexWriter.close();
		}
	}

	/** Deletes every document in the index. */
	@Test
	public void deleteAll() throws Exception {
		IndexWriter indexWriter = openWriter();
		try {
			indexWriter.deleteAll(); // core call
		} finally {
			indexWriter.close();
		}
	}

	/** Searches the content field for a keyword and prints up to 100 matching articles. */
	@Test
	public void findAllByKeywords() throws Exception {
		String keywords = "培";
		List<Article> articleList = new ArrayList<Article>();
		QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
		Query query = queryParser.parse(keywords);
		IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
		try {
			TopDocs topDocs = indexSearcher.search(query,100); // core call
			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				Document document = indexSearcher.doc(scoreDoc.doc);
				articleList.add((Article) LuceneUtil.document2javabean(document,Article.class));
			}
		} finally {
			indexSearcher.close();
		}
		for (Article a : articleList) {
			System.out.println( a );
		}
	}
}
Lucene分页持久层,获取总页数和每页的内容
package cn.itcast.javaee.lucene.fy.dao;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

import cn.itcast.javaee.lucene.fy.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Persistence layer for paged keyword search.
 * @author AdminTC
 */
public class ArticleDao {

	/**
	 * Counts all records whose content field matches the keyword.
	 *
	 * @param keywords the query text
	 * @return the total number of matching records
	 * @throws Exception if the keyword cannot be parsed or the index cannot be read
	 */
	public int getAllRecord(String keywords) throws Exception {
		QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
		Query query = queryParser.parse(keywords);
		IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
		try {
			TopDocs topDocs = indexSearcher.search(query,2);
			// totalHits is the real number of matches and is not capped by the 2 above;
			// topDocs.scoreDocs.length would be capped by it.
			return topDocs.totalHits;
		} finally {
			indexSearcher.close();
		}
	}

	/**
	 * Fetches one page of records matching the keyword.
	 *
	 * @param keywords the query text
	 * @param start zero-based index of the first record to return
	 * @param size maximum number of records to return; fewer are returned when
	 *             the result set ends earlier
	 * @return the records of the page; empty when start is negative, size is
	 *         not positive, or start lies past the last hit
	 * @throws Exception if the keyword cannot be parsed or the index cannot be read
	 */
	public List<Article> findAll(String keywords,int start,int size) throws Exception {
		List<Article> articleList = new ArrayList<Article>();
		if (start < 0 || size <= 0) {
			return articleList;
		}
		QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
		Query query = queryParser.parse(keywords);
		IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
		try {
			// Fetch exactly as many hits as the requested page needs, so pages
			// beyond the first 100 hits are reachable.
			int wanted = start + size;
			TopDocs topDocs = indexSearcher.search(query,wanted);
			// Bound by the hits actually returned, not by totalHits, to stay inside the array
			int end = Math.min(wanted,topDocs.scoreDocs.length);
			for (int i = start; i < end; i++) {
				ScoreDoc scoreDoc = topDocs.scoreDocs[i];
				Document document = indexSearcher.doc(scoreDoc.doc);
				articleList.add((Article) LuceneUtil.document2javabean(document,Article.class));
			}
		} finally {
			indexSearcher.close();
		}
		return articleList;
	}

	// Manual check: prints the total and the first four pages of two records each
	public static void main(String[] args) throws Exception {
		ArticleDao dao = new ArticleDao();
		System.out.println(dao.getAllRecord("培训"));
		System.out.println("------------------------------");
		String[] pageTitles = {"第一页","第二页","第三页","第四页"};
		for (int pageIndex = 0; pageIndex < pageTitles.length; pageIndex++) {
			System.out.println(pageTitles[pageIndex]);
			List<Article> list = dao.findAll("培训",pageIndex * 2,2);
			for (Article a : list) {
				System.out.println(a);
			}
		}
	}
}
PageBean
package cn.itcast.javaee.lucene.fy.entity;

import java.util.ArrayList;
import java.util.List;

/**
 * Paging bean for Article search results.
 * @author AdminTC
 */
public class Page {

	/** Number of the page currently shown. */
	private Integer currPageNO;

	/** Records shown per page; 2 unless changed. */
	private Integer perPageSize = 2;

	/** Total number of matching records. */
	private Integer allRecordNO;

	/** Total number of pages. */
	private Integer allPageNO;

	/** Records of the current page. */
	private List<Article> articleList = new ArrayList<Article>();

	public Page() {
	}

	public List<Article> getArticleList() {
		return this.articleList;
	}

	public void setArticleList(List<Article> articleList) {
		this.articleList = articleList;
	}

	public Integer getAllPageNO() {
		return this.allPageNO;
	}

	public void setAllPageNO(Integer allPageNO) {
		this.allPageNO = allPageNO;
	}

	public Integer getAllRecordNO() {
		return this.allRecordNO;
	}

	public void setAllRecordNO(Integer allRecordNO) {
		this.allRecordNO = allRecordNO;
	}

	public Integer getPerPageSize() {
		return this.perPageSize;
	}

	public void setPerPageSize(Integer perPageSize) {
		this.perPageSize = perPageSize;
	}

	public Integer getCurrPageNO() {
		return this.currPageNO;
	}

	public void setCurrPageNO(Integer currPageNO) {
		this.currPageNO = currPageNO;
	}
}
Service
package cn.itcast.javaee.lucene.fy.service;

import java.util.List;

import cn.itcast.javaee.lucene.fy.dao.ArticleDao;
import cn.itcast.javaee.lucene.fy.entity.Article;
import cn.itcast.javaee.lucene.fy.entity.Page;

/**
 * Business layer for paged keyword search.
 * @author AdminTC
 */
public class ArticleService {

	// Persistence layer
	private ArticleDao articleDao = new ArticleDao();

	/**
	 * Builds the Page for a keyword and a page number.
	 * A page number below 1 is treated as 1, and a page number beyond the
	 * last page is treated as the last page, so the record offset handed to
	 * the DAO is never negative or past the result set.
	 *
	 * @param keywords the query text
	 * @param currPageNO the requested page number, starting at 1
	 * @return the filled Page (page number, totals and the page's records)
	 * @throws Exception if the underlying search fails
	 */
	public Page show(String keywords,int currPageNO) throws Exception {
		Page page = new Page();

		// Total number of records
		int allRecordNO = articleDao.getAllRecord(keywords);
		page.setAllRecordNO(allRecordNO);

		// Total number of pages: ceiling of records / page size
		int size = page.getPerPageSize();
		int allPageNO = (allRecordNO + size - 1) / size;
		page.setAllPageNO(allPageNO);

		// Current page, clamped into [1, allPageNO] (1 when there are no results)
		int pageNo = currPageNO;
		if (allPageNO > 0 && pageNo > allPageNO) {
			pageNo = allPageNO;
		}
		if (pageNo < 1) {
			pageNo = 1;
		}
		page.setCurrPageNO(pageNo);

		// Records of the current page
		int start = (pageNo - 1) * size;
		List<Article> articleList = articleDao.findAll(keywords,start,size);
		page.setArticleList(articleList);

		return page;
	}

	// Manual check
	public static void main(String[] args) throws Exception {
		ArticleService test = new ArticleService();
		Page page = test.show("培训",1); // first page
		System.out.println(page.getCurrPageNO());
		System.out.println(page.getPerPageSize());
		System.out.println(page.getAllRecordNO());
		System.out.println(page.getAllPageNO()); // 4 pages with the tutorial's sample data
		for (Article a : page.getArticleList()) {
			System.out.println(a);
		}
	}
}
Controller
package cn.itcast.javaee.lucene.fy.action;

import java.io.IOException;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import cn.itcast.javaee.lucene.fy.entity.Page;
import cn.itcast.javaee.lucene.fy.service.ArticleService;

/**
 * Controller for paged keyword search: reads the keyword and page number,
 * asks the business layer for the Page and forwards to list.jsp.
 */
public class ArticleServlet extends HttpServlet {

	public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		this.doPost(request,response);
	}

	/**
	 * Handles a search request.
	 * Request parameters: keywords (defaults to "培训" when missing or blank)
	 * and currPageNO (defaults to 1 when missing, blank, non-numeric or below 1).
	 * Binds PAGE and KEYWORDS as request attributes and forwards to /list.jsp.
	 *
	 * @throws ServletException if the search itself fails (the cause is attached)
	 * @throws IOException if forwarding fails
	 */
	public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		request.setCharacterEncoding("UTF-8");

		// Keyword
		String keywords = request.getParameter("keywords");
		if (keywords == null || keywords.trim().length() == 0) {
			keywords = "培训"; // default
		}

		// Current page number; client input, so never trust it to be numeric
		int currPageNO = parsePageNo(request.getParameter("currPageNO"));

		// Call the business layer
		ArticleService articleService = new ArticleService();
		Page page;
		try {
			page = articleService.show(keywords,currPageNO);
		} catch (Exception e) {
			throw new ServletException("Search failed for keywords: " + keywords,e);
		}

		// Expose the results to the view
		request.setAttribute("PAGE",page);
		request.setAttribute("KEYWORDS",keywords);

		// Forward to list.jsp
		request.getRequestDispatcher("/list.jsp").forward(request,response);
	}

	// Parses the page-number parameter; anything that is not an integer >= 1 yields 1.
	private static int parsePageNo(String raw) {
		if (raw == null) {
			return 1;
		}
		try {
			int pageNo = Integer.parseInt(raw.trim());
			return pageNo < 1 ? 1 : pageNo;
		} catch (NumberFormatException e) {
			return 1;
		}
	}
}
list.jsp
<%@ page language="java" pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %>
<%--
  Search page with synchronous paging.
  Expects two request attributes set by ArticleServlet:
    PAGE     - the Page bean (currPageNO, allPageNO, articleList)
    KEYWORDS - the keyword that was searched
  All request- and index-derived text is written through c:out so that markup
  in a keyword or an article cannot be injected into the page.
--%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <title>同步分页</title>
  </head>
  <body>

	<!-- input area -->
	<form action="${pageContext.request.contextPath}/ArticleServlet" method="POST">
		<input type="hidden" name="currPageNO" value="1"/>
		<table border="2" align="center">
			<tr>
				<th>输入关键字</th>
				<td><input type="text" name="keywords" value="<c:out value='${requestScope.KEYWORDS}'/>" maxlength="10"/></td>
				<td><input id="search" type="button" value="站内搜索"/></td>
			</tr>
		</table>
	</form>

	<script type="text/javascript">
		// Strips leading and trailing whitespace
		function trim(str){
			// left side first
			str = str.replace(/^\s*/,"");
			// then right side
			str = str.replace(/\s*$/,"");
			return str;
		}
		// Click handler of the search button: submit only when a keyword was entered
		document.getElementById("search").onclick = function(){
			var formElement = document.forms[0];
			var keywords = trim(formElement.keywords.value);
			if(keywords.length == 0){
				alert("你没有填关键字!!!");
			}else{
				formElement.submit();
			}
		}
	</script>

	<!-- result area -->
	<table border="2" align="center" width="70%">
		<tr>
			<th>编号</th>
			<th>标题</th>
			<th>内容</th>
		</tr>
		<c:forEach var="article" items="${requestScope.PAGE.articleList}">
			<tr>
				<td><c:out value="${article.id}"/></td>
				<td><c:out value="${article.title}"/></td>
				<td><c:out value="${article.content}"/></td>
			</tr>
		</c:forEach>
		<tr>
			<th colspan="3" align="center">
				<a onclick="fy(1)" style="cursor:pointer;color:blue;text-decoration:underline">首页</a>
				<c:choose>
					<c:when test="${requestScope.PAGE.currPageNO+1<=requestScope.PAGE.allPageNO}">
						<a onclick="fy(${requestScope.PAGE.currPageNO+1})" style="cursor:pointer;color:blue;text-decoration:underline">下一页</a>
					</c:when>
					<c:otherwise>
						下一页
					</c:otherwise>
				</c:choose>
				<%-- The previous-page link needs a real page number; it is plain text on the first page --%>
				<c:choose>
					<c:when test="${requestScope.PAGE.currPageNO-1>=1}">
						<a onclick="fy(${requestScope.PAGE.currPageNO-1})" style="cursor:pointer;color:blue;text-decoration:underline">上一页</a>
					</c:when>
					<c:otherwise>
						上一页
					</c:otherwise>
				</c:choose>
				<%-- With no results allPageNO is 0; fall back to page 1 --%>
				<a onclick="fy(${requestScope.PAGE.allPageNO > 0 ? requestScope.PAGE.allPageNO : 1})" style="cursor:pointer;color:blue;text-decoration:underline">末页</a>
			</th>
		</tr>
	</table>

	<script type="text/javascript">
		// Jumps to the given page: stores the page number in the hidden field and submits the form
		function fy(currPageNO){
			var formElement = document.forms[0];
			formElement.currPageNO.value = currPageNO;
			formElement.submit();
		}
	</script>

  </body>
</html>
PS :站内检索
package cn.itcast.javaee.lucene.optimize; import java.io.File; import java.util.ArrayList; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import org.junit.Test; import cn.itcast.javaee.lucene.entity.Article; import cn.itcast.javaee.lucene.util.LuceneUtil; /** * 索引库进行优化 * @author AdminTC */ public class ArticleDao { /** * 增加document对象索引库中 * 问题的引入 */ @Test public void add() throws Exception{ Article article = new Article(1,"培训","传智是一家it培训机构",10); Document document = LuceneUtil.javabean2document(article); IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory() ,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); indexWriter.addDocument(document); indexWriter.close(); } /** * 合并cfs文件,合并后的cfs文件是二进制压缩字符,能解决是的文件大小和数量的问题 * @throws Exception */ @Test public void type1() throws Exception{ Article article = new Article(1,"培训","传智是一家it培训机构",10); Document document = LuceneUtil.javabean2document(article); IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory() ,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); indexWriter.addDocument(document); //合并cfs文本 indexWriter.optimize(); //会合并成一个文件 indexWriter.close(); } /** * 设定合并因子,自动合并cfs文件 * @throws Exception */ @Test public void type2() throws Exception{ Article article = new Article(1,"培训","传智是一家it培训机构",10); Document document = LuceneUtil.javabean2document(article); IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory() ,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); indexWriter.addDocument(document); //设置合并因子,即满足3个cfs文本一合并 indexWriter.setMergeFactor(3); //每3个文件一合并 
indexWriter.close(); }
/** * 默认情况下,每10个cfs文本一合并 * @throws Exception */ @Test public void type3() throws Exception{ Article article = new Article(1,"培训","传智是一家it培训机构",10); Document document = LuceneUtil.javabean2document(article); IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory() ,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); indexWriter.addDocument(document); //设置合并因子,即满足10个cfs文本一合并 //indexWriter.setMergeFactor(10); indexWriter.close(); }
PS : 使用RAMDirectory能解决读写速度的问题,原理是这样的:首先把硬盘索引库同步到内存索引库;进行操作时都是在内存索引库中完成;操作完以后先清空硬盘索引库,然后再把内存索引库的内容存入硬盘索引库
/** * 使用RAMDirectory,类似于内存索引库,能解决是的读取索引库文件的速度问题 * @throws Exception */ @Test public void type4() throws Exception{ Article article = new Article(1,"培训","传智是一家it培训机构",10); Document document = LuceneUtil.javabean2document(article); //硬盘索引库 Directory fsDirectory = FSDirectory.open(new File("D:/All_Files/IndexDBDBDB")); //内存索引库,因为硬盘索引库的内容要同步到内存索引库中 Directory ramDirectory = new RAMDirectory(fsDirectory); //指向硬盘索引库的字符流,true表示如果内存索引库中和硬盘索引库中的相同的document对象时,先删除硬盘索引库中的document对象, //再将内存索引库的document对象写入硬盘索引库中 //反之是false,默认为false,这个boolean值写在硬盘字符流的构造器 IndexWriter fsIndexWriter = new IndexWriter(fsDirectory,LuceneUtil.getAnalyzer(),true,LuceneUtil.getMaxFieldLength()); //指向内存索引库的字符流 IndexWriter ramIndexWriter = new IndexWriter(ramDirectory,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); //将document对象写入内存索引库 ramIndexWriter.addDocument(document); ramIndexWriter.close(); //将内存索引库的所有document对象同步到硬盘索引库中 fsIndexWriter.addIndexesNoOptimize(ramDirectory); fsIndexWriter.close(); } @Test public void findAll() throws Exception{ String keywords = "家"; List<Article> articleList = new ArrayList<Article>(); QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer()); Query query = queryParser.parse(keywords); IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory()); TopDocs topDocs = indexSearcher.search(query,100); for(int i=0;i<topDocs.scoreDocs.length;i++){ ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int no = scoreDoc.doc; Document document = indexSearcher.doc(no); Article article = (Article) LuceneUtil.document2javabean(document,Article.class); articleList.add(article); } for(Article a : articleList){ System.out.println(a); } } }
package cn.itcast.javaee.lucene.analyzer;

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

import cn.itcast.javaee.lucene.util.*;

/**
 * Shows how Lucene's built-in analyzers and a third-party analyzer split text.
 * @author AdminTC
 */
public class TestAnalyzer {

	/**
	 * Prints the analyzer's class followed by every term it produces for the text.
	 *
	 * @param analyzer the analyzer under test
	 * @param text the text to split
	 * @throws Exception if the token stream cannot be read
	 */
	private static void testAnalyzer(Analyzer analyzer, String text) throws Exception {
		System.out.println("当前使用的分词器:" + analyzer.getClass());
		TokenStream tokenStream = analyzer.tokenStream("content",new StringReader(text));
		// addAttribute returns the shared attribute instance, so it is fetched once
		TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
		try {
			// Consumer workflow from the TokenStream documentation:
			// reset(), incrementToken() until false, end(), close()
			tokenStream.reset();
			while (tokenStream.incrementToken()) {
				System.out.println(termAttribute.term());
			}
			tokenStream.end();
		} finally {
			tokenStream.close();
		}
	}

	public static void main(String[] args) throws Exception {
		// Analyzers shipped with Lucene
		//testAnalyzer(new StandardAnalyzer(LuceneUtil.getVersion()),"传智播客说我们的首都是北京呀it");
		//testAnalyzer(new FrenchAnalyzer(LuceneUtil.getVersion()),"传智播客说我们的首都是北京呀it");
		//testAnalyzer(new RussianAnalyzer(LuceneUtil.getVersion()),"传智播客说我们的首都是北京呀it");
		//testAnalyzer(new ChineseAnalyzer(),"传智播客说我们的首都是北京呀it");
		//testAnalyzer(new CJKAnalyzer(LuceneUtil.getVersion()),"传智播客说我们的首都是北京呀it"); // splits into two-character pairs
		//testAnalyzer(new CJKAnalyzer(LuceneUtil.getVersion()),"传智是一家IT培训机构");
		//testAnalyzer(new FrenchAnalyzer(LuceneUtil.getVersion()),"传智是一家how are you培训机构");
		// Third-party IK analyzer
		//testAnalyzer(new IKAnalyzer(),"传智播客说我们的首都是北京呀");
		testAnalyzer(new IKAnalyzer(),"上海自来水来自海上");
	}
}
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd"> <properties> <comment>IK Analyzer 扩展配置</comment> <!-- Extension dictionary: list your own domain-specific words here so the analyzer keeps them as terms --> <entry key="ext_dict">/mydict.dic</entry> <!-- Extension stop-word dictionary: words that should not be searchable --> <entry key="ext_stopwords">/surname.dic</entry> </properties>
三)搜索结果高亮 3.1什么是搜索结果高亮 在搜索结果中,将与关键字相同的字符用红色显示 String keywords = "培训"; List<Article> articleList = new ArrayList<Article>(); QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer()); Query query = queryParser.parse(keywords); IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory()); TopDocs topDocs = indexSearcher.search(query,1000000); Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>"); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter,scorer); for(int i=0;i<topDocs.scoreDocs.length;i++){ ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int no = scoreDoc.doc; Document document = indexSearcher.doc(no); String highlighterContent = highlighter.getBestFragment(LuceneUtil.getAnalyzer(),"content",document.get("content")); document.getField("content").setValue(highlighterContent); Article article = (Article) LuceneUtil.document2javabean(document,Article.class); articleList.add(article); } for(Article article : articleList){ System.out.println(article); } } 四)搜索结果摘要 4.1什么是搜索结果摘要 如果搜索结果内容太多,我们只想显示前几个字符, 必须与高亮一起使用 String keywords = "培训"; List<Article> articleList = new ArrayList<Article>(); QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer()); Query query = queryParser.parse(keywords); IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory()); TopDocs topDocs = indexSearcher.search(query,1000000); Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>"); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter,scorer); Fragmenter fragmenter = new SimpleFragmenter(4); highlighter.setTextFragmenter(fragmenter); for(int i=0;i<topDocs.scoreDocs.length;i++){ ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int no = scoreDoc.doc; Document document = indexSearcher.doc(no); String highlighterContent = 
highlighter.getBestFragment(LuceneUtil.getAnalyzer(),"content",document.get("content")); document.getField("content").setValue(highlighterContent); Article article = (Article) LuceneUtil.document2javabean(document,Article.class); articleList.add(article); } for(Article article : articleList){ System.out.println(article); } }
package cn.itcast.javaee.lucene.highlighter;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Highlights the keyword inside search results.
 * @author AdminTC
 */
public class ArticleDao {

	/**
	 * Adds a document to the index.
	 */
	@Test
	public void add() throws Exception {
		Article article = new Article(1,"培训","传智是一家it培训机构",10);
		Document document = LuceneUtil.javabean2document(article);
		IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory(),LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength());
		try {
			indexWriter.addDocument(document);
		} finally {
			// Close even on failure so write.lock is released
			indexWriter.close();
		}
	}

	/**
	 * Searches the content field for a keyword and prints the matching
	 * articles with the keyword wrapped in a red font tag in both title and
	 * content. A field in which the keyword does not occur keeps its original text.
	 */
	@Test
	public void findAll() throws Exception {
		String keywords = "培训";
		List<Article> articleList = new ArrayList<Article>();
		QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
		Query query = queryParser.parse(keywords);

		// Format object: the markup placed around every highlighted term
		Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");
		// Keyword object: decides which terms get highlighted
		Scorer scorer = new QueryScorer(query);
		// Highlighter object
		Highlighter highlighter = new Highlighter(formatter,scorer);

		IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
		try {
			TopDocs topDocs = indexSearcher.search(query,100);
			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				// Stored document, keyword not highlighted yet
				Document document = indexSearcher.doc(scoreDoc.doc);
				// Put the highlighted text back into the document
				highlightField(highlighter,document,"title");
				highlightField(highlighter,document,"content");
				articleList.add((Article) LuceneUtil.document2javabean(document,Article.class));
			}
		} finally {
			indexSearcher.close();
		}
		for (Article a : articleList) {
			System.out.println(a);
		}
	}

	// Replaces the field's value with its highlighted fragment. getBestFragment
	// returns null when the text contains no query term; the original value is
	// then kept instead of being overwritten with null.
	private static void highlightField(Highlighter highlighter,Document document,String fieldName) throws Exception {
		String original = document.get(fieldName);
		if (original == null) {
			return; // the document has no such field
		}
		String fragment = highlighter.getBestFragment(LuceneUtil.getAnalyzer(),fieldName,original);
		if (fragment != null) {
			document.getField(fieldName).setValue(fragment);
		}
	}
}
package cn.itcast.javaee.lucene.search;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Demonstrates searching across several document fields at once in Lucene.
 *
 * @author AdminTC
 */
public class ArticleDao {

    /** Upper bound on the number of hits fetched by {@link #findAll()}. */
    private static final int MAX_HITS = 100;

    /**
     * Adds one sample {@link Article} to the index as a Lucene document.
     * Swap the active line with one of the commented alternatives and re-run
     * to add the other sample articles.
     *
     * @throws Exception if the article cannot be converted or the index cannot be written
     */
    @Test
    public void add() throws Exception {
        Article article = new Article(1, "培训", "传智是一家it培训机构", 10);
        //Article article = new Article(2,"培训","北大是一家it培训机构",20);
        //Article article = new Article(3,"培训","中大是一家it培训机构",20);
        //Article article = new Article(4,"培训","小大是一家it培训机构",30);
        Document document = LuceneUtil.javabean2document(article);
        IndexWriter indexWriter = new IndexWriter(
                LuceneUtil.getDirectory(),
                LuceneUtil.getAnalyzer(),
                LuceneUtil.getMaxFieldLength());
        try {
            indexWriter.addDocument(document);
        } finally {
            // Always close: an unclosed writer keeps the index write lock and
            // makes every later attempt to open a writer fail.
            indexWriter.close();
        }
    }

    /**
     * Searches both the "content" and "title" fields for the keyword and prints
     * every matching article.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void findAll() throws Exception {
        String keywords = "机构";
        List<Article> articleList = new ArrayList<Article>();

        // Single-field alternative:
        //QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"title",LuceneUtil.getAnalyzer());

        // Multi-field search: widens the search scope so a hit in any of the
        // listed fields matches.
        QueryParser queryParser = new MultiFieldQueryParser(
                LuceneUtil.getVersion(),
                new String[]{"content", "title"},
                LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                Article article = (Article) LuceneUtil.document2javabean(document, Article.class);
                articleList.add(article);
            }
        } finally {
            // Release the index files held by the searcher.
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }
}
1.根据得分排序
package cn.itcast.javaee.lucene.sort;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Demonstrates that, by default, Lucene orders hits by relevance score.
 *
 * @author AdminTC
 */
public class ArticleDao1 {

    /** Upper bound on the number of hits fetched by {@link #findAll()}. */
    private static final int MAX_HITS = 100;

    /**
     * Adds one sample {@link Article} to the index as a Lucene document.
     * Swap the active line with one of the commented alternatives and re-run
     * to add the other sample articles.
     *
     * @throws Exception if the article cannot be converted or the index cannot be written
     */
    @Test
    public void add() throws Exception {
        //Article article = new Article(1,"培训","传智是一家it培训机构",10);
        //Article article = new Article(2,"培训","北大是一家it培训机构",20);
        Article article = new Article(3, "培训", "中大是一家华南地区it培训机构", 30);
        //Article article = new Article(4,"培训","哈哈培训机构是好的培训",9);
        //Article article = new Article(5,"培训","培训培训培训培训培训培训培训培训培训培训培训培训",15);
        //Article article = new Article(6,"培训","培训培训培训培训培训培训培训培训培训培训培训培训培训培训培训培训培训培训培训培训培训培训",35);
        Document document = LuceneUtil.javabean2document(article);
        IndexWriter indexWriter = new IndexWriter(
                LuceneUtil.getDirectory(),
                LuceneUtil.getAnalyzer(),
                LuceneUtil.getMaxFieldLength());
        try {
            // Optional: manually boost this document's score.
            //document.setBoost(100F);
            indexWriter.addDocument(document);
        } finally {
            // Always close: an unclosed writer keeps the index write lock and
            // makes every later attempt to open a writer fail.
            indexWriter.close();
        }
    }

    /**
     * Searches the "content" field for the keyword, prints each hit's score in
     * the order Lucene returned it, then prints the matching articles.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void findAll() throws Exception {
        String keywords = "培训";
        List<Article> articleList = new ArrayList<Article>();

        QueryParser queryParser =
                new QueryParser(LuceneUtil.getVersion(), "content", LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Score assigned to this hit for the current query.
                float score = scoreDoc.score;
                System.out.println("score=" + score);

                Document document = indexSearcher.doc(scoreDoc.doc);
                Article article = (Article) LuceneUtil.document2javabean(document, Article.class);
                articleList.add(article);
            }
        } finally {
            // Release the index files held by the searcher.
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }
}
2.根据单个或多个字段排序
package cn.itcast.javaee.lucene.sort;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Demonstrates sorting Lucene search results by one or several fields.
 *
 * @author AdminTC
 */
public class ArticleDao2 {

    /** Upper bound on the number of hits fetched by {@link #findAll()}. */
    private static final int MAX_HITS = 100;

    /**
     * Adds one sample {@link Article} to the index as a Lucene document.
     * Swap the active line with one of the commented alternatives and re-run
     * to add the other sample articles.
     *
     * @throws Exception if the article cannot be converted or the index cannot be written
     */
    @Test
    public void add() throws Exception {
        //Article article = new Article(1,"培训","传智是一家it培训机构",10);
        //Article article = new Article(2,"培训","北大是一家it培训机构",20);
        //Article article = new Article(3,"培训","中大是一家it培训机构",20);
        Article article = new Article(4, "培训", "小大是一家it培训机构", 30);
        Document document = LuceneUtil.javabean2document(article);
        IndexWriter indexWriter = new IndexWriter(
                LuceneUtil.getDirectory(),
                LuceneUtil.getAnalyzer(),
                LuceneUtil.getMaxFieldLength());
        try {
            indexWriter.addDocument(document);
        } finally {
            // Always close: an unclosed writer keeps the index write lock and
            // makes every later attempt to open a writer fail.
            indexWriter.close();
        }
    }

    /**
     * Searches the "content" field for the keyword and prints the hits ordered
     * by "count" descending, with ties broken by "id" ascending.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void findAll() throws Exception {
        String keywords = "培训";
        List<Article> articleList = new ArrayList<Article>();

        QueryParser queryParser =
                new QueryParser(LuceneUtil.getVersion(), "content", LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        // SortField arguments:
        //   1) name of the document field to sort on, e.g. "id"
        //   2) type of that field, given as a SortField constant
        //   3) true = descending (like ORDER BY id DESC),
        //      false = ascending (like ORDER BY id ASC)
        // Single-field alternative:
        //Sort sort = new Sort(new SortField("id",SortField.INT,false));

        // Order by "count" descending; when counts are equal, by "id" ascending.
        Sort sort = new Sort(
                new SortField("count", SortField.INT, true),
                new SortField("id", SortField.INT, false));

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            // Default relevance ordering would be:
            //TopDocs topDocs = indexSearcher.search(query,100);

            // The sort argument supplies the ordering; no filter is applied (null).
            TopDocs topDocs = indexSearcher.search(query, null, MAX_HITS, sort);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                Article article = (Article) LuceneUtil.document2javabean(document, Article.class);
                articleList.add(article);
            }
        } finally {
            // Release the index files held by the searcher.
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }
}
PS:lucene异步分页结果