Lucene索引库的维护
Lucene工具类封装
package com.xushuai.lucene;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
/**
 * Static helpers shared by the Lucene examples: opening an index writer,
 * opening an index reader, and printing search hits to the console.
 *
 * @author xushuai (2018/5/7 12:36)
 */
public class LuceneUtil {

    /**
     * Opens an {@link IndexWriter} on the index stored under {@code pathname}.
     *
     * <p>The caller is responsible for closing the returned writer. If the
     * writer cannot be created, the directory opened here is closed before the
     * exception is propagated, so a failed call does not leave it open.
     *
     * <p>Author: xushuai, 2018/5/7 12:37
     *
     * @param pathname file-system path of the index directory
     * @param analyzer analyzer handed to the writer configuration
     * @return a writer bound to the index directory
     * @throws IOException if the directory or the writer cannot be opened
     */
    public static IndexWriter getIndexWriter(String pathname, Analyzer analyzer) throws IOException {
        // Location of the index on disk.
        Directory directory = FSDirectory.open(new File(pathname));
        try {
            // Writer configuration carrying the caller-supplied analyzer.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer);
            // Build the writer from the index location and the configuration.
            return new IndexWriter(directory, indexWriterConfig);
        } catch (IOException | RuntimeException e) {
            // Nobody else holds a reference to the directory at this point.
            closeAfterFailure(directory, e);
            throw e;
        }
    }

    /**
     * Opens an {@link IndexReader} on the index stored under {@code pathname}.
     *
     * <p>The caller is responsible for closing the returned reader. If the
     * reader cannot be opened, the directory opened here is closed before the
     * exception is propagated.
     *
     * <p>Author: xushuai, 2018/5/7 12:45
     *
     * @param pathname file-system path of the index directory
     * @return a reader over the index directory
     * @throws IOException if the directory or the reader cannot be opened
     */
    public static IndexReader getIndexReader(String pathname) throws IOException {
        // Location of the index on disk.
        Directory directory = FSDirectory.open(new File(pathname));
        try {
            return DirectoryReader.open(directory);
        } catch (IOException | RuntimeException e) {
            closeAfterFailure(directory, e);
            throw e;
        }
    }

    /**
     * Runs {@code query} and prints the hit count followed by the
     * {@code filename}, {@code filesize} and {@code filepath} stored fields of
     * each returned document.
     *
     * <p>Author: xushuai, 2018/5/7 13:23
     *
     * @param indexSearcher searcher used to execute the query and load documents
     * @param query         search condition
     * @param count         maximum number of hits to fetch and print
     * @throws IOException if searching or loading a document fails
     */
    public static void printResult(IndexSearcher indexSearcher, Query query, int count) throws IOException {
        // First argument: the condition; second: maximum number of hits returned.
        TopDocs topDocs = indexSearcher.search(query, count);
        // totalHits is the overall match count, which may exceed the hits fetched.
        System.out.println("查询结果总条数:" + topDocs.totalHits);
        for (ScoreDoc hit : topDocs.scoreDocs) {
            // ScoreDoc.doc is the document id used to load the stored document.
            Document document = indexSearcher.doc(hit.doc);
            System.out.println("文件名: " + document.get("filename"));
            System.out.println("文件大小:" + document.get("filesize"));
            System.out.println("文件路径:" + document.get("filepath"));
            // Separator between hits.
            System.out.println("------------------------------------------------------------------------------");
        }
    }

    /**
     * Closes {@code directory} after {@code failure} occurred, recording any
     * close error as suppressed so the original failure stays the one reported.
     */
    private static void closeAfterFailure(Directory directory, Throwable failure) {
        try {
            directory.close();
        } catch (IOException closeFailure) {
            failure.addSuppressed(closeFailure);
        }
    }
}
一、索引库的修改和删除
package com.xushuai.lucene;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.IOException;
/**
 * Index maintenance examples (update and delete), written as JUnit tests.
 *
 * <p>Maintenance covers create/read/update/delete: "create" is building the
 * index itself, and querying is large enough to be covered separately.
 *
 * @author xushuai (2018/5/7 12:47)
 */
public class LuceneManager {

    private IndexWriter indexWriter = null;

    /**
     * Opens the index writer before each test.
     *
     * <p>IKAnalyzer is a third-party analyzer and needs its own dependency.
     *
     * <p>Author: xushuai, 2018/5/7 17:21
     *
     * @throws IOException if the writer cannot be opened
     */
    @Before
    public void setUp() throws IOException {
        indexWriter = LuceneUtil.getIndexWriter("D:\\lucene-solr\\lucene\\index", new IKAnalyzer());
    }

    /**
     * Closes the index writer after each test.
     *
     * <p>Author: xushuai, 2018/5/7 17:21
     *
     * @throws IOException if closing the writer fails
     */
    @After
    public void tearDown() throws IOException {
        // setUp may have failed before assigning the writer; closing null here
        // would raise a NullPointerException on top of the real error.
        if (indexWriter != null) {
            indexWriter.close();
        }
    }

    /**
     * Updates the index. In Lucene an update is a delete followed by an add.
     *
     * <p>Author: xushuai, 2018/5/7 12:56
     *
     * @throws IOException if the update fails
     */
    @Test
    public void luceneUpdateRepository() throws IOException {
        // Replacement document.
        Document document = new Document();
        // NOTE(review): the replacement uses the field names "fname"/"fcontent",
        // while the lookup term below (and LuceneUtil.printResult) use "filename".
        // Confirm this mismatch is intended.
        document.add(new TextField("fname", "修改后的文件名", Field.Store.YES));
        document.add(new TextField("fcontent", "修改后的文件内容", Field.Store.YES));
        // The first argument is the Term used to find the documents to replace.
        indexWriter.updateDocument(new Term("filename", "java"), document);
    }

    /**
     * Demonstrates the two ways of deleting from the index.
     *
     * <p>NOTE(review): both variants run in the same test, and the first one
     * clears the whole index, so the query-based delete that follows has
     * nothing left to match. Comment one of them out to try them separately.
     *
     * <p>Author: xushuai, 2018/5/7 13:00
     *
     * @throws IOException if a delete fails
     */
    @Test
    public void luceneDeleteRepository() throws IOException {
        // Variant 1: delete everything in the index.
        indexWriter.deleteAll();

        // Variant 2: delete by condition (query first, then delete), e.g. all
        // documents whose "filename" field contains the term "java".
        Query query = new TermQuery(new Term("filename", "java"));
        // deleteDocuments takes Query varargs, so several conditions can be combined.
        indexWriter.deleteDocuments(query);
    }
}
二、查询索引(Query子类)
package com.xushuai.lucene;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
/**
 * Index query examples, one JUnit test per {@link Query} subclass.
 *
 * @author xushuai (2018/5/7 13:19)
 */
public class LuceneQuery {

    private IndexSearcher indexSearcher = null;

    /**
     * Opens a reader on the index and wraps it in a searcher before each test.
     *
     * <p>Author: xushuai, 2018/5/7 17:27
     *
     * @throws IOException if the index cannot be opened
     */
    @Before
    public void setUp() throws IOException {
        IndexReader indexReader = LuceneUtil.getIndexReader("D:\\lucene-solr\\lucene\\index");
        indexSearcher = new IndexSearcher(indexReader);
    }

    /**
     * Closes the reader behind the searcher after each test.
     *
     * <p>Author: xushuai, 2018/5/7 17:27
     *
     * @throws IOException if closing the reader fails
     */
    @After
    public void tearDown() throws IOException {
        // setUp may have failed before the searcher was created; dereferencing
        // null here would raise a NullPointerException on top of the real error.
        if (indexSearcher != null) {
            indexSearcher.getIndexReader().close();
        }
    }

    /**
     * Queries every document in the index.
     *
     * <p>Author: xushuai, 2018/5/7 13:22
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneMatchAllDocsQuery() throws IOException {
        // MatchAllDocsQuery matches all documents.
        Query query = new MatchAllDocsQuery();
        // Print at most the first 10 hits.
        LuceneUtil.printResult(indexSearcher, query, 10);
    }

    /**
     * Exact term query.
     *
     * <p>Author: xushuai, 2018/5/7 13:32
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneTermQuery() throws IOException {
        // TermQuery matches by Term, here: documents whose "filename" field
        // contains the term "java".
        Query query = new TermQuery(new Term("filename", "java"));
        // Print at most the first 10 hits.
        LuceneUtil.printResult(indexSearcher, query, 10);
    }

    /**
     * Numeric range query on the {@code filesize} field.
     *
     * <p>Author: xushuai, 2018/5/7 13:34
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneNumericRangeQuery() throws IOException {
        /*
         * newLongRange arguments:
         * 1. field name
         * 2. minimum value
         * 3. maximum value
         * 4. whether the minimum is included (boolean)
         * 5. whether the maximum is included (boolean)
         */
        Query query = NumericRangeQuery.newLongRange("filesize", 50L, 200L, true, true);
        // Print at most the first 10 hits.
        LuceneUtil.printResult(indexSearcher, query, 10);
    }

    /**
     * Combined (boolean) query.
     *
     * <p>Author: xushuai, 2018/5/7 13:38
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneBooleanQuery() throws IOException {
        BooleanQuery booleanQuery = new BooleanQuery();
        // The individual conditions.
        Query query1 = new TermQuery(new Term("filename", "java"));
        Query query2 = new TermQuery(new Term("filename", "apache"));
        /*
         * How the clauses relate to each other:
         * BooleanClause.Occur.MUST:     the condition has to match
         * BooleanClause.Occur.MUST_NOT: the condition must not match
         * BooleanClause.Occur.SHOULD:   the condition may or may not match, similar to OR
         *
         * The combination below reads: documents whose file name contains
         * "java" or "apache".
         */
        booleanQuery.add(query1, BooleanClause.Occur.SHOULD);
        booleanQuery.add(query2, BooleanClause.Occur.SHOULD);
        // Print at most the first 10 hits.
        LuceneUtil.printResult(indexSearcher, booleanQuery, 10);
    }
}