lucene的基础知识这里就不做过多的赘述
1、重置索引(创建索引)
1. 重置索引的接口
luceneType 参数用于区分业务库中的不同分类,接口会按传入的分类重建对应的索引
/**
 * Rebuilds the index for one category and reports the outcome.
 *
 * @param luceneType category whose index is rebuilt; optional request parameter
 * @return a success result when the rebuild finished, a failure result when it threw
 */
@ApiOperation(value = "重置索引", notes = "")
@RequestMapping(value = "/index/reset", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
public ResultDTO<Void> resetLucense(@RequestParam(required = false) final String luceneType) {
    ResultDTO<Void> outcome;
    try {
        LuceneUtil.createStructureIndexByLuceneType(luceneType);
        outcome = ResultDTO.success();
    } catch (Exception e) {
        // Any failure is printed and turned into a failure result instead of propagating.
        e.printStackTrace();
        outcome = ResultDTO.failure();
    }
    return outcome;
}
2. LuceneUtil工具类中的方法
/**
 * Rebuilds the structured index of a single category: the documents tagged with that
 * category are deleted first, then re-created through {@code IndexerDB}.
 *
 * @param luceneType value of the "luceneType" field identifying the category
 * @throws RuntimeException wrapping any failure of the delete or rebuild step
 */
public static void createStructureIndexByLuceneType(String luceneType) {
    try {
        // Drop the existing documents of this category.
        Term categoryTerm = new Term("luceneType", luceneType);
        LuceneUtil.deleteIndex(categoryTerm);
        // Re-index the category.
        new IndexerDB().executeByLuceneType(luceneType);
        log.debug("重置结构化索引成功");
    } catch (Exception e) {
        log.error("重置结构化索引失败.....");
        throw new RuntimeException(e);
    }
}
2.1.根据分类删除原先索引
/**
 * Deletes every document matching any of the given terms.
 *
 * @param terms delete conditions; currently used with the category ("luceneType") term
 * @throws RuntimeException wrapping the IOException raised by the writer
 */
public static void deleteIndex(Term... terms) {
    try {
        // Writer is obtained from the shared accessor and used directly.
        LuceneUtil.getIndexWriter().deleteDocuments(terms);
        // Close the shared writer/reader now that the index changed.
        LuceneUtil.indexChanged();
        log.debug("删除索引成功");
    } catch (IOException e) {
        log.error("删除索引失败.....");
        throw new RuntimeException(e);
    }
}
2.2根据分类重建索引
/**
 * Runs a rebuild (flag 0) for the given category and prints how long it took.
 *
 * @param luceneType category to index
 * @return always the literal "success"
 * @throws Exception whatever the indexing step throws
 */
public String executeByLuceneType(String luceneType) throws Exception {
    final long startedAt = System.currentTimeMillis();
    // First argument 0 selects the rebuild path.
    final int indexedCount = getTableNameFromDBAndIndexingByLuceneType(0, luceneType);
    final long elapsed = System.currentTimeMillis() - startedAt;
    System.out.println("Indexing " + indexedCount + " files took " + elapsed + " milliseconds");
    return "success";
}
getTableNameFromDBAndIndexingByLuceneType 方法根据第一个参数(flg)和第二个参数(luceneType)创建索引:
flg 决定通过哪种方式取数(0 调用 getDataFromTable,1 调用 getIncreaseDataFromTable),从而得到不同的 ResultSet
// Excerpt: choose the data source according to flg, then index the rows.
// tempRs stays null when flg is neither 0 nor 1 (the switch has no default branch).
ResultSet tempRs = null;
switch (flg) {
// flg == 0: the value passed by executeByLuceneType for a rebuild
case 0:
tempRs = getDataFromTable(sql,String.valueOf(i));
break;
// flg == 1: presumably the incremental path, judging by the helper name; verify against caller
case 1:
tempRs = getIncreaseDataFromTable(sql);
break;
}
try {
// Main indexing step
indexData(writer, tempRs, tableName);
} catch (Exception e) {
// Failures are printed and logged (message only); they are not rethrown.
e.printStackTrace();
log.error(e.getMessage());
}finally {
// Runs whether or not indexing succeeded.
JDBCUtil.close();
}
创建索引的主要代码
// Excerpt: turn every row of rs into one Lucene document and add it to the writer.
try {
while (rs.next()) {
doc = new Document();
// NOTE(review): metadata is re-read for every row.
rsmd = rs.getMetaData();
int colsNum = rsmd.getColumnCount();
//System.out.println("colsNum: "+colsNum);
// JDBC columns are 1-based, hence 1..colsNum inclusive.
for (int i = 1; i < colsNum + 1; i++) {
String columnName = rsmd.getColumnName(i);
// Every column becomes a stored text field; SQL NULL is indexed as an empty string.
doc.add(new TextField(columnName, (rs.getString(i) == null ? "" : rs.getString(i)), Field.Store.YES));
}
// A lot of business-specific checks are omitted here.
doc.add(new StoredField("dataSource", dataSource));
// luceneType is the category field that the delete-by-category step matches on.
doc.add(new StringField("luceneType", luceneType, Field.Store.YES));
writer.addDocument(doc);
}
}catch ( Exception e){
// Only the message is logged; the loop stops at the first failing row.
log.error(e.getMessage());
}finally {
// Logged on both the success and the failure path.
log.info("创建索引完成");
}
至此索引创建成功
LuceneUtil工具类
package com.leadingsoft.lucence;
import com.leadingsoft.lucence.structure.IndexerDB;
import com.leadingsoft.lucence.util.IOUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
/**
 * Static helpers around the single shared Lucene index: lazy access to the writer, reader and
 * searcher, plus add / delete / update / rebuild operations.
 *
 * <p>The writer, reader and searcher are cached in static fields without synchronization.
 * Every mutating helper finishes with {@link #indexChanged()}, which closes the writer and the
 * reader and clears the cached instances so that the next lookup reopens them.
 *
 * @author FunFeng
 */
@Slf4j
public class LuceneUtil {
    /** Cached writer; created by {@link #getIndexWriter()}, cleared by {@link #indexChanged()}. */
    private static IndexWriter indexWriter;
    /** Cached reader; created by {@link #getIndexReader()}, cleared by {@link #indexChanged()}. */
    private static IndexReader indexReader;
    /** Cached searcher built on top of {@link #indexReader}. */
    private static IndexSearcher indexSearcher;

    /**
     * Returns the shared writer, opening it on first use.
     *
     * @return the shared writer
     * @throws RuntimeException wrapping the IOException raised while opening the writer
     */
    public static IndexWriter getIndexWriter() {
        if (null == indexWriter) {
            try {
                indexWriter = new IndexWriter(LuceneConfig.getDirectory(), LuceneConfig.getIndexWriterConfig());
            } catch (IOException e) {
                // The constructor failed, so nothing was assigned and there is nothing to close.
                throw new RuntimeException(e);
            }
        }
        return indexWriter;
    }

    /**
     * Returns the shared reader, opening it on first use.
     *
     * @return the shared reader
     * @throws RuntimeException wrapping the IOException raised while opening the reader
     */
    public static IndexReader getIndexReader() {
        if (null == indexReader) {
            try {
                indexReader = DirectoryReader.open(LuceneConfig.getDirectory());
            } catch (IOException e) {
                // open() failed, so nothing was assigned and there is nothing to close.
                throw new RuntimeException(e);
            }
        }
        return indexReader;
    }

    /**
     * Returns a searcher bound to the current shared reader.
     *
     * <p>The searcher is rebuilt whenever the reader has been dropped by {@link #indexChanged()}
     * and also when a reader exists but no searcher was created yet (for example because
     * {@link #getIndexReader()} was called first); previously that second case returned null.
     *
     * @return the shared searcher, never null
     */
    public static IndexSearcher getIndexSearcher() {
        if (null == indexReader || null == indexSearcher) {
            indexSearcher = new IndexSearcher(LuceneUtil.getIndexReader());
        }
        return indexSearcher;
    }

    /**
     * Closes the shared writer and reader and forgets the cached writer, reader and searcher.
     *
     * <p>The cached references are cleared even when closing fails, and the reader is closed
     * even when closing the writer throws, so a failed close cannot leave a stale instance
     * behind for later calls.
     *
     * @throws RuntimeException wrapping the IOException raised by either close call
     */
    public static void indexChanged() {
        // The searcher wraps the reader that is about to be closed.
        indexSearcher = null;
        try {
            closeWriter();
        } finally {
            closeReader();
        }
    }

    /** Closes and clears the cached writer, if any. */
    private static void closeWriter() {
        if (null == indexWriter) {
            return;
        }
        try {
            indexWriter.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            indexWriter = null;
        }
    }

    /** Closes and clears the cached reader, if any. */
    private static void closeReader() {
        if (null == indexReader) {
            return;
        }
        try {
            indexReader.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            indexReader = null;
        }
    }

    /**
     * Adds documents to the index.
     *
     * @param docs one or more documents; an empty argument list is a no-op
     * @throws RuntimeException wrapping the IOException raised by the writer
     */
    public static void addIndex(Document... docs) {
        if (docs.length == 0) {
            return;
        }
        try {
            IndexWriter writer = LuceneUtil.getIndexWriter();
            writer.addDocuments(Arrays.asList(docs));
            LuceneUtil.indexChanged();
            log.debug("添加索引成功");
        } catch (IOException e) {
            log.error("添加索引失败.....");
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes every document matching any of the given terms.
     *
     * @param terms delete conditions
     * @throws RuntimeException wrapping the IOException raised by the writer
     */
    public static void deleteIndex(Term... terms) {
        try {
            IndexWriter writer = LuceneUtil.getIndexWriter();
            writer.deleteDocuments(terms);
            LuceneUtil.indexChanged();
            log.debug("删除索引成功");
        } catch (IOException e) {
            log.error("删除索引失败.....");
            throw new RuntimeException(e);
        }
    }

    /**
     * Rebuilds the structured index from scratch: deletes all documents of every category,
     * then runs {@code IndexerDB.execute()}.
     *
     * @throws RuntimeException wrapping any failure of the delete or rebuild step
     */
    public static void createStructureIndex() {
        try {
            LuceneUtil.deleteAllIndex();
            IndexerDB indexDB = new IndexerDB();
            indexDB.execute();
            log.debug("重置结构化索引成功");
        } catch (Exception e) {
            log.error("重置结构化索引失败.....");
            throw new RuntimeException(e);
        }
    }

    /**
     * Rebuilds the structured index of one category: deletes the documents whose
     * "luceneType" field equals the given value, then re-indexes that category.
     *
     * @param luceneType category value
     * @throws RuntimeException wrapping any failure of the delete or rebuild step
     */
    public static void createStructureIndexByLuceneType(String luceneType) {
        try {
            // Delete the documents of this category first.
            LuceneUtil.deleteIndex(new Term("luceneType", luceneType));
            IndexerDB indexDB = new IndexerDB();
            indexDB.executeByLuceneType(luceneType);
            log.debug("重置结构化索引成功");
        } catch (Exception e) {
            log.error("重置结构化索引失败.....");
            throw new RuntimeException(e);
        }
    }

    /**
     * Incrementally updates the structured index via {@code IndexerDB.executeIncrease()}.
     *
     * @throws RuntimeException wrapping any failure of the update
     */
    public static void increaseStructureIndex() {
        try {
            IndexerDB indexDB = new IndexerDB();
            indexDB.executeIncrease();
            log.debug("更新结构化索引成功");
        } catch (Exception e) {
            log.error("更新结构化索引失败.....");
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads a file as GBK text.
     *
     * @param path file path
     * @return the file content, or an empty string when the path is not a regular file
     * @throws Exception whatever {@code IOUtil.getFileContent} throws
     */
    public static String readFileContent(String path) throws Exception {
        IOUtil utils = new IOUtil();
        File fileChild = new File(path);
        String content = "";
        if (fileChild.isFile()) {
            content = utils.getFileContent(fileChild, "GBK");
        }
        return content;
    }

    /**
     * Indexes every file found under the upload folder as a document of category "1".
     *
     * <p>A document that cannot be added is logged and skipped. The shared writer is released
     * through {@link #indexChanged()} even when reading a file fails part-way through.
     *
     * @throws Exception whatever listing or reading the files throws
     */
    public static void createFileIndex() throws Exception {
        IOUtil utils = new IOUtil();
        // TODO: the upload folder is hard-coded; it should come from configuration.
        String path = "D:/sbdb/psms/upload-file/";
        long startTime = new Date().getTime();
        // All file names below the upload folder.
        List<String> fileList = utils.getFileName(path);
        IndexWriter writer = LuceneUtil.getIndexWriter();
        try {
            for (int i = 0; i < fileList.size(); i++) {
                String filePath = fileList.get(i);
                File fileChild = new File(filePath);
                String content = utils.getFileContent(fileChild, "GBK");
                Document doc = new Document();
                doc.add(new StoredField("id", i));
                doc.add(new TextField("content", content, Field.Store.YES));
                doc.add(new StoredField("uid", i));
                doc.add(new TextField("name", fileChild.getName(), Field.Store.YES));
                doc.add(new StringField("path", filePath, Field.Store.YES));
                // source/type are derived from a marker contained in the file path.
                if (filePath.contains("scss")) {
                    doc.add(new StoredField("source", 9));
                    doc.add(new StringField("type", "share", Field.Store.YES));
                } else if (filePath.contains("declare")) {
                    doc.add(new StoredField("source", 1));
                    doc.add(new StringField("type", "permit_code", Field.Store.YES));
                } else if (filePath.contains("infoChange")) {
                    doc.add(new StoredField("source", 2));
                    doc.add(new StringField("type", "business_license", Field.Store.YES));
                } else {
                    doc.add(new StoredField("source", 0));
                    doc.add(new StringField("type", "0", Field.Store.YES));
                }
                doc.add(new StoredField("createDate", new Date().getTime()));
                doc.add(new StringField("luceneType", "1", Field.Store.YES));
                try {
                    writer.addDocument(doc);
                } catch (Exception e) {
                    // Best effort: one bad document must not abort the whole run.
                    log.error("添加文件索引失败: {}", filePath, e);
                }
            }
        } finally {
            LuceneUtil.indexChanged();
        }
        // Report how long indexing took.
        long endTime = new Date().getTime();
        System.out.println(" 这花费了 " + (endTime - startTime) + " 毫秒来把文档增加到索引里面去! ");
    }

    /**
     * Deletes the structured index, i.e. all documents of category "2".
     *
     * @throws RuntimeException wrapping any failure of the delete
     */
    public static void deleteAllStructureIndex() {
        try {
            LuceneUtil.deleteIndex(new Term("luceneType", "2"));
            log.debug("删除结构化索引成功");
        } catch (Exception e) {
            log.error("删除结构化索引失败.....");
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes the file index, i.e. all documents of category "1" (the category written by
     * {@link #createFileIndex()}).
     *
     * @throws RuntimeException wrapping any failure of the delete
     */
    public static void deleteAllFileIndex() {
        try {
            LuceneUtil.deleteIndex(new Term("luceneType", "1"));
            log.debug("删除文件索引成功");
        } catch (Exception e) {
            log.error("删除文件索引失败.....");
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes every document in the index.
     *
     * @throws RuntimeException wrapping the IOException raised by the writer
     */
    public static void deleteAllIndex() {
        try {
            IndexWriter writer = LuceneUtil.getIndexWriter();
            writer.deleteAll();
            LuceneUtil.indexChanged();
            log.debug("删除所有索引成功");
        } catch (IOException e) {
            log.error("删除所有索引失败.....");
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the documents matching a term with a new document; the term is normally built
     * from the document id.
     *
     * @param term   condition selecting the documents to replace
     * @param newDoc replacement document
     * @throws RuntimeException wrapping the IOException raised by the writer
     */
    public static void updateIndex(Term term, Document newDoc) {
        try {
            IndexWriter writer = LuceneUtil.getIndexWriter();
            writer.updateDocument(term, newDoc);
            LuceneUtil.indexChanged();
            log.debug("更新索引成功");
        } catch (IOException e) {
            log.error("更新索引失败.....");
            throw new RuntimeException(e);
        }
    }
}
LuceneConfig工具类
package com.leadingsoft.lucence;
import java.io.IOException;
import java.nio.file.Paths;
import com.chenlb.mmseg4j.analysis.MMSegAnalyzer;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.env.PropertySourcesLoader;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.env.Environment;
import org.springframework.core.env.PropertySource;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import org.springframework.stereotype.Component;
/**
 * Lucene configuration: the index directory, the analyzer and the writer configuration.
 *
 * @author DF
 */
@Slf4j
@Component
public class LuceneConfig {
    @Autowired
    private Environment env;
    /** Index directory, opened once in the static initializer. */
    private static Directory directory;
    /** Analyzer used for writer configurations. */
    private static Analyzer analyzer;
    /** Most recently created writer configuration. */
    private static IndexWriterConfig indexWriterConfig;
    // TODO: read the index location from the configuration file instead.
    private static String indexPath = "D:/sbdb/psms/lucene/";
    private ResourceLoader loader = new DefaultResourceLoader();

    static {
        try {
            // Index directory on disk.
            directory = FSDirectory.open(Paths.get(indexPath));
            // The analyzer is replaceable, e.g. by StandardAnalyzer.
            analyzer = new MMSegAnalyzer();
            // Release index resources before the JVM exits.
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                    releaseOnShutdown();
                }
            }));
        } catch (Exception e) {
            log.error("lucence 配置错误.....");
            throw new RuntimeException(e);
        }
    }

    /** Closes the shared writer/reader, then the directory. */
    private static void releaseOnShutdown() {
        LuceneUtil.indexChanged();
        if (null == directory) {
            return;
        }
        try {
            directory.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * @return the shared index directory
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * @return the shared analyzer
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Creates a fresh writer configuration on every call.
     *
     * @return a new configuration in CREATE_OR_APPEND mode (create the index when it does not
     *         exist, append otherwise)
     */
    public static IndexWriterConfig getIndexWriterConfig() {
        IndexWriterConfig fresh = new IndexWriterConfig(analyzer);
        indexWriterConfig = fresh;
        fresh.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        return fresh;
    }
}
查询结果分页工具类
package com.leadingsoft.lucence;
import java.util.ArrayList;
import java.util.List;
/**
 * One page of Lucene search results together with its navigation data.
 *
 * <p>The navigation fields (first, last, prev, next, firstPage, lastPage) are derived by
 * {@link #initialize()}, which {@link #setList(List)} calls; set the total before the list.
 *
 * @param <T> element type of the page content
 * @author DF
 */
public class LucenePage<T> {
    /** Page size assumed by {@link #initialize()} when the configured size is not positive. */
    private static final int DEFAULT_PAGE_SIZE = 20;

    private int pageNo = 1; // current page, 1-based
    private int pageSize = 10; // rows per page
    private List<T> list = new ArrayList<T>(); // page content
    private long total; // total number of hits
    private int first; // index of the first page
    private int last; // index of the last page
    private int prev; // index of the previous page
    private int next; // index of the next page
    private boolean firstPage; // whether the current page is the first one
    private boolean lastPage; // whether the current page is the last one

    /**
     * @param pageNo   current page, 1-based
     * @param pageSize rows per page
     */
    public LucenePage(int pageNo, int pageSize) {
        this.pageNo = pageNo;
        this.pageSize = pageSize;
    }

    /**
     * Zero-based offset of the first row of the current page.
     *
     * <p>A page number below 1 is treated as page 1 and a negative page size as 0, so the
     * result is never negative; the product is computed in {@code long} and capped at
     * {@link Integer#MAX_VALUE} instead of overflowing.
     *
     * @return the offset, in the range 0..Integer.MAX_VALUE
     */
    public int getFirstResult() {
        long pagesBefore = Math.max(pageNo, 1) - 1L;
        long offset = pagesBefore * Math.max(pageSize, 0);
        return (int) Math.min(offset, (long) Integer.MAX_VALUE);
    }

    public int getPageNo() {
        return pageNo;
    }

    public void setPageNo(int pageNo) {
        this.pageNo = pageNo;
    }

    public int getFirst() {
        return first;
    }

    public void setFirst(int first) {
        this.first = first;
    }

    public int getLast() {
        return last;
    }

    public void setLast(int last) {
        this.last = last;
    }

    public int getPrev() {
        return prev;
    }

    public void setPrev(int prev) {
        this.prev = prev;
    }

    public int getNext() {
        return next;
    }

    public void setNext(int next) {
        this.next = next;
    }

    public boolean isFirstPage() {
        return firstPage;
    }

    public void setFirstPage(boolean firstPage) {
        this.firstPage = firstPage;
    }

    public boolean isLastPage() {
        return lastPage;
    }

    public void setLastPage(boolean lastPage) {
        this.lastPage = lastPage;
    }

    public int getPageSize() {
        return pageSize;
    }

    public void setPageSize(int pageSize) {
        this.pageSize = pageSize;
    }

    public List<T> getList() {
        return list;
    }

    /**
     * Stores the page content and recomputes the navigation fields.
     *
     * @param list page content
     */
    public void setList(List<T> list) {
        this.list = list;
        this.initialize();
    }

    public long getTotal() {
        return total;
    }

    public void setTotal(long total) {
        this.total = total;
    }

    /**
     * Derives first/last/prev/next and the first-/last-page flags from total, pageSize and
     * pageNo, clamping pageNo into the range first..last.
     *
     * <p>A non-positive page size is replaced by {@value #DEFAULT_PAGE_SIZE} for both the
     * division and the remainder, so a page size of 0 no longer raises ArithmeticException.
     * The flags are recomputed from the clamped page number on every call.
     */
    public void initialize() {
        final int effectiveSize = this.pageSize < 1 ? DEFAULT_PAGE_SIZE : this.pageSize;

        this.first = 1;
        this.last = (int) (this.total / effectiveSize + this.first - 1);
        // A partial trailing page, or no rows at all, still counts as one page.
        if (this.total % effectiveSize != 0 || this.last == 0) {
            this.last++;
        }
        if (this.last < this.first) {
            this.last = this.first;
        }

        // Clamp the current page into first..last.
        if (this.pageNo < this.first) {
            this.pageNo = this.first;
        }
        if (this.pageNo > this.last) {
            this.pageNo = this.last;
        }
        this.firstPage = this.pageNo == this.first;
        this.lastPage = this.pageNo == this.last;

        if (this.pageNo < this.last - 1) {
            this.next = this.pageNo + 1;
        } else {
            this.next = this.last;
        }
        if (this.pageNo > 1) {
            this.prev = this.pageNo - 1;
        } else {
            this.prev = this.first;
        }
    }
}
LuceneSearch工具类
package com.leadingsoft.lucence;
import com.leadingsoft.lucence.convertor.LuceneConvert;
import com.leadingsoft.lucence.convertor.MapLuceneConvertor;
import org.apache.lucene.document.Document;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Basic search helpers built on the shared searcher from {@code LuceneUtil}.
 *
 * @author DF
 */
public class LuceneSearch {
    private static Logger logger = LoggerFactory.getLogger(LuceneSearch.class);

    /**
     * Runs a query and fills the given page with the converted hits of that page.
     *
     * <p>The total is set on the page before the list, so the page's navigation fields are
     * derived from the real hit count. A negative start offset is treated as 0.
     *
     * @param page    page request; receives total and list
     * @param query   query to run
     * @param convert converts each matching document into a result object
     * @return the same page instance
     * @throws RuntimeException wrapping the IOException raised by the searcher
     */
    public static <T> LucenePage<T> findPage(LucenePage<T> page, Query query, LuceneConvert<T> convert) {
        IndexSearcher indexSearcher = LuceneUtil.getIndexSearcher();
        long from = Math.max(page.getFirstResult(), 0);
        long to = from + page.getPageSize();
        try {
            // Only the top `to` hits are needed to serve this page.
            TopDocs topDocs = indexSearcher.search(query, topN(to));
            long total = topDocs.totalHits; // real number of matches
            page.setTotal(total);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs; // hits actually returned, at most `to`
            page.setList(convertRange(indexSearcher, scoreDocs, from, to, convert));
            logger.debug("一共匹配到:{},实际取:{}", total, scoreDocs.length);
        } catch (IOException e) {
            logger.error("索引匹配失败");
            throw new RuntimeException(e);
        }
        return page;
    }

    /**
     * Runs a query and returns all converted hits.
     *
     * @param query   query to run
     * @param convert converts each matching document into a result object
     * @return every hit, converted
     */
    public static <T> List<T> findAll(Query query, LuceneConvert<T> convert) {
        LucenePage<T> page = new LucenePage<T>(1, Integer.MAX_VALUE);
        return findPage(page, query, convert).getList();
    }

    /**
     * Runs a query and returns all hits as a single Spring Data page.
     *
     * @param pageable not used; the request is always page 0 with the maximum page size
     * @param query    query to run
     * @param convert  converts each matching document into a result object
     * @return one page holding every hit
     */
    public static <T> Page<T> getAll(Pageable pageable, Query query, LuceneConvert<T> convert) {
        Pageable page = new PageRequest(0, Integer.MAX_VALUE);
        return get(page, query, convert);
    }

    /**
     * Runs a query and returns the requested Spring Data page.
     *
     * <p>The offset is computed in {@code long}, so large page numbers cannot overflow into a
     * negative index.
     *
     * @param page    page request (page number is 0-based)
     * @param query   query to run
     * @param convert converts each matching document into a result object
     * @return the page content together with the total hit count
     * @throws RuntimeException wrapping any failure of the search or conversion
     */
    public static <T> Page<T> get(Pageable page, Query query, LuceneConvert<T> convert) {
        IndexSearcher indexSearcher = LuceneUtil.getIndexSearcher();
        long from = (long) page.getPageNumber() * page.getPageSize();
        long to = from + page.getPageSize();
        try {
            TopDocs topDocs = indexSearcher.search(query, topN(to));
            long total = topDocs.totalHits; // real number of matches
            ScoreDoc[] scoreDocs = topDocs.scoreDocs; // hits actually returned, at most `to`
            List<T> list = convertRange(indexSearcher, scoreDocs, from, to, convert);
            Page<T> result = new PageImpl<T>(list, page, total);
            logger.debug("一共匹配到:{},实际取:{}", total, scoreDocs.length);
            return result;
        } catch (Exception e) {
            logger.error("索引匹配失败");
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the first hit of a sorted query as a map.
     *
     * @param query query to run
     * @param sort  sort order deciding which hit comes first
     * @return the first hit converted by {@code MapLuceneConvertor}, or null when nothing matches
     * @throws RuntimeException wrapping the IOException raised by the searcher
     */
    public static Map print4SortTop1(Query query, Sort sort) {
        IndexSearcher indexSearcher = LuceneUtil.getIndexSearcher();
        try {
            ScoreDoc[] scoreDocs = indexSearcher.search(query, 1, sort).scoreDocs;
            if (scoreDocs.length == 0) {
                return null;
            }
            Document doc = indexSearcher.doc(scoreDocs[0].doc);
            return new MapLuceneConvertor().docToObject(doc);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Number of hits to request: at least 1 (the searcher rejects 0), at most Integer.MAX_VALUE. */
    private static int topN(long to) {
        return (int) Math.max(1L, Math.min(to, (long) Integer.MAX_VALUE));
    }

    /** Loads and converts the hits in [from, to), limited to the hits actually returned. */
    private static <T> List<T> convertRange(IndexSearcher searcher, ScoreDoc[] hits, long from, long to,
            LuceneConvert<T> convert) throws IOException {
        int end = (int) Math.max(0L, Math.min(to, (long) hits.length));
        int begin = (int) Math.min(Math.max(from, 0L), (long) end);
        List<T> list = new ArrayList<T>();
        for (int i = begin; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            list.add(convert.docToObject(doc));
        }
        return list;
    }
}
Document 对象与map对象的转换工具类
package com.leadingsoft.lucence.convertor;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexableField;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
 * Converts between Lucene documents and plain maps of field name to string value.
 */
@Component
public class MapLuceneConvertor implements LuceneConvert<Map> {
    /**
     * Copies every field of the document into a map.
     *
     * @param doc source document
     * @return map from field name to the field's string value; when a name occurs more than
     *         once, the last field wins
     */
    @Override
    public Map docToObject(Document doc) {
        Map<String, String> map = new HashMap<String, String>();
        for (IndexableField field : doc.getFields()) {
            map.put(field.name(), field.stringValue());
        }
        return map;
    }

    /**
     * Builds a document with one stored text field per map entry.
     *
     * <p>Values are converted with {@code toString()}, so non-String values no longer raise
     * ClassCastException, and a null value is stored as an empty string (the same convention
     * the database indexer uses for SQL NULL) instead of being rejected by the field
     * constructor.
     *
     * @param map field names mapped to values
     * @return the new document
     */
    @Override
    public Document objectToDoc(Map map) {
        Document doc = new Document();
        for (Object item : map.entrySet()) {
            Map.Entry<?, ?> entry = (Map.Entry<?, ?>) item;
            Object key = entry.getKey();
            Object value = entry.getValue();
            // A null key is passed through as a null name so the field constructor still rejects it.
            String name = key == null ? null : key.toString();
            String text = value == null ? "" : value.toString();
            doc.add(new TextField(name, text, Field.Store.YES));
        }
        return doc;
    }
}