因为项目需要一个搜索功能,所以研究了一下 Lucene 这个工具包。它是 Apache 软件基金会 Jakarta 项目组的一个子项目,是一个开源的全文搜索引擎工具包。
Lucene 会在本地生成索引,之后需要查询的时候会直接查询索引。索引文件可以存放在磁盘上,也可以存放在内存中。我的笔记本光是跑开发环境就已经竭尽全力了,所以我选择了在磁盘上生成索引文件。
废话少说,开始上代码:
首先我们在 pom.xml 中添加 jar 包依赖:
<!-- IK Analyzer: Chinese word-segmentation analyzer used for both indexing and querying -->
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
<!-- Lucene dependencies; all three artifacts are pinned to the same version -->
<!-- lucene-core: index, document, search and store packages -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.10.2</version>
</dependency>
<!-- lucene-analyzers-common: common analysis components -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.10.2</version>
</dependency>
<!-- lucene-queryparser: provides the classic QueryParser used when searching -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.10.2</version>
</dependency>
为了方便配置,我在 Spring Boot 中添加了一个自定义属性,方便之后修改 Lucene 索引的生成位置。
application-dev.yml(ps:这个配置是可选的,可以按自己的需要自定义,也可以不配置。)
# Custom properties bound by LucaenceConfig (prefix "lucene-params").
lucene-params:
  # Directory on disk where the Lucene index files are written and read.
  # Must be nested under lucene-params, otherwise it becomes a separate top-level key.
  enterprise-url: '//app//www//bigdata_net//dict'
配置好索引的生成位置之后,我们通过下面的配置类来读取这个位置。
LucaenceConfig.java
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Spring-managed holder for the custom {@code lucene-params} configuration properties.
 *
 * <p>Currently exposes a single property, {@code enterpriseUrl}.
 */
@Component
@ConfigurationProperties(prefix = "lucene-params")
public class LucaenceConfig {

    /** Value of the {@code enterpriseUrl} property under the {@code lucene-params} prefix. */
    private String enterpriseUrl;

    /**
     * Stores the configured value; invoked when the property is bound.
     *
     * @param enterpriseUrl the value to store
     */
    public void setEnterpriseUrl(String enterpriseUrl) {
        this.enterpriseUrl = enterpriseUrl;
    }

    /**
     * @return the stored value, or {@code null} if it was never set
     */
    public String getEnterpriseUrl() {
        return this.enterpriseUrl;
    }
}
lucene工具类
LucenceUtil.java
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Helper around a disk-based Lucene index whose directory is taken from
 * {@link LucaenceConfig#getEnterpriseUrl()}.
 *
 * <p>Every operation opens the index directory, does its work and closes everything again.
 * All Lucene resources are managed with try-with-resources so that readers, writers and the
 * directory are closed even when an operation fails; a writer that is never closed keeps the
 * index write lock and blocks every later writer.
 */
@Component
public class LucenceUtil {

    private static final Logger LOGGER = Logger.getLogger(LucenceUtil.class.getName());

    /** Maximum number of hits collected by {@link #search}; paging happens inside this window. */
    private static final int MAX_HITS = 100;

    @Autowired
    private LucaenceConfig lucconfig;

    /**
     * Chinese word-segmentation analyzer (IK), shared by indexing and query parsing.
     */
    static Analyzer analyzer = new IKAnalyzer();

    /**
     * Adds a single document to the index and commits it.
     *
     * <p>I/O failures are logged and not propagated, so callers are not notified of a failed write.
     *
     * @param doc the document to index
     */
    public void write(Document doc) {
        try (Directory directory = openDirectory();
             IndexWriter writer = openWriter(directory)) {
            writer.addDocument(doc);
            writer.commit();
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Failed to add document to the index", e);
        }
    }

    /**
     * Adds a batch of documents to the index using one writer and one commit.
     *
     * <p>I/O failures are logged and not propagated, so callers are not notified of a failed write.
     *
     * @param doc the documents to index
     */
    public void writeList(List<Document> doc) {
        LOGGER.fine(lucconfig.getEnterpriseUrl());
        try (Directory directory = openDirectory();
             IndexWriter writer = openWriter(directory)) {
            for (Document d : doc) {
                writer.addDocument(d);
            }
            writer.commit();
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Failed to add documents to the index", e);
        }
    }

    /**
     * Runs a parsed query against one field and returns one page of the matching documents.
     *
     * <p>At most {@value #MAX_HITS} hits are collected, so pages beyond that window are empty.
     * A page number below 1 or a non-positive page size yields an empty list instead of an
     * out-of-bounds failure.
     *
     * @param field   default field the query is parsed against
     * @param value   query text, in Lucene classic query syntax
     * @param current 1-based page number
     * @param size    number of documents per page
     * @return a map with {@code "length"} (number of hits collected, an {@code Integer}) and
     *         {@code "list"} (the {@code List<Document>} for the requested page)
     * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
     */
    public Map<String, Object> search(String field, String value, int current, int size) throws Exception {
        List<Document> list = new ArrayList<>();
        int length;
        try (Directory directory = openDirectory();
             DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new QueryParser(Version.LUCENE_47, field, analyzer).parse(value);
            ScoreDoc[] hits = searcher.search(query, null, MAX_HITS).scoreDocs;
            length = hits.length;
            // Page bounds are computed in long arithmetic and clamped to [0, hits.length] so that
            // neither a bad page number nor an int overflow can produce an invalid array index.
            long from = Math.max(0L, (current - 1L) * size);
            long to = Math.min((long) hits.length, (long) current * size);
            for (long i = from; i < to; i++) {
                list.add(searcher.doc(hits[(int) i].doc));
            }
        }
        Map<String, Object> map = new HashMap<>();
        map.put("length", length);
        map.put("list", list);
        return map;
    }

    /**
     * Replaces the document identified by {@code field}/{@code value}, or adds {@code dcval}
     * when {@link #searchById} finds no such document (including when no index exists yet).
     *
     * @param field field that identifies the document
     * @param value identifying value
     * @param dcval the new document
     * @throws Exception if the lookup or the update fails
     */
    public void upateIndexes(String field, String value, Document dcval) throws Exception {
        if (!searchById(field, value)) {
            LOGGER.fine("添加索引");
            write(dcval);
            return;
        }
        try (Directory directory = openDirectory();
             IndexWriter writer = openWriter(directory)) {
            // Deletes every document containing the term, then adds the replacement.
            writer.updateDocument(new Term(field, value), dcval);
            writer.commit();
        }
    }

    /**
     * Deletes every document that contains the exact term {@code field:key}.
     *
     * <p>I/O failures are logged and not propagated.
     *
     * @param field field that identifies the document
     * @param key   term text to delete by
     */
    public void deleteDoc(String field, String key) {
        try (Directory directory = openDirectory();
             IndexWriter writer = openWriter(directory)) {
            writer.deleteDocuments(new Term(field, key));
            writer.commit();
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Failed to delete documents from the index", e);
        }
    }

    /**
     * Checks whether the best hit for {@code value} in {@code field} stores exactly
     * {@code value} in that field.
     *
     * <p>Only the single top-scoring hit is inspected. Returns {@code false} when no index
     * exists yet or when the hit does not store the field.
     *
     * @param field field to query and compare
     * @param value value to look for
     * @return {@code true} if the top hit's stored field value equals {@code value}
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public boolean searchById(String field, String value) throws Exception {
        try (Directory directory = openDirectory()) {
            // Opening a reader on a directory without an index throws; treat that as "not found".
            if (!DirectoryReader.indexExists(directory)) {
                return false;
            }
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new QueryParser(Version.LUCENE_47, field, analyzer).parse(value);
                for (ScoreDoc hit : searcher.search(query, null, 1).scoreDocs) {
                    // get(field) is null for non-stored fields, so compare from the known non-null side.
                    if (value.equals(searcher.doc(hit.doc).get(field))) {
                        return true;
                    }
                }
                return false;
            }
        }
    }

    /** Opens the on-disk index directory configured in {@link LucaenceConfig}. */
    private Directory openDirectory() throws IOException {
        return FSDirectory.open(new File(lucconfig.getEnterpriseUrl()));
    }

    /** Creates a writer on {@code directory} with a fresh config (a config instance cannot be shared between writers). */
    private IndexWriter openWriter(Directory directory) throws IOException {
        return new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_47, analyzer));
    }
}
这样我们就可以在 Controller 中直接使用 Lucene 了(下面代码中的 indexDemo 应为注入的 LucenceUtil 实例):
/**
 * Indexes every enterprise row whose {@code ENT_STATUS} column equals {@code "1"}.
 *
 * <p>Each row becomes one document with stored {@code entGuid} and {@code entName} text
 * fields; the whole batch is passed to {@code indexDemo.writeList} in a single call.
 *
 * @return always {@code 1}
 */
@RequestMapping("seveData")
public int seveData(){
    QueryWrapper statusFilter = new QueryWrapper();
    statusFilter.eq("ENT_STATUS", "1");
    List<BgEEnterprise> enterprises = service.list(statusFilter);

    List<Document> documents = new ArrayList<>();
    for (BgEEnterprise enterprise : enterprises) {
        Field guidField = new Field("entGuid", enterprise.getEntGuid(), TextField.TYPE_STORED);
        Field nameField = new Field("entName", enterprise.getEntName(), TextField.TYPE_STORED);
        Document document = new Document();
        document.add(guidField);
        document.add(nameField);
        documents.add(document);
    }

    indexDemo.writeList(documents);
    return 1;
}
/**
 * Searches the {@code entName} field and returns the names on the first page of results
 * (page 1, 15 entries per page). The elapsed time in milliseconds is printed to standard output.
 *
 * @param search query text passed to {@code indexDemo.search}
 * @return the stored {@code entName} value of each document on the page
 * @throws Exception if the search fails
 */
@RequestMapping("select")
public List<String> seveData(String search) throws Exception {
    long startedAt = System.currentTimeMillis();

    List<Document> matches =
            (List<Document>) indexDemo.search("entName", search, 1, 15).get("list");

    List<String> names = new ArrayList<>();
    for (int i = 0; i < matches.size(); i++) {
        names.add(matches.get(i).get("entName"));
    }

    long elapsed = System.currentTimeMillis() - startedAt;
    System.out.println(elapsed);
    return names;
}
/**
 * Demo endpoint: builds a fixed document and passes it to {@code indexDemo.upateIndexes},
 * keyed by its {@code entGuid}. The {@code search} parameter is not used.
 *
 * @param search unused
 * @return always {@code 1}
 * @throws Exception if the index update fails
 */
@RequestMapping("update")
public int update(String search) throws Exception {
    final String guid = "f19cd99186714d54917bde67fdd08e2a";

    Document replacement = new Document();
    replacement.add(new Field("entGuid", guid, TextField.TYPE_STORED));
    replacement.add(new Field("entName", "北京茁爱儿教育科技有限公司", TextField.TYPE_STORED));

    indexDemo.upateIndexes("entGuid", guid, replacement);
    return 1;
}
/**
 * Demo endpoint: builds a fixed document and passes it to {@code indexDemo.write}.
 * The {@code search} parameter is not used.
 *
 * @param search unused
 * @return always {@code 1}
 * @throws Exception declared for symmetry with the other endpoints
 */
@RequestMapping("add")
public int add(String search) throws Exception {
    Field guidField = new Field("entGuid", "1533154465444515", TextField.TYPE_STORED);
    Field nameField = new Field("entName", "我这没有人", TextField.TYPE_STORED);

    Document sample = new Document();
    sample.add(guidField);
    sample.add(nameField);

    indexDemo.write(sample);
    return 1;
}
/**
 * Demo endpoint: passes a fixed {@code entGuid} value to {@code indexDemo.deleteDoc}.
 * The {@code search} parameter is not used.
 *
 * @param search unused
 * @return always {@code 1}
 * @throws Exception declared for symmetry with the other endpoints
 */
@RequestMapping("delete")
public int delete(String search) throws Exception {
    final String idField = "entGuid";
    final String idValue = "1533154465444515";
    indexDemo.deleteDoc(idField, idValue);
    return 1;
}
Lucene 是个很强大的全文检索工具包。本人也是刚刚接触 Lucene,如果有什么错误的地方,请斧正。