lucene各版本之间实现方法有些变化,此文记录下lucene5的实现方式。
一、pom文件
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>5.3.1</version>
</dependency>
二、代码实现(注意:代码中使用的 IKAnalyzer 来自 org.wltea 的 IK Analyzer 中文分词器,需要额外引入其依赖,上面第一节的 pom 中并未包含该依赖)
package com.jthao.lucene.example1;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * Small CRUD demo for a Lucene 5.3.1 index: creates an index over a few
 * sample "project" documents, then shows update/delete/query/read operations.
 *
 * <p>Not thread-safe. Call {@link #close()} when done to release the
 * {@code IndexWriter}, {@code IndexReader} and {@code Directory}.
 */
public class IndexCrud implements AutoCloseable {

    // Sample data: two documents describing equity-transfer projects.
    String[] ids = {"1", "2"};
    String[] projectNames = {"北京中咨时代资产管理有限公司100%股权", "台湾菲格科技发展有限公司100%股权转让"};
    String[] projectCodes = {"G32018BJ1000852-0", "G32018FJ1000017-0"};
    String[] zones = {"上海", "北京"};
    String[] tags = {"商务服务业", "批发业"};

    private Directory directory;
    private Analyzer analyzer;
    private IndexWriterConfig config;
    private IndexWriter writer;
    private IndexReader reader;

    /**
     * Opens (or creates) the index directory, indexes the sample documents
     * and opens a reader over the result.
     *
     * @throws IllegalStateException if the index cannot be initialized.
     *         (Previously the IOException was swallowed with printStackTrace,
     *         leaving every field null so any later call crashed with an NPE.)
     */
    public IndexCrud() {
        try {
            // Filesystem-backed index; swap in RAMDirectory for in-memory use.
            directory = FSDirectory.open(Paths.get("C:\\home\\iflow\\lucene1"));
            //directory = new RAMDirectory();
            // Chinese word segmentation via the IK analyzer.
            analyzer = new IKAnalyzer();
            config = new IndexWriterConfig(analyzer);
            writer = new IndexWriter(directory, config);
            createIndex();
            reader = DirectoryReader.open(directory);
        } catch (IOException e) {
            // Fail fast rather than continue with half-initialized state.
            throw new IllegalStateException("failed to initialize Lucene index", e);
        }
    }

    public static void main(String[] args) {
        IndexCrud indexCrud = new IndexCrud();
        try {
            // indexCrud.deleteAllIndex();
            indexCrud.queryIndex();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release writer/reader/directory handles.
            indexCrud.close();
        }
    }

    /**
     * Tokenizes {@code str} with the given analyzer and prints, for each
     * token: position increment, term text, start/end character offsets and
     * token type. Useful for comparing analyzers.
     *
     * @param str text to analyze
     * @param a   analyzer to run
     */
    public static void display(String str, Analyzer a) {
        // "renyi" (arbitrary field name) only selects per-field analyzer config.
        // try-with-resources fixes the original leak: the TokenStream was
        // never closed, which also breaks analyzer reuse for the same field.
        try (TokenStream stream = a.tokenStream("renyi", new StringReader(str))) {
            PositionIncrementAttribute pia = stream.addAttribute(PositionIncrementAttribute.class); // position increment
            OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class);       // start/end character offsets
            CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class);  // token text
            TypeAttribute ta = stream.addAttribute(TypeAttribute.class);           // token type
            // Since Lucene 4, reset() must precede incrementToken() and end() must follow.
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(pia.getPositionIncrement() + ":[" + cta.toString() + "]:"
                        + oa.startOffset() + "->" + oa.endOffset() + ":" + ta.type());
            }
            stream.end();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Updates the document whose "id" term is "2".
     * Lucene implements update as delete-then-add: the old document is
     * removed and the new one is indexed in its place.
     */
    public void updateIndex() {
        try {
            Term term = new Term("id", "2");
            Document doc = new Document();
            doc.add(new StringField("id", ids[1], Field.Store.YES));
            doc.add(new StringField("name", "lsup", Field.Store.YES));
            writer.updateDocument(term, doc);
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every document in the index.
     */
    public void deleteAllIndex() {
        try {
            writer.deleteAll();
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes the documents whose "id" term is "1" or "3".
     * deleteDocuments also accepts Query[] to delete by query results.
     */
    public void deleteIndex() {
        try {
            Term[] terms = {new Term("id", "1"), new Term("id", "3")};
            writer.deleteDocuments(terms);
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the freshest view of the index, reopening the
     * shared reader if the index changed since it was opened.
     * openIfChanged returns null when nothing changed, in which case the
     * existing reader is reused.
     *
     * @return an IndexSearcher over the current reader
     */
    public IndexSearcher getSearcher() {
        try {
            IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader);
            if (newReader != null) {
                reader.close();
                reader = newReader;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new IndexSearcher(reader);
    }

    /**
     * Runs a multi-field full-text search for "商务服务" and prints each hit
     * with its Lucene doc id, score and stored field values. Hits come back
     * sorted by score, which is driven by term matches and field boosts.
     *
     * @throws InvalidTokenOffsetsException declared for highlighter use (kept
     *         for interface compatibility; not thrown by the current body)
     */
    public void queryIndex() throws InvalidTokenOffsetsException {
        try {
            IndexSearcher searcher = getSearcher();
            // Parse the keyword against all four searchable fields.
            // (The original also built a BooleanQuery.Builder, added this
            // query to it and never used it — dead code, removed.)
            QueryParser parse = new MultiFieldQueryParser(
                    new String[]{"projectName", "projectCode", "zone", "tag"}, analyzer);
            // QueryParser parse = new QueryParser("zone", analyzer);
            Query query = parse.parse("商务服务");
            TopDocs topDocs = searcher.search(query, 1000);
            ScoreDoc[] hits = topDocs.scoreDocs;
            for (ScoreDoc hit : hits) {
                Document hitDoc = searcher.doc(hit.doc);
                System.out.println("(" + hit.doc + "-" + hit.score + ")" + "id:" + hitDoc.get("id") + " projectName:"
                        + hitDoc.get("projectName") + " projectCode:" + hitDoc.get("projectCode") + " zone:" + hitDoc.get("zone") + " tag:" + hitDoc.get("tag"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints index statistics: maximum doc id, live document count and the
     * number of deleted-but-not-yet-merged documents.
     */
    public void readIndex() {
        System.out.println("max num:" + reader.maxDoc());
        System.out.println("index num:" + reader.numDocs());
        System.out.println("delete index num:" + reader.numDeletedDocs());
    }

    /**
     * Indexes the sample documents. "id" is a StringField (not analyzed,
     * exact-match term); the other fields are analyzed TextFields. Field
     * boosts weight the scoring: tag > projectName/projectCode > zone.
     */
    public void createIndex() {
        try {
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new StringField("id", ids[i], Field.Store.YES));
                Field projectName = new TextField("projectName", projectNames[i], Field.Store.YES);
                Field projectCode = new TextField("projectCode", projectCodes[i], Field.Store.YES);
                Field zone = new TextField("zone", zones[i], Field.Store.YES);
                Field tag = new TextField("tag", tags[i], Field.Store.YES);
                doc.add(projectName);
                doc.add(projectCode);
                doc.add(zone);
                doc.add(tag);
                // Higher index-time boost => higher ranking for matches in that field.
                tag.setBoost(2.5f);
                projectName.setBoost(2.0f);
                projectCode.setBoost(2.0f);
                zone.setBoost(0.5f);
                // Index-only (unstored) content would look like:
                // doc.add(new TextField("content", contents[i], Field.Store.NO));
                writer.addDocument(doc);
            }
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Releases the writer, reader and directory. Best-effort: failures are
     * logged and the remaining resources are still closed. Fixes the original
     * leak where none of these handles were ever released.
     */
    @Override
    public void close() {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (directory != null) {
                directory.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}