本文采用 Lucene 4.0.0 和 IKAnalyzer 中文分词器。
一、创建索引
package com.jmj.project.web;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class Abc {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Creates a Lucene index under {@code /home/iflow/luceneindex} from two sample
     * documents, using IKAnalyzer for Chinese word segmentation.
     *
     * <p>Each document carries an {@code id} (stored, not tokenized), a {@code content}
     * field and a {@code city} field (both stored and tokenized). The {@code content}
     * field gets a higher boost so matches on it rank higher at query time.
     *
     * @return always {@code null} (the sample does not produce a result value)
     */
    public String createIndex() {
        String[] ids = { "1", "2" };
        String[] contents = { "我是第一条内容,i am lucene IKAnalyzer", "我在北京天安门广场吃炸鸡" };
        String[] citys = { "北京", "上海" };
        String indexPath = "/home/iflow/luceneindex"; // 建立索引文件的目录
        // IKAnalyzer(): false (default) = finest-grained segmentation; true = smart segmentation
        Analyzer analyzer = new IKAnalyzer();
        IndexWriter indexWriter = null;
        Directory directory = null;
        try {
            directory = FSDirectory.open(new File(indexPath));
            indexWriter = getIndexWriter(directory, analyzer);
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                // id is an exact-match key: stored but not tokenized.
                document.add(new StringField("id", ids[i], Field.Store.YES));
                Field content = new TextField("content", contents[i], Field.Store.YES);
                Field city = new TextField("city", citys[i], Field.Store.YES);
                // Higher boost => documents matching this field score higher in results.
                content.setBoost(2.0f);
                city.setBoost(1.0f);
                document.add(content);
                document.add(city);
                indexWriter.addDocument(document);
            }
            indexWriter.commit();
            // Only report success when indexing actually completed.
            logger.info("索引创建成功!");
        } catch (IOException e) {
            // Keep the cause: a message-only INFO log hides the actual failure.
            logger.error("索引创建异常!", e);
        } finally {
            // Close the writer (and directory) even when indexing failed,
            // otherwise the write lock on the index directory is leaked.
            try {
                closeWriter(indexWriter);
            } catch (IOException e) {
                logger.error("索引关闭异常!", e);
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    logger.error("索引关闭异常!", e);
                }
            }
        }
        return null;
    }

    /**
     * Builds an {@link IndexWriter} for the given directory and analyzer,
     * configured for Lucene 4.0.
     *
     * @param dir      index storage directory
     * @param analyzer analyzer used to tokenize indexed text
     * @return a new writer over {@code dir}
     * @throws IOException if the directory cannot be opened for writing
     */
    private IndexWriter getIndexWriter(Directory dir, Analyzer analyzer) throws IOException {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        return new IndexWriter(dir, iwc);
    }

    /**
     * Closes the given writer if it was successfully created.
     *
     * @param indexWriter writer to close; may be {@code null}
     * @throws IOException if closing fails
     */
    private void closeWriter(IndexWriter indexWriter) throws IOException {
        if (indexWriter != null) {
            indexWriter.close();
        }
    }
}
二、查询索引
package com.jmj.project.web;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class Abc {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Searches the Lucene index at {@code /home/iflow/luceneindex} for documents
     * whose {@code content} field matches "北京", and logs the top 100 hits.
     *
     * <p>NOTE(review): {@code filterMap} is currently unused and the method always
     * returns {@code null}; presumably the sample is meant to be extended to build
     * the query from the filter map and fill a {@code PageResults} — confirm intent.
     *
     * @param filterMap query filter parameters (currently unused)
     * @return always {@code null} in this sample
     * @throws IOException    if the index directory or reader cannot be opened
     * @throws ParseException if the query text cannot be parsed
     */
    public PageResults<ViProject> indexSearch(Map<String, Object> filterMap)
            throws ParseException, IOException, InvalidTokenOffsetsException, java.text.ParseException {
        String indexPath = "/home/iflow/luceneindex"; // 建立索引文件的目录
        Analyzer analyzer = new IKAnalyzer();
        // Let IOException propagate: the method already declares it, and
        // swallowing it here would lead to an NPE on a null directory/reader.
        Directory directory = FSDirectory.open(new File(indexPath));
        IndexReader ireader = null;
        try {
            ireader = IndexReader.open(directory);
            IndexSearcher isSearcher = new IndexSearcher(ireader);
            BooleanQuery booleanQuery = new BooleanQuery();
            // eg: multi-field query
            // String text = "北京";
            // String[] fields = { "content", "city" };
            // QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer);
            // Query query = qp.parse(text);
            // booleanQuery.add(query, Occur.MUST);
            // eg: single-field query
            String text = "北京";
            QueryParser qp = new QueryParser(Version.LUCENE_40, "content", analyzer);
            Query query = qp.parse(text);
            booleanQuery.add(query, Occur.MUST);
            // Fetch the 100 highest-scoring hits.
            TopDocs topDocs = isSearcher.search(booleanQuery, 100);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document targetDoc = isSearcher.doc(scoreDoc.doc);
                // Use the class logger instead of System.out.
                logger.info("{}{}{}", targetDoc.get("id"), targetDoc.get("content"), targetDoc.get("city"));
            }
        } finally {
            // Always release the reader and directory, even when search throws.
            if (ireader != null) {
                ireader.close();
            }
            directory.close();
        }
        return null;
    }
}
三、pom文件
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
    <groupId>com.lucene</groupId>
    <artifactId>ikAnalyzer</artifactId>
    <!-- NOTE: the RELEASE metaversion is deprecated and unsupported in Maven 3;
         pin a concrete version of the IK Analyzer artifact instead. -->
    <version>RELEASE</version>
</dependency>