需求:
数据包括两部分:一部分为唯一标识(即 id),另一部分为内容。要求基于 Lucene 实现对内容的检索;本例模拟实现时以 UUID 作为 id,内容为普通的字符串。
环境:
<dependency> <groupId>org.safehaus.jug</groupId> <artifactId>jug</artifactId> <version>2.0.0</version> <classifier>lgpl</classifier> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>3.4.0</version> </dependency> <dependency> <groupId>IKAnalyzer</groupId> <artifactId>IKAnalyzer</artifactId> <version>IKAnalyzer2012_u6</version> <scope>system</scope> <systemPath>D:/tools/lib/IKAnalyzer2012_u6.jar</systemPath> </dependency>
实现:
/**
 * Small demonstration of indexing and searching text with Lucene 3.4 and the
 * IKAnalyzer tokenizer.
 *
 * <p>Each indexed document has two stored fields: {@code id} (a random UUID
 * rendered as a string) and {@code str} (the text content). Queries are parsed
 * against both fields.
 *
 * <p>The index lives in the fixed file-system directory {@link #indexDir}.
 */
public class IndexService {
    /** Source of the random UUIDs used as document ids. */
    private static final UUIDGenerator generator = UUIDGenerator.getInstance();

    /** File-system location of the Lucene index. */
    private static final String indexDir = "D:/tools/index";

    /** One flag per queried field ("str", "id"): a match in either field is enough. */
    private static final Occur[] QUERY_FLAGS = { Occur.SHOULD, Occur.SHOULD };

    /** Maximum number of hits fetched and printed by {@link #queryIndex(String)}. */
    private static final int MAX_HITS = 2;

    /**
     * Adds one document holding {@code content} to the index under a freshly
     * generated random UUID.
     *
     * <p>The writer and the directory are released in {@code finally} blocks so
     * that a failure while adding or optimizing does not leave the writer (and
     * the index write lock it holds) open.
     *
     * @param content text to index and store in the {@code str} field
     * @throws Exception if the index cannot be opened, written or closed
     */
    public void createIndex(String content) throws Exception {
        UUID uuid = generator.generateRandomBasedUUID();
        Directory directory = new org.apache.lucene.store.SimpleFSDirectory(
                new File(indexDir));
        try {
            Analyzer analyzer = new IKAnalyzer();
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34,
                    analyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            try {
                Document doc = new Document();
                doc.add(new Field("str", content, Field.Store.YES,
                        Field.Index.ANALYZED));
                // NOTE(review): the id is indexed as ANALYZED, i.e. it is passed
                // through the analyzer like the content. If exact-match lookup by
                // id is wanted, NOT_ANALYZED is probably the better choice -
                // confirm how ids are meant to be queried before changing it.
                doc.add(new Field("id", uuid.toString(), Field.Store.YES,
                        Field.Index.ANALYZED));
                writer.addDocument(doc);
                // NOTE(review): a writer is opened and optimize() is run for
                // every single document; batching would likely be much cheaper.
                writer.optimize();
            } finally {
                // close(true) waits for running merges before closing.
                writer.close(true);
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Searches the {@code str} and {@code id} fields for {@code queryString}
     * and prints the total hit count followed by the id and score of at most
     * {@link #MAX_HITS} top hits to standard output.
     *
     * <p>The searcher and the directory are closed in {@code finally} blocks so
     * they are released even when searching or loading a document fails.
     *
     * @param queryString query text in Lucene query-parser syntax
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public void queryIndex(String queryString) throws Exception {
        Analyzer analyzer = new IKAnalyzer();
        // Parse first: a malformed query then fails before any index file is opened.
        Query q = MultiFieldQueryParser.parse(Version.LUCENE_34, queryString,
                new String[] { "str", "id" }, QUERY_FLAGS, analyzer);
        BooleanQuery bq = new BooleanQuery();
        bq.add(q, BooleanClause.Occur.MUST);

        Directory directory = new org.apache.lucene.store.SimpleFSDirectory(
                new File(indexDir));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                TopDocs topDocs = indexSearcher.search(bq, MAX_HITS);
                System.out.println(topDocs.totalHits);
                ScoreDoc[] hits = topDocs.scoreDocs;
                for (ScoreDoc scoreDoc : hits) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    System.out.println(String.format("doc id is %s ,and score is %s:",
                            document.get("id"), scoreDoc.score + ""));
                }
            } finally {
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Demo entry point: indexes the same sample text 100 times, then runs one
     * query against the resulting index.
     *
     * @param ar command-line arguments (unused)
     * @throws Exception if indexing or searching fails
     */
    public static void main(String[] ar) throws Exception {
        IndexService indexService = new IndexService();
        StringBuilder content = new StringBuilder();
        content.append("茶道,就是品赏茶的美感之道。茶道是一种烹茶饮茶的生活艺术,一种以茶为媒的生活礼仪,一种以茶修身的生活方式。");
        content.append("它通过沏茶、赏茶、闻茶、饮茶、增进友谊,美心修德,学习礼法,是很有益的一种和美仪式。");
        content.append("喝茶能静心、静神,有助于陶冶情操、去除杂念,符合儒道的“内省修行”思想。");
        content.append("茶道精神是茶文化的核心,是茶文化的灵魂。本账户的宗旨是传播茶叶,茶道,品茶,辨别的知识,让大家了解中华国粹。");
        // Simulate 100 text documents
        for (int i = 0; i < 100; i++) {
            indexService.createIndex(content.toString());
        }
        indexService.queryIndex("茶叶");
    }
}