还是看代码来得直接:
1. 索引
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Example Lucene indexing class.
 * Code taken from Lucene in Action, 2nd Edition.
 *
 * <p>Indexes the {@code .txt} files found directly inside a data directory
 * (sub-directories are not descended into) and writes the index to an
 * index directory.
 */
public class Indexer {

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the index directory, {@code args[1]} is the data directory
     * @throws Exception if the arguments are wrong or indexing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new IllegalArgumentException("Usage: java " + Indexer.class.getName()
                    + " <index dir> <data dir>");
        }
        // 1 Location where the Lucene index files are stored
        String indexDir = args[0];
        // 2 Index the *.txt files in this directory
        String dataDir = args[1];

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir);
        } finally {
            // Always close the writer, even when indexing fails, so the
            // IndexWriter is not left open behind a propagating exception.
            indexer.close();
        }
        long end = System.currentTimeMillis();

        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");
    }

    /** Writer through which every document is added to the index. */
    private final IndexWriter writer;

    /**
     * Opens an {@link IndexWriter} on the given directory.
     *
     * @param indexDir file-system path of the index directory
     * @throws IOException if the index directory cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = new SimpleFSDirectory(new File(indexDir), null);
        // 3 Create the Lucene IndexWriter (third argument is the "create" flag)
        writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying {@link IndexWriter}.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        // 4 Close the IndexWriter
        writer.close();
    }

    /**
     * Indexes every accepted file located directly in {@code dataDir}.
     *
     * @param dataDir path of the directory whose files are indexed
     * @return the document count reported by the writer after indexing
     * @throws Exception if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an
            // I/O error occurs; fail with a clear message instead of an NPE.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                    && acceptFile(f)) {
                indexFile(f);
            }
        }
        // 5 Return the number of indexed documents
        return writer.numDocs();
    }

    /**
     * Decides whether a file should be indexed.
     *
     * @param f candidate file
     * @return {@code true} if the file name ends with {@code .txt}
     */
    protected boolean acceptFile(File f) {
        // 6 Only index .txt files
        return f.getName().endsWith(".txt");
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @param f file to convert
     * @return a document with a {@code contents} field read from the file and
     *         a stored, non-analyzed {@code filename} field holding its canonical path
     * @throws Exception if the file cannot be opened or its path cannot be resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // 7 Index the text content of the file
        // NOTE(review): FileReader decodes with the platform default charset.
        doc.add(new Field("contents", new FileReader(f)));
        // 8 Index the file path
        doc.add(new Field("filename", f.getCanonicalPath(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Adds one file to the index, logging its canonical path to stdout first.
     *
     * @param f file to index
     * @throws Exception if the document cannot be built or added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        // getDocument is protected; an overriding subclass may return null to skip a file.
        if (doc != null) {
            // 9 Add the document to the index
            writer.addDocument(doc);
        }
    }
}
2. 搜索
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Example Lucene search class.
 * Code taken from Lucene in Action, 2nd Edition.
 *
 * <p>Runs a query against the {@code contents} field of an index and prints
 * the stored {@code filename} field of each hit.
 */
public class Searcher {

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the index directory, {@code args[1]} is the query string
     * @throws Exception if the arguments are wrong or the search fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new IllegalArgumentException("Usage: java " + Searcher.class.getName() + " <index dir> <query>");
        }
        // 1 Location of the index files created by the Indexer class
        String indexDir = args[0];
        // 2 Query string
        String q = args[1];
        search(indexDir, q);
    }

    /**
     * Searches the index for {@code q} and prints up to 10 matching file names.
     *
     * <p>A summary line is written to stderr; one {@code filename} value per
     * hit is written to stdout.
     *
     * @param indexDir file-system path of the index directory
     * @param q        query string, parsed against the {@code contents} field
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        Directory dir = new SimpleFSDirectory(new File(indexDir), null);
        try {
            // 3 Open the index
            IndexSearcher is = new IndexSearcher(dir);
            try {
                // 4 Parse the query
                QueryParser parser = new QueryParser(
                        Version.LUCENE_30,
                        "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(q);

                long start = System.currentTimeMillis();
                // 5 Search the index. TopDocs only holds references to the
                //   underlying documents; they are actually loaded in step 7.
                TopDocs hits = is.search(query, 10);
                long end = System.currentTimeMillis();

                // 6 Print some basic information about the query
                System.err.println(
                        "Found " + hits.totalHits + " document(s) (in "
                        + (end - start) + " milliseconds) that matched query '"
                        + q + "':");

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    // 7 Retrieve the matching document
                    Document doc = is.doc(scoreDoc.doc);
                    // 8 Print the matching file name
                    System.out.println(doc.get("filename"));
                }
            } finally {
                // 9 Close the searcher, even if parsing or searching failed
                is.close();
            }
        } finally {
            // Release the directory on every path, including when the
            // searcher could not be opened.
            dir.close();
        }
    }
}