Lucene是什么
Lucene是一款高性能的、可扩展的信息检索(IR)工具库。信息检索是指文档搜索、文档内信息搜索或者文档相关的元数据搜索等操作。
Lucene和搜索程序组件
搜索程序的组件包括:用户接口、构建可编程查询语句的方法、执行查询语句(或者检索匹配文档)、展现查询结果等。
Lucene实战:程序示例
package hdli.lucene.chapter1;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index from the plain-text files found directly inside a data directory.
 *
 * <p>Each accepted file becomes one document with three fields: {@code contents} (the file
 * body, supplied through a reader), {@code filename} and {@code fullpath} (both stored and
 * not analyzed).
 */
public class Indexer {

    /** Index location used when the two command-line arguments are not supplied. */
    private static final String DEFAULT_INDEX_DIR = "D:\\lucene_data\\chapter1\\index";

    /** Data directory used when the two command-line arguments are not supplied. */
    private static final String DEFAULT_DATA_DIR = "D:\\lucene_data\\chapter1\\src_data";

    /** Directory backing the index; kept so it can be closed together with the writer. */
    private final Directory directory;

    private final IndexWriter writer;

    /**
     * Indexes all {@code .txt} files of the data directory and prints how long it took.
     *
     * @param args optionally {@code <indexDir> <dataDir>}; with any other argument count the
     *             built-in default paths are used
     * @throws IOException if the index cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        boolean hasPaths = args != null && args.length == 2;
        String indexDir = hasPaths ? args[0] : DEFAULT_INDEX_DIR;
        String dataDir = hasPaths ? args[1] : DEFAULT_DATA_DIR;

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed = 0;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } catch (Exception e) {
            // Report the failure but still close the index and print the summary below.
            e.printStackTrace();
        } finally {
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
    }

    /**
     * Opens the index directory and creates a writer on it using a {@link StandardAnalyzer}
     * and no field-length limit.
     *
     * @param indexDir file-system path of the index directory
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        directory = FSDirectory.open(new File(indexDir));
        writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the writer and then the underlying directory.
     *
     * @throws IOException if closing either resource fails
     */
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            // The directory was opened by this class, so it is released here as well.
            directory.close();
        }
    }

    /**
     * Adds every regular, visible, readable file directly inside {@code dataDir} that the
     * filter accepts to the index. Sub-directories are not descended into.
     *
     * @param dataDir path of the directory holding the files to index
     * @param filter  decides which files are indexed; {@code null} accepts every file
     * @return the document count reported by the writer after indexing
     * @throws IOException if {@code dataDir} cannot be listed (missing, not a directory, or
     *                     unreadable)
     * @throws Exception   if reading or indexing a file fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null rather than throwing when the path is not a listable directory.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                    && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @param f the file to represent
     * @return a document with {@code contents}, {@code filename} and {@code fullpath} fields
     * @throws Exception if the file cannot be opened or its canonical path cannot be resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset — confirm the
        // source files match it.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Logs the file's canonical path and adds its document to the index. */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /** Accepts files whose name ends with {@code .txt}, ignoring case. */
    public static class TextFilesFilter implements FileFilter {
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }
}
package hdli.lucene.chapter1;
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a query against the {@code contents} field of a Lucene index and prints the
 * {@code fullpath} of each of the top matching documents.
 */
public class Searcher {

    /** Index location used when the two command-line arguments are not supplied. */
    private static final String DEFAULT_INDEX_DIR = "D:\\lucene_data\\chapter1\\index";

    /** Query used when the two command-line arguments are not supplied. */
    private static final String DEFAULT_QUERY = "another AND context";

    /**
     * Searches the index and prints the matches.
     *
     * @param args optionally {@code <indexDir> <query>}; with any other argument count the
     *             built-in default index path and query are used
     * @throws Exception if the index cannot be opened or the query cannot be parsed or run
     */
    public static void main(String... args) throws Exception {
        boolean hasArgs = args != null && args.length == 2;
        String indexDir = hasArgs ? args[0] : DEFAULT_INDEX_DIR;
        String q = hasArgs ? args[1] : DEFAULT_QUERY;
        search(indexDir, q);
    }

    /**
     * Parses {@code q} with a {@link StandardAnalyzer}, retrieves at most 10 hits from the
     * index at {@code indexDir}, and prints the hit count, the search time and the stored
     * {@code fullpath} of every returned hit.
     *
     * @param indexDir file-system path of the index directory
     * @param q        query expression in query-parser syntax
     * @throws Exception if opening the index, parsing the query, or searching fails
     */
    public static void search(String indexDir, String q) throws Exception {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                // The searcher was opened just above, so no reader reopen is needed here.
                QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(q);

                long start = System.currentTimeMillis();
                TopDocs hits = is.search(query, 10);
                long end = System.currentTimeMillis();

                System.out.println("Found " + hits.totalHits + " document(s) (in " + (end -start) + " milliseconds) that matched query '" + q +"':");
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fullpath"));
                }
            } finally {
                // Release the searcher even when parsing or searching throws.
                is.close();
            }
        } finally {
            dir.close();
        }
    }
}
理解索引过程的核心类
IndexWriter
Directory
Analyzer
Document
Field
理解搜索过程的核心类
IndexSearcher
Term
Query
TermQuery
TopDocs