常量接口
/**
 * Shared filesystem locations used by the indexing and searching examples.
 *
 * <p>Fields declared in an interface are implicitly {@code public static final},
 * so those modifiers are not repeated on the declarations below.
 */
public interface Constants {
    /** Directory in which the Lucene index is written and from which it is read. */
    String indexDir = "D:\\lucene\\index";

    /** Directory whose files are fed to the indexer. */
    String dataDir = "D:\\lucene\\test";
}
1. 建立索引
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index from the regular files found directly inside a data directory.
 *
 * <p>Every accepted file becomes one document with three fields: {@code contents}
 * (the file body, supplied as a reader), {@code filename} and {@code fullpath}
 * (both stored and indexed without analysis).
 */
public class Indexer {
    /** Directory holding the index; retained so it can be released in {@link #close()}. */
    private final Directory dir;

    /** Writer through which every document is added to the index. */
    private final IndexWriter writer;

    /**
     * Opens an index writer on the given directory.
     *
     * <p>The writer is opened with {@code create == true}, a {@link StandardAnalyzer}
     * and no limit on field length.
     *
     * @param indexDir path of the directory that holds the index
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        dir = FSDirectory.open(new File(indexDir));
        writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
                true, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws IOException if either resource fails to close
     */
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            // The writer was handed an already-open Directory, so release it here as well.
            dir.close();
        }
    }

    /**
     * Indexes every readable, non-hidden regular file directly inside {@code dataDir}
     * that the filter accepts. Sub-directories are skipped, not descended into.
     *
     * @param dataDir path of the directory whose files are indexed
     * @param filter  selects which files to index; {@code null} accepts every file
     * @return the document count reported by the writer after indexing
     * @throws IOException if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    public int index(String dataDir, FileFilter filter) throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read;
            // report that explicitly instead of failing with a NullPointerException.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                    && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Adds a single file to the index and logs its canonical path to standard output.
     *
     * @param f the file to index
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    public void indexFile(File f) throws IOException {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        try {
            writer.addDocument(doc);
        } finally {
            // Always release the file handle opened by getDocument(), including when
            // addDocument() fails. Closing an already-closed reader is harmless.
            Field contents = doc.getField("contents");
            if (contents != null && contents.readerValue() != null) {
                contents.readerValue().close();
            }
        }
    }

    /**
     * Builds the Lucene document describing one file.
     *
     * <p>The returned document holds an open reader on the file in its {@code contents}
     * field; {@link #indexFile(File)} closes it after the document has been added.
     *
     * @param f the file to describe
     * @return a document with {@code contents}, {@code filename} and {@code fullpath} fields
     * @throws IOException if the file cannot be opened or its canonical path resolved
     */
    public Document getDocument(File f) throws IOException {
        // Resolve everything that can fail before opening the reader, so that a failure
        // here cannot leave an open file handle behind.
        String name = f.getName();
        String fullPath = f.getCanonicalPath();

        Document doc = new Document();
        // File contents.
        // NOTE(review): FileReader decodes with the platform default charset; confirm
        // that this matches the encoding of the files being indexed.
        doc.add(new Field("contents", new FileReader(f)));
        // File name.
        doc.add(new Field("filename", name, Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        // Full canonical path of the file.
        doc.add(new Field("fullpath", fullPath, Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Accepts only files whose name ends with {@code .txt}, ignoring case. */
    public static class TextFilesFilter implements FileFilter {
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Indexes the {@code .txt} files in {@link Constants#dataDir} into
     * {@link Constants#indexDir} and prints how long it took.
     *
     * @param args ignored
     * @throws IOException if indexing fails
     */
    public static void main(String[] args) throws IOException {
        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(Constants.indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(Constants.dataDir, new TextFilesFilter());
        } finally {
            // Close the writer even when indexing fails, so it is not left open.
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");
    }
}
2. 搜索索引
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a query against the {@code contents} field of a Lucene index and prints the
 * {@code fullpath} of each of the top matching documents.
 */
public class Searcher {
    /**
     * Searches the index stored in {@code indexDir} and prints up to ten hits.
     *
     * @param indexDir path of the directory that holds the index
     * @param q        query string, parsed against the {@code contents} field
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code q} is not a valid query
     */
    public static void search(String indexDir, String q) throws IOException,
            ParseException {
        // Open the index named by the caller (previously the argument was ignored and
        // Constants.indexDir was always used).
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                // Parse the query string.
                QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(q);

                // Search the index, timing only the search call itself.
                long start = System.currentTimeMillis();
                TopDocs hits = is.search(query, 10);
                long end = System.currentTimeMillis();

                // Print the results.
                System.out.println("Found " + hits.totalHits + " document(s) (in "
                        + (end - start) + " milliseconds) that matched query'" + q
                        + "':");
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fullpath"));
                }
            } finally {
                // Close the searcher even when parsing or searching fails.
                is.close();
            }
        } finally {
            // The searcher was handed an already-open Directory, so release it here.
            dir.close();
        }
    }

    /**
     * Searches the index at {@link Constants#indexDir} for a fixed sample query.
     *
     * @param args ignored
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if the sample query cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String indexDir = Constants.indexDir;
        String q = "中华";
        search(indexDir, q);
    }
}