原文:http://www.dev26.com/blog/article/351
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.mira.lucene.analysis.IK_CAnalyzer;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.FileReader;
import java.util.Date;
/**
 * Small Lucene demo: builds a full-text index over every readable file found
 * under the classpath root, then runs a fixed query against the "contents"
 * field and prints the path of each matching file.
 */
public class Searcher {
    /** Directory the index is written to: {@code data/index} under the classpath root. */
    private static final String INDEX_DIR = Searcher.class.getResource("/").getPath()+"/data/index";//"c:\\lucene\\index";
    /** Root directory whose files are indexed: the classpath root itself. */
    private static final String DOC_DIR = Searcher.class.getResource("/").getPath();//"c:\\lucene\\doc";

    /**
     * Rebuilds the index from scratch and then searches it for a hard-coded query.
     *
     * @param args unused
     * @throws Exception if the index directory is missing after indexing, or if
     *                   the search itself fails
     */
    public static void main(String[] args) throws Exception {
        String queryString = "测试";
        File indexDir = new File(INDEX_DIR);
        File docDir = new File(DOC_DIR);
        Date start = new Date();
        // The index must be built before it can be searched.
        try {
            IndexWriter writer = new IndexWriter(INDEX_DIR, new IK_CAnalyzer(), true);
            try {
                System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
                indexDocs(writer, docDir);
                System.out.println("Optimizing...");
                writer.optimize();
            } finally {
                // Always close the writer, even when indexing fails part-way,
                // so it is not left open.
                writer.close();
            }
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            // Best effort: report the failure and still attempt the search below.
            System.out.println(" caught a " + e.getClass() +
                    "\n with message: " + e.getMessage());
        }
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new Exception(indexDir
                    + " does not exist or is not a directory.");
        }
        search(indexDir, queryString);
    }

    /**
     * Recursively adds {@code file} (or, for a directory, every readable file
     * beneath it) to the index. The index directory itself is skipped: it lives
     * under the document root, and walking into it would index the index's own
     * files while they are being written.
     *
     * @param writer open index writer that receives the documents
     * @param file   file or directory to index
     * @throws IOException if a path cannot be resolved or a document cannot be added
     */
    private static void indexDocs(IndexWriter writer, File file)
            throws IOException {
        if (!file.canRead()) {
            return;
        }
        if (file.isDirectory()) {
            if (isIndexDirectory(file)) {
                return;
            }
            String[] files = file.list();
            if (files != null) { // list() returns null on an I/O error
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
            return;
        }
        System.out.println("adding " + file);
        try {
            writer.addDocument(getDocument(file));
        } catch (FileNotFoundException fnfe) {
            // A file may vanish or be unopenable despite canRead(); report it
            // and carry on with the remaining files instead of hiding it.
            System.err.println("skipping " + file + ": " + fnfe.getMessage());
        }
    }

    /** Returns true when {@code dir} resolves to the same location as {@link #INDEX_DIR}. */
    private static boolean isIndexDirectory(File dir) throws IOException {
        return dir.getCanonicalFile().equals(new File(INDEX_DIR).getCanonicalFile());
    }

    /**
     * Builds the Lucene document for one file: stored, untokenized "path" and
     * "modified" (minute resolution) fields plus a "contents" field fed from
     * the file's text.
     *
     * @param f file to wrap
     * @return document describing {@code f}
     * @throws java.io.FileNotFoundException if the file cannot be opened for reading
     */
    private static Document getDocument(File f)
            throws java.io.FileNotFoundException {
        Document doc = new Document();
        doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.add(new Field("modified",
                DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        // NOTE(review): FileReader decodes with the platform default charset, and
        // the reader is presumably consumed and closed by the IndexWriter during
        // addDocument - confirm both for the Lucene version in use.
        doc.add(new Field("contents", new FileReader(f)));
        return doc;
    }

    /**
     * Runs {@code q} against the "contents" field of the index in
     * {@code indexDir} and prints the stored path of every hit.
     *
     * @param indexDir directory holding the index
     * @param q        query string in QueryParser syntax
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void search(File indexDir, String q) throws Exception {
        Directory fsDir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher is = new IndexSearcher(fsDir); // 1. open the index
            try {
                Query query = new QueryParser("contents", new IK_CAnalyzer()).parse(q); // 2. parse the query
                long start = new Date().getTime();
                Hits hits = is.search(query); // 3. search the index
                long end = new Date().getTime();
                System.err.println("Found " + hits.length() + " document(s) (in "
                        + (end - start) + " milliseconds) that matched query '" + q + "':");
                // Hits are read while the searcher is still open.
                for (int i = 0; i < hits.length(); i++) {
                    Document doc = hits.doc(i); // 4. fetch the matching document
                    System.out.println("file: " + doc.get("path"));
                }
            } finally {
                is.close();
            }
        } finally {
            fsDir.close();
        }
    }
}
正向全切分分词器:org.mira.lucene.analysis.IK_CAnalyzer(适合建索引时使用)
正向最大全切分分词器:org.mira.lucene.analysis.MIK_CAnalyzer(适合用户输入检索时使用)