- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.DateTools;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.mira.lucene.analysis.IK_CAnalyzer;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.FileReader;
- import java.util.Date;
- /**
-  * Minimal Lucene demo: indexes every readable file below the classpath root
-  * using the IK Chinese analyzer, then runs one hard-coded query against the
-  * freshly built index and prints the path of each matching file.
-  */
- public class Searcher {
-     /**
-      * Directory the index is written to (for example c:/lucene/index).
-      * NOTE(review): URL.getPath() is not URL-decoded, so a classpath root
-      * containing spaces or non-ASCII characters may yield a wrong path - confirm.
-      */
-     private static final String INDEX_DIR = Searcher.class.getResource("/").getPath()+"/data/index";
-     /** Root directory whose files are indexed (for example c:/lucene/doc). */
-     private static final String DOC_DIR = Searcher.class.getResource("/").getPath();
-     /**
-      * Rebuilds the index from scratch, then searches it for a fixed query.
-      *
-      * @param args ignored
-      * @throws Exception if the index directory is missing after indexing, or
-      *                   if the search itself fails
-      */
-     public static void main(String[] args) throws Exception {
-         String queryString = "测试";
-         File indexDir = new File(INDEX_DIR);
-         File docDir = new File(DOC_DIR);
-         long start = System.currentTimeMillis();
-         // The index must be built before it can be searched.
-         try {
-             IndexWriter writer = new IndexWriter(INDEX_DIR, new IK_CAnalyzer(), true);
-             try {
-                 System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
-                 indexDocs(writer, docDir);
-                 System.out.println("Optimizing...");
-                 writer.optimize();
-             } finally {
-                 // Always release the writer (and its lock), even when indexing fails.
-                 writer.close();
-             }
-             long end = System.currentTimeMillis();
-             System.out.println((end - start) + " total milliseconds");
-         } catch (IOException e) {
-             // Indexing problems are reported but not fatal; the search below
-             // still runs against whatever index exists on disk.
-             System.out.println(" caught a " + e.getClass() +
-                     "\n with message: " + e.getMessage());
-         }
-         if (!indexDir.exists() || !indexDir.isDirectory()) {
-             throw new FileNotFoundException(indexDir
-                     + " does not exist or is not a directory.");
-         }
-         search(indexDir, queryString);
-     }
-     /**
-      * Recursively adds {@code file}, or every readable file below it, to the index.
-      * Unreadable entries are skipped silently. The index directory itself is
-      * skipped so the index's own files are not indexed while being written.
-      *
-      * @param writer open index writer that receives the documents
-      * @param file   file or directory to index
-      * @throws IOException if a path cannot be resolved or the writer fails
-      */
-     private static void indexDocs(IndexWriter writer, File file)
-             throws IOException {
-         if (!file.canRead()) {
-             return;
-         }
-         if (file.isDirectory()) {
-             // INDEX_DIR lives below DOC_DIR, so the walk would otherwise reach it.
-             if (file.getCanonicalFile().equals(new File(INDEX_DIR).getCanonicalFile())) {
-                 return;
-             }
-             String[] files = file.list();
-             if (files != null) {
-                 for (int i = 0; i < files.length; i++) {
-                     indexDocs(writer, new File(file, files[i]));
-                 }
-             }
-             return;
-         }
-         System.out.println("adding " + file);
-         try {
-             writer.addDocument(getDocument(file));
-         } catch (FileNotFoundException fnfe) {
-             // The file could not be opened for reading; report it and
-             // continue with the remaining files.
-             System.out.println("skipping " + file + ": " + fnfe.getMessage());
-         }
-     }
-     /**
-      * Builds the Lucene document for one file: stored, untokenized "path" and
-      * "modified" (minute resolution) fields plus a "contents" field fed from
-      * a reader over the file.
-      * NOTE(review): FileReader decodes with the platform default charset;
-      * confirm that this matches the encoding of the indexed files.
-      *
-      * @param f file to describe
-      * @return the populated document
-      * @throws java.io.FileNotFoundException if the file cannot be opened
-      */
-     private static Document getDocument(File f)
-             throws java.io.FileNotFoundException {
-         Document doc = new Document();
-         doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));
-         doc.add(new Field("modified",
-                 DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
-                 Field.Store.YES, Field.Index.UN_TOKENIZED));
-         doc.add(new Field("contents", new FileReader(f)));
-         return doc;
-     }
-     /**
-      * Runs {@code q} against the "contents" field of the index in
-      * {@code indexDir}, writes a summary line to stderr and one
-      * "file: path" line per hit to stdout.
-      *
-      * @param indexDir directory containing the index
-      * @param q        query string in QueryParser syntax
-      * @throws Exception if the index cannot be opened or the query cannot be parsed
-      */
-     public static void search(File indexDir, String q) throws Exception {
-         Directory fsDir = FSDirectory.getDirectory(indexDir);
-         try {
-             IndexSearcher searcher = new IndexSearcher(fsDir); // 1. open the index
-             try {
-                 Query query = new QueryParser("contents", new IK_CAnalyzer()).parse(q); // 2. parse the query
-                 long start = System.currentTimeMillis();
-                 Hits hits = searcher.search(query); // 3. search the index
-                 long end = System.currentTimeMillis();
-                 System.err.println("Found " + hits.length() + " document(s) (in "
-                         + (end - start) + " milliseconds) that matched query " + q + ":");
-                 for (int i = 0; i < hits.length(); i++) {
-                     Document doc = hits.doc(i); // 4. fetch the matching document
-                     System.out.println("file: " + doc.get("path"));
-                 }
-             } finally {
-                 searcher.close();
-             }
-         } finally {
-             fsDir.close();
-         }
-     }
- }
- 正向全切分分词器:org.mira.lucene.analysis.IK_CAnalyzer(适合建索引时使用)
正向最大全切分分词器:org.mira.lucene.analysis.MIK_CAnalyzer(适合用户输入检索时使用)
来源:http://hi.baidu.com/happy19840402/blog/item/7f48ce2e462aff554fc226d6.html