使用Lucene2.4时的一些注意点(新版本的修改点)

最新推荐文章于 2022-06-22 16:46:07 发布

昆山人在上海

最新推荐文章于 2022-06-22 16:46:07 发布

阅读量2.1k

点赞数

分类专栏：Java相关；文章标签：lucene string search query blog file

本文链接：https://blog.csdn.net/kunshan_shenbin/article/details/3165379

版权

Java相关专栏收录该内容

392 篇文章 0 订阅

订阅专栏

Lucene2.4版本已经在很多地方有了修改，可能是为了迎接Lucene3.0的到来所做的调整吧。
以前写的Blog（http://blog.csdn.net/kunshan_shenbin/archive/2008/06/02/2503388.aspx）在2.3.2版本下测试通过，但在2.4下必须修改。

针对2.4版本修改后的代码如下：

Indexer.java

 
 package com.lucene.index;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
public class Indexer {
    public static void main(String[] args) throws IOException {
        File indexDir = new File("C://test//index");
        File dataDir = new File("C://test//data");
        int numIndexed = index(indexDir, dataDir);
        System.out.println(numIndexed);
    }
    public static int index(File indexDir, File dataDir) throws IOException {
        if (!indexDir.exists() || !dataDir.isDirectory()) {
            throw new IOException();
        }
        IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        writer.setUseCompoundFile(false);
        indexDirectory(writer, dataDir);
        int numIndexed = writer.maxDoc();
        writer.optimize();
        writer.close();
        return numIndexed;
    }
    private static void indexDirectory(IndexWriter writer, File dir)
            throws IOException {
        File[] files = dir.listFiles();
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (f.isDirectory()) {
                indexDirectory(writer, f);
            } else if (f.getName().toLowerCase().endsWith(".txt")) {
                indexFile(writer, f);
            }
        }
    }
    public static void indexFile(IndexWriter writer, File f) throws IOException {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = new Document();
        doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        doc.add(new Field("contents", new FileReader(f)));
        writer.addDocument(doc);
    }
}
 

Searcher.java

 
 package com.lucene.search;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class Searcher {
    public static void main(String[] args) throws Exception {
        File indexDir = new File("C://test//index");
        String q = "111";
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new IOException();
        }
        search(indexDir, q);
    }
    public static void search(File indexDir, String q) throws Exception {
        Directory fsDir = FSDirectory.getDirectory(indexDir);
        IndexSearcher searcher = new IndexSearcher(fsDir);
        QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
        Query query = parser.parse(q);
        
        TopDocs topDocs = searcher.search(query, 100);//100是显示队列的Size
        ScoreDoc[] hits = topDocs.scoreDocs;
        System.out.println("共有" + searcher.maxDoc() + "条索引，命中" + hits.length + "条");
        for (int i = 0; i < hits.length; i++) {
            int DocId = hits[i].doc;
            Document document = searcher.doc(DocId);
            System.out.println(DocId + ":" + document.get("filename"));
        }
    }
}