Lucene读书笔记——1. 初识Lucene

最新推荐文章于 2024-09-10 09:07:08 发布

lihuaidong1989

最新推荐文章于 2024-09-10 09:07:08 发布

阅读量307

点赞数

分类专栏： Lucene 文章标签： lucene

本文链接：https://blog.csdn.net/lihuaidong1989/article/details/52144094

版权

Lucene 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

Lucene是什么

Lucene是一款高性能的、可扩展的信息检索（IR）工具库。信息检索是指文档搜索、文档内信息搜索或者文档相关的元数据搜索等操作。

Lucene和搜索程序组件

一个完整的搜索程序除了索引和搜索核心之外，还包括以下组件：用户接口、构建可编程查询语句的方法、执行查询语句（或者检索匹配文档）、展现查询结果等。

Lucene实战：程序示例

package hdli.lucene.chapter1;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each accepted file becomes one {@link Document} with three fields:
 * {@code contents} (fed from a {@link FileReader} over the file), {@code filename}
 * and {@code fullpath} (both stored and not analyzed).
 *
 * <p>Instances own an {@link IndexWriter} and the {@link Directory} it writes to;
 * callers must invoke {@link #close()} when done.
 */
public class Indexer {

    /** Index location used when the command line does not supply one. */
    private static final String DEFAULT_INDEX_DIR = "D:\\lucene_data\\chapter1\\index";

    /** Source-data location used when the command line does not supply one. */
    private static final String DEFAULT_DATA_DIR = "D:\\lucene_data\\chapter1\\src_data";

    /** Directory backing the index; kept so it can be released in {@link #close()}. */
    private final Directory directory;

    /** Writer through which every document is added to the index. */
    private final IndexWriter writer;

    /**
     * Indexes every {@code .txt} file of the data directory and prints how many
     * documents were indexed and how long it took.
     *
     * @param args optional; when exactly two arguments are given they are used as
     *             {@code <indexDir> <dataDir>}, otherwise the built-in default paths apply
     * @throws IOException if the index cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        boolean pathsGiven = args != null && args.length == 2;
        String indexDir = pathsGiven ? args[0] : DEFAULT_INDEX_DIR;
        String dataDir = pathsGiven ? args[1] : DEFAULT_DATA_DIR;

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed = 0;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } catch (Exception e) {
            // Best effort: report the failure, then still close the index and print the summary.
            e.printStackTrace();
        } finally {
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " millseconds");
    }

    /**
     * Opens the index directory and creates the writer.
     *
     * <p>The writer is constructed with {@code create == true} and an unlimited
     * maximum field length, using a {@link StandardAnalyzer}.
     *
     * @param indexDir file-system path of the index directory
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        directory = FSDirectory.open(new File(indexDir));
        boolean writerOpened = false;
        try {
            writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_30), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            writerOpened = true;
        } finally {
            // Do not leak the directory handle when the writer could not be created.
            if (!writerOpened) {
                directory.close();
            }
        }
    }

    /**
     * Closes the writer and then the underlying directory.
     *
     * @throws IOException if closing either resource fails
     */
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes the regular, visible, readable files located directly in {@code dataDir}
     * (sub-directories are not descended into).
     *
     * @param dataDir path of the directory holding the files to index
     * @param filter  decides which files are indexed; {@code null} accepts every file
     * @return the value of {@link IndexWriter#numDocs()} after indexing
     * @throws IOException if {@code dataDir} cannot be listed (for example it does not
     *                     exist or is not a directory)
     * @throws Exception   if reading a file or adding its document fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() signals "not a directory" or an I/O error with null, not an exception.
            throw new IOException("Cannot list files of data directory: " + dataDir);
        }
        for (File f : files) {
            boolean regularReadable = !f.isDirectory() && !f.isHidden() && f.exists() && f.canRead();
            if (regularReadable && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Builds the Lucene document describing one file.
     *
     * @param f file to describe
     * @return a document with the {@code contents}, {@code filename} and {@code fullpath} fields
     * @throws Exception if the file cannot be opened or its canonical path cannot be resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset — confirm that
        // matches the encoding of the source files.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        return doc;
    }

    /** Logs the file's canonical path and adds its document to the index. */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /** Accepts files whose name ends with {@code .txt}, ignoring case. */
    public static class TextFilesFilter implements FileFilter {

        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }
}


package hdli.lucene.chapter1;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a query against the {@code contents} field of an existing Lucene index and
 * prints the {@code fullpath} field of the best-scoring documents.
 */
public class Searcher {

    /** Index location used when the command line does not supply one. */
    private static final String DEFAULT_INDEX_DIR = "D:\\lucene_data\\chapter1\\index";

    /** Query used when the command line does not supply one. */
    private static final String DEFAULT_QUERY = "another AND context";

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    /**
     * Command-line entry point.
     *
     * @param args optional; when exactly two arguments are given they are used as
     *             {@code <indexDir> <query>}, otherwise the built-in defaults apply
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public static void main(String... args) throws Exception {
        boolean argsGiven = args != null && args.length == 2;
        String indexDir = argsGiven ? args[0] : DEFAULT_INDEX_DIR;
        String q = argsGiven ? args[1] : DEFAULT_QUERY;
        search(indexDir, q);
    }

    /**
     * Parses {@code q} with a {@link QueryParser} over the {@code contents} field,
     * searches the index for up to ten hits, and prints the total hit count, the search
     * time, and the {@code fullpath} of every returned document.
     *
     * <p>The searcher and the directory are released even when parsing or searching fails.
     *
     * @param indexDir file-system path of the index directory
     * @param q        query expression in QueryParser syntax
     * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
     */
    public static void search(String indexDir, String q) throws Exception {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(q);

                // Time only the search call itself, not the parsing or the printing.
                long start = System.currentTimeMillis();
                TopDocs hits = is.search(query, MAX_HITS);
                long end = System.currentTimeMillis();

                System.out.println("Found " + hits.totalHits + " document(s) (in " + (end -start) + " milliseconds) that matched query '" + q +"':");

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fullpath"));
                }
            } finally {
                is.close();
            }
        } finally {
            dir.close();
        }
    }
}

理解索引过程的核心类

IndexWriter