Lucene读书笔记——1. 初识Lucene

Lucene是什么

Lucene是一款高性能的、可扩展的信息检索(IR)工具库。信息检索是指文档搜索、文档内信息搜索或者文档相关的元数据搜索等操作。

Lucene和搜索程序组件

一个完整的搜索程序通常包含以下组件:用户接口、构建可编程查询语句的方法、执行查询语句(或者检索匹配文档)、展现查询结果等。

Lucene实战:程序示例

package hdli.lucene.chapter1;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the {@code .txt} files found directly inside a
 * data directory (sub-directories are not descended into).
 *
 * <p>Each accepted file becomes one {@link Document} with three fields:
 * {@code contents} (built from a reader over the file), {@code filename} and
 * {@code fullpath} (both stored and not analyzed).
 */
public class Indexer {

    /** Index directory used when no command-line arguments are supplied. */
    private static final String DEFAULT_INDEX_DIR = "D:\\lucene_data\\chapter1\\index";

    /** Data directory used when no command-line arguments are supplied. */
    private static final String DEFAULT_DATA_DIR = "D:\\lucene_data\\chapter1\\src_data";

    private IndexWriter writer;

    /**
     * Entry point. Usage: {@code java Indexer [<indexDir> <dataDir>]}.
     *
     * <p>When exactly two arguments are given they are used as the index and
     * data directories; otherwise the built-in defaults are used.
     *
     * @param args optional index directory and data directory
     * @throws IOException if the index cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = DEFAULT_INDEX_DIR;
        String dataDir = DEFAULT_DATA_DIR;
        if (args.length == 2) {
            indexDir = args[0];
            dataDir = args[1];
        }

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed = 0;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } catch (Exception e) {
            // Top-level boundary: report the failure but still close the writer below.
            e.printStackTrace();
        } finally {
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
    }

    /**
     * Opens an {@link IndexWriter} on the given directory. The writer is
     * constructed with {@code create == true} and unlimited field length.
     *
     * @param indexDir file-system path of the index directory
     * @throws IOException if the directory or writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes every readable, non-hidden regular file directly inside
     * {@code dataDir} that is accepted by {@code filter}.
     *
     * @param dataDir directory whose files are to be indexed
     * @param filter  selects the files to index; {@code null} accepts every file
     * @return the value of {@code writer.numDocs()} after indexing
     * @throws IOException if {@code dataDir} cannot be listed (it does not
     *                     exist, is not a directory, or an I/O error occurred)
     * @throws Exception   if reading or indexing a file fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        // listFiles() returns null rather than throwing when the path is not a
        // listable directory; fail with a clear message instead of an NPE.
        if (files == null) {
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            boolean indexable = !f.isDirectory() && !f.isHidden() && f.exists() && f.canRead();
            if (indexable && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @param f file to represent
     * @return a document with {@code contents}, {@code filename} and {@code fullpath} fields
     * @throws Exception if the file cannot be opened or its canonical path resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the source files.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Logs the file's canonical path and adds its document to the index. */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /** Accepts files whose lower-cased name ends with {@code .txt}. */
    public static class TextFilesFilter implements FileFilter {

        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }
}


package hdli.lucene.chapter1;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a query-parser query against the {@code contents} field of an
 * existing Lucene index and prints the {@code fullpath} of each hit.
 */
public class Searcher {

    /** Index directory used when no command-line arguments are supplied. */
    private static final String DEFAULT_INDEX_DIR = "D:\\lucene_data\\chapter1\\index";

    /** Query string used when no command-line arguments are supplied. */
    private static final String DEFAULT_QUERY = "another AND context";

    /**
     * Entry point. Usage: {@code java Searcher [<indexDir> <query>]}.
     *
     * <p>When exactly two arguments are given they are used as the index
     * directory and query string; otherwise the built-in defaults are used.
     *
     * @param args optional index directory and query string
     * @throws Exception if the index cannot be opened or the query is invalid
     */
    public static void main(String... args) throws Exception {
        String indexDir = DEFAULT_INDEX_DIR;
        String q = DEFAULT_QUERY;
        if (args.length == 2) {
            indexDir = args[0];
            q = args[1];
        }
        search(indexDir, q);
    }

    /**
     * Searches the index at {@code indexDir} for {@code q}, printing the hit
     * count, the elapsed search time and the stored {@code fullpath} of up to
     * ten matching documents.
     *
     * @param indexDir file-system path of the index directory
     * @param q        query string in query-parser syntax, parsed against the
     *                 {@code contents} field
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            // The searcher is closed in finally so that a parse or search
            // failure does not leak the underlying index reader.
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(q);

                long start = System.currentTimeMillis();
                TopDocs hits = is.search(query, 10);
                long end = System.currentTimeMillis();

                System.out.println("Found " + hits.totalHits + " document(s) (in " + (end - start)
                        + " milliseconds) that matched query '" + q + "':");

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fullpath"));
                }
            } finally {
                is.close();
            }
        } finally {
            dir.close();
        }
    }
}


理解索引过程的核心类
IndexWriter
Directory
Analyzer
Document
Field

理解搜索过程的核心类
IndexSearcher
Term
Query
TermQuery
TopDocs
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值