lucene学习笔记

最新推荐文章于 2024-07-25 17:09:29 发布

txyhl

最新推荐文章于 2024-07-25 17:09:29 发布

阅读量633

点赞数

分类专栏： J2EE 所有文章文章标签： lucene exception import constants file date

本文链接：https://blog.csdn.net/txyhl/article/details/1572564

版权

所有文章同时被 2 个专栏收录

12 篇文章 0 订阅

订阅专栏

J2EE

8 篇文章 0 订阅

订阅专栏

由于项目中要用到全文检索，而lucene相对是一个既简单又实用的搜索引擎，就花了一段时间研究lucene，发现用它来做全文检索确实非常方便，现将在学习期间做的一个例子发出来，供大家一起讨论。
首先要先下载lucene的jar包，我用的是2.0的版本，大家可以去www.apache.org去下载。
如有问题，请发邮件：txyhl@126.com

首先在C盘下面建一个文件夹，名字为： lucenetest 然后再在该文件夹下面建两个文件夹，一个命名为test,另一个命名为index,在test文件夹下面建四个文本文件，分别命名为：a.txt ，b.txt，c.txt，d.txt，文件中的内容如下：
a.txt中的内容为：中华人民共和国
b.txt中的内容为：人民共和国
c.txt中的内容为：人民
d.txt中的内容为：共和国

OK,做好了准备工作，让我们开始lucene的神奇之旅吧！

LuceneIndex.java
//该文件主要用来建索引，lucene搜索引擎是基于索引进行查找的，这样可以提高查找速度
import java.io.*;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
 * Builds a Lucene index from the files found directly inside
 * {@code Constants.INDEX_FILE_PATH} and stores it under
 * {@code Constants.INDEX_STORE_PATH}.
 *
 * <p>Each regular file becomes one document with three fields: {@code path},
 * {@code modified} and {@code contents}. The index is always created from
 * scratch (the writer is opened with {@code create = true}).
 */
public class LuceneIndex {
    /** Index writer; opened by the constructor and released by {@link #colse()}. */
    private IndexWriter writer = null;

    /**
     * Command-line entry point: builds the index, prints the elapsed time and
     * closes the writer.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened, written or closed
     */
    public static void main(String[] args) throws Exception
    {
        LuceneIndex indexer = new LuceneIndex();
        try
        {
            Date start = new Date();
            // Build the index.
            indexer.writeToIndex();
            Date end = new Date();
            System.out.println("建立索引时共用"+(end.getTime()-start.getTime())+"毫秒");
        }
        finally
        {
            // Always release the writer, even when indexing failed part-way.
            indexer.colse();
        }
    }

    /**
     * Opens an {@code IndexWriter} on {@code Constants.INDEX_STORE_PATH} using a
     * {@code StandardAnalyzer}, creating a new index.
     *
     * @throws IllegalStateException if the index cannot be opened; failing here
     *         avoids a later NullPointerException on the first use of the writer
     */
    public LuceneIndex()
    {
        try
        {
            writer = new IndexWriter(Constants.INDEX_STORE_PATH,new StandardAnalyzer(),true);
        }
        catch(IOException e)
        {
            throw new IllegalStateException(
                "Cannot open index writer at " + Constants.INDEX_STORE_PATH, e);
        }
    }

    /**
     * Wraps a file in a Lucene {@code Document} with the fields {@code path},
     * {@code modified} and {@code contents}.
     *
     * @param f the file to index
     * @return the document describing {@code f}
     * @throws Exception if the file cannot be opened for reading
     */
    private Document getDocument(File f) throws Exception
    {
        Document doc = new Document();

        // File path: stored and indexed as a single untokenized term.
        doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));

        // Last-modified time at minute resolution: stored and indexed
        // (searchable), but not tokenized into words.
        doc.add(new Field("modified",
            DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
            Field.Store.YES, Field.Index.UN_TOKENIZED));

        // File contents supplied through a Reader, so the text is tokenized and
        // indexed but not stored.
        // NOTE(review): FileReader decodes with the platform default charset; if
        // the files use a different encoding, searches for non-ASCII text will
        // fail. Confirm the encoding of the input files before changing this.
        doc.add(new Field("contents", new FileReader(f)));
        return doc;
    }

    /**
     * Adds one document per regular file located directly inside
     * {@code Constants.INDEX_FILE_PATH}. Sub-directories and other non-regular
     * entries are skipped. Does nothing if the path is not a directory.
     *
     * @throws Exception if the directory cannot be listed, or a file cannot be
     *         read or added to the index
     */
    public void writeToIndex() throws Exception
    {
        File dir = new File(Constants.INDEX_FILE_PATH);
        if(!dir.isDirectory())
        {
            return;
        }

        // listFiles() returns null (not an empty array) on an I/O error.
        File[] entries = dir.listFiles();
        if(entries == null)
        {
            throw new IOException("Cannot list directory: " + dir);
        }

        for(int i=0;i<entries.length;i++)
        {
            File f = entries[i];
            // FileReader cannot open a directory; skip anything that is not a plain file.
            if(!f.isFile())
            {
                continue;
            }
            Document doc = this.getDocument(f);
            System.out.println("正在建立索引："+f);
            writer.addDocument(doc);
        }
    }

    /**
     * Closes the underlying index writer. The misspelled name is kept so that
     * existing callers continue to compile.
     *
     * @throws Exception if closing the writer fails
     */
    public void colse() throws Exception
    {
        writer.close();
    }
}

Constants.java
//该文件主要用来指明源文件的路径和生成的索引文件所存放的路径，定义了两个静态常量
/**
 * Filesystem locations shared by the indexing and searching examples.
 */
public class Constants {
    /** Directory in which the Lucene index is created and from which it is read. */
    public static final String INDEX_STORE_PATH = "c://lucenetest//index";

    /** Directory whose files are read and added to the index. */
    public static final String INDEX_FILE_PATH = "c://lucenetest//test";
}

LuceneSearch.java
//该文件是最重要的一个，它用来按用户指定的关键字对索引进行搜索
import java.util.Date;

import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

/**
 * Searches the {@code contents} field of the index stored under
 * {@code Constants.INDEX_STORE_PATH} and prints the {@code path} of every
 * matching document.
 */
public class LuceneSearch {
    /** Reader backing {@link #searcher}; kept so it can be closed explicitly. */
    private IndexReader reader = null;
    private IndexSearcher searcher = null;
    /** The most recently parsed query. */
    private Query query = null;

    /**
     * Command-line entry point: runs one sample search, prints the result and
     * releases the index.
     *
     * @param args ignored
     * @throws Exception if the index cannot be closed
     */
    public static void main(String[] args) throws Exception
    {
        LuceneSearch test = new LuceneSearch();
        try
        {
            // Sample keyword; "人民" and "共和国" are other keywords worth trying.
            Hits hits = test.search("中华");
            test.printResult(hits);
        }
        finally
        {
            // Release the index files even if searching or printing failed.
            test.close();
        }
    }

    /**
     * Opens the index at {@code Constants.INDEX_STORE_PATH} for searching.
     *
     * @throws IllegalStateException if the index cannot be opened; failing here
     *         avoids a later NullPointerException on the first search
     */
    public LuceneSearch()
    {
        try
        {
            reader = IndexReader.open(Constants.INDEX_STORE_PATH);
            searcher = new IndexSearcher(reader);
        }
        catch(java.io.IOException e)
        {
            throw new IllegalStateException(
                "Cannot open index at " + Constants.INDEX_STORE_PATH, e);
        }
    }

    /**
     * Parses {@code keyword} with a {@code StandardAnalyzer} against the
     * {@code contents} field, runs the query and prints the elapsed time in
     * milliseconds.
     *
     * @param keyword the query text in Lucene query-parser syntax
     * @return the hits, or {@code null} if parsing or searching failed (the
     *         stack trace is printed in that case)
     */
    public final Hits search(String keyword)
    {
        try
        {
            System.out.println("正在检索关键字："+keyword);
            QueryParser parser = new QueryParser("contents",new StandardAnalyzer());
            query = parser.parse(keyword);
            long start = System.currentTimeMillis();
            Hits hits = searcher.search(query);
            long end = System.currentTimeMillis();
            System.out.println("检索完成，用时："+(end-start));
            return hits;
        }
        catch(Exception e)
        {
            // Callers rely on a null result to signal failure.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Prints the stored {@code path} of every hit, or a "nothing found" message
     * when there are no hits.
     *
     * @param hits the result of {@link #search(String)}; {@code null} (a failed
     *        search) is treated the same as an empty result
     */
    public void printResult(Hits hits)
    {
        if(hits == null || hits.length()==0)
        {
            System.out.println("对不起，没有你要查找的结果！");
        }
        else
        {
            for(int i=0;i<hits.length();i++)
            {
                try
                {
                    Document doc = hits.doc(i);
                    System.out.print("这是第"+(i+1)+"个检索到的结果，文件名为：");
                    System.out.println(doc.get("path"));
                }
                catch(Exception e)
                {
                    // One unreadable document should not stop the remaining output.
                    e.printStackTrace();
                }
                System.out.println("------------------------------------------");
            }
        }
        System.out.println("#################################################");
    }

    /**
     * Closes the searcher and the underlying index reader. Hits obtained from
     * this instance should be consumed before calling this method.
     *
     * @throws Exception if closing the searcher or the reader fails
     */
    public void close() throws Exception
    {
        try
        {
            searcher.close();
        }
        finally
        {
            reader.close();
        }
    }

}

好了，一个简单的lucene实例产生了，要想将其用于商业应用，还要研究很多东西，比如“中文分词”等，希望该文章对大家有所帮助。

txyhl

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
lucene学习笔记

由于项目中要用到全文检索，而lucene相对是一个既简单又实用的搜索引擎，就花了一段时间研究lucene，发现用它来做全文检索确实非常方便，现将在学习期间做的一个例子发出来，供大家一起讨论。首先要先下载lucene的jar包，我用的是2.0的版本，大家可以去www.apache.org去下载。如有问题，请发邮件：txyhl@126.com首先在C盘下面建一个文件夹，名字为：lucenetest 然
复制链接

扫一扫

专栏目录