Lucene4全文索引示例

Lucene4.2.1示例,之前也做过3.6的示例。3.6的分词需要使用IKAnalyzer或者其他的分词,对中文的支持可能才会更好,但是4.2为我们提供了SmartChineseAnalyzer这个中文分词器。

 

下面是一个简单的示例程序,分别对应增删改查:

 

package com.xiva.test.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene 4.2 full-text index over all {@code .java} files found under a
 * directory tree, using {@link SmartChineseAnalyzer} for Chinese-aware tokenizing.
 * The index is rebuilt from scratch on every run ({@code OpenMode.CREATE} plus
 * {@code deleteAll()}).
 */
public class IvFileIndex
{

    /** All regular files collected by {@link #listAllFile(File)}; pre-sized to reduce resizing. */
    private static List<File> fileList = new ArrayList<File>(1024);

    /**
     * Recursively collects every regular file under {@code fileDir} into {@link #fileList}.
     *
     * @param fileDir root directory to walk; directories that cannot be listed
     *                (permission problems, or a non-directory argument) are skipped
     */
    public static void listAllFile(File fileDir)
    {
        File[] files = fileDir.listFiles();
        if (files == null)
        {
            // listFiles() returns null (not an empty array) for non-directories
            // or on I/O error — without this guard the loop below throws NPE.
            return;
        }
        for (File file : files)
        {
            if (file.isDirectory())
            {
                listAllFile(file);
            }
            else
            {
                fileList.add(file);
            }
        }
    }

    public static void main(String[] args) throws Exception
    {
        File fileDir = new File("F:\\WorkSpace");
        File indexDir = new File("F:\\WorkSpace\\EclipseProjects\\luceneIndex");

        Analyzer luceneAnalyzer = new SmartChineseAnalyzer(Version.LUCENE_42);

        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, luceneAnalyzer);
        // CREATE: any existing index in indexDir is replaced.
        config.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE);

        Directory fsDir = new SimpleFSDirectory(indexDir);
        IndexWriter indexWriter = new IndexWriter(fsDir, config);

        listAllFile(fileDir);
        long startTime = new Date().getTime();

        try
        {
            indexWriter.deleteAll();

            // 增加document到索引去 — one Document per .java file:
            // "path" is stored verbatim (used later as the delete/update key),
            // "body" is tokenized from the file contents.
            for (File txtFile : fileList)
            {
                if (txtFile.isFile() && txtFile.getName().endsWith(".java"))
                {
                    System.out.println(txtFile.getName());
                    FileInputStream fis = null;
                    try
                    {
                        fis = new FileInputStream(txtFile);
                    }
                    catch (FileNotFoundException fnfe)
                    {
                        // File vanished between listing and opening — skip it.
                        continue;
                    }

                    try
                    {
                        Document document = new Document();
                        Field fieldPath = new StringField("path", txtFile.getPath(), Field.Store.YES);
                        // NOTE(review): assumes source files are GBK-encoded — confirm for your workspace.
                        Field fieldBody = new TextField("body", new BufferedReader(new InputStreamReader(fis, "GBK")));

                        document.add(fieldPath);
                        document.add(fieldBody);
                        // addDocument consumes the body Reader immediately, so closing
                        // fis in the finally below is safe.
                        indexWriter.addDocument(document);
                    }
                    finally
                    {
                        fis.close();
                    }

                    System.out.println("被索引文件:" + txtFile.getCanonicalPath());
                }
            }

            // 对索引进行优化 — merge down to at most 10 segments.
            indexWriter.forceMerge(10);
        }
        finally
        {
            // Always release the index write lock, even if indexing failed part-way.
            indexWriter.close();
        }

        // 测试一下索引的时间
        long endTime = new Date().getTime();
        System.out.println("索引耗费时间:" + (endTime - startTime) + " 毫秒!");
    }

}

 

package com.xiva.test.lucene;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * 
 * 删除索引
 * @author xiva
 * @version [版本号, 2013-4-30]
 * @see [相关类/方法]
 * @since [产品、模块版本]
 */
/**
 * 删除索引 — removes documents from an existing Lucene 4.2 index by exact match
 * on the stored {@code "path"} field (the key written by the indexing program).
 *
 * @author xiva
 * @version [版本号, 2013-4-30]
 * @see [相关类/方法]
 * @since [产品、模块版本]
 */
public class IvIndexDelete
{
    public static void main(String[] args) throws Exception
    {
        File fileDir = new File("E:\\data\\lucene");
        File indexDir = new File("E:\\data\\index");
        
        Analyzer luceneAnalyzer = new SmartChineseAnalyzer(Version.LUCENE_42);
        
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42,
                luceneAnalyzer);
        // APPEND: the index must already exist; we only modify it.
        config.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.APPEND);
        
        Directory fsDir = new SimpleFSDirectory(indexDir);
        IndexWriter indexWriter = new IndexWriter(fsDir, config);
        File[] txtFiles = fileDir.listFiles();
        if (txtFiles == null)
        {
            // listFiles() returns null for a missing/unreadable directory — nothing to do.
            indexWriter.close();
            return;
        }
        long startTime = new Date().getTime();
        
        try
        {
            // Delete every document whose "path" term matches a *u.txt file on disk.
            // Deletion only needs the path string, so the file is never opened
            // (the original code opened a FileInputStream it never read).
            for (int i = 0; i < txtFiles.length; i++)
            {
                if (txtFiles[i].isFile() && txtFiles[i].getName().endsWith("u.txt"))
                {
                    indexWriter.deleteDocuments(new Term("path",
                            txtFiles[i].getPath()));
                    
                    System.out.println("被删除索引文件:" + txtFiles[i].getCanonicalPath());
                }
            }
            
            indexWriter.forceMerge(10);
        }
        finally
        {
            // Always release the write lock, even if a delete failed.
            indexWriter.close();
        }
        
        //测试一下索引的时间  
        long endTime = new Date().getTime();
        System.out.println("删除索引耗费时间:" + (endTime - startTime) + " 毫秒!");
    }
}

 

package com.xiva.test.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * 更新索引 — re-indexes files in an existing Lucene 4.2 index. For each matching
 * file, {@code updateDocument} atomically deletes any document whose {@code "path"}
 * term equals the file path and adds a freshly-built replacement document.
 */
public class IvIndexUpdate
{
    /**
     * Rebuilds the index entry for every {@code *u.txt} file under the data directory.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void updateIndex() throws Exception
    {
        File fileDir = new File("E:\\data\\lucene");
        File indexDir = new File("E:\\data\\index");

        Analyzer luceneAnalyzer = new SmartChineseAnalyzer(Version.LUCENE_42);

        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, luceneAnalyzer);

        // APPEND: the index must already exist; we only modify it.
        config.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.APPEND);

        Directory fsDir = new SimpleFSDirectory(indexDir);
        IndexWriter indexWriter = new IndexWriter(fsDir, config);
        File[] txtFiles = fileDir.listFiles();
        if (txtFiles == null)
        {
            // listFiles() returns null for a missing/unreadable directory — nothing to do.
            indexWriter.close();
            return;
        }
        long startTime = new Date().getTime();

        try
        {
            // 增加document到索引去 — replace the document keyed by "path" for each file.
            for (int i = 0; i < txtFiles.length; i++)
            {
                if (txtFiles[i].isFile() && txtFiles[i].getName().endsWith("u.txt"))
                {
                    FileInputStream fis;
                    try
                    {
                        fis = new FileInputStream(txtFiles[i]);
                    }
                    catch (FileNotFoundException fnfe)
                    {
                        // File vanished between listing and opening — skip it.
                        continue;
                    }

                    try
                    {
                        Document document = new Document();
                        Field fieldPath = new StringField("path", txtFiles[i].getPath(), Field.Store.YES);
                        // NOTE(review): assumes the data files are GBK-encoded — confirm.
                        Field fieldBody = new TextField("body", new BufferedReader(new InputStreamReader(fis, "GBK")));
                        
                        document.add(fieldPath);
                        document.add(fieldBody);

                        // updateDocument consumes the body Reader immediately, so
                        // closing fis in the finally below is safe.
                        indexWriter.updateDocument(new Term("path", txtFiles[i].getPath()), document);
                    }
                    finally
                    {
                        fis.close();
                    }

                    System.out.println("被更新索引文件:" + txtFiles[i].getCanonicalPath());
                }
            }

            indexWriter.forceMerge(10);
        }
        finally
        {
            // Always release the write lock, even if an update failed part-way.
            indexWriter.close();
        }

        // 测试一下索引的时间
        long endTime = new Date().getTime();
        System.out.println("更新索引耗费时间:" + (endTime - startTime) + " 毫秒!");
    }

    public static void main(String[] args) throws Exception
    {
        updateIndex();
    }
}

 

package com.xiva.test.lucene;

import java.io.File;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Searches the Lucene 4.2 index built by the indexing program: parses the query
 * string "索引" against the tokenized {@code "body"} field and prints the stored
 * {@code "path"} of each hit.
 */
public class IvFileSearch
{
    public static void main(String[] args) throws IOException
    {
        String queryString = "索引";
        String field = "body";
        Query query = null;
        TopDocs docs = null;

        File indexDir = new File("F:\\WorkSpace\\EclipseProjects\\luceneIndex");
        IndexReader reader = DirectoryReader.open(FSDirectory.open(indexDir));
        try
        {
            IndexSearcher searcher = new IndexSearcher(reader);

            // StopFilterFactory factory = new StopFilterFactory();
            // factory.getStopWords()
            Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_42);

            try
            {
                long startTime = new Date().getTime();
                QueryParser qp = new QueryParser(Version.LUCENE_42, field, analyzer);
                query = qp.parse(queryString);

                long endTime = new Date().getTime();
                System.out.println("索引耗费时间:" + (endTime - startTime) + " 毫秒!");
            }
            catch (ParseException e)
            {
                e.printStackTrace();
            }

            // BUGFIX: the original checked `searcher != null` (always true) but not
            // `query`, so a parse failure led straight to search(null, …) and an NPE.
            if (query != null)
            {
                docs = searcher.search(query, 25);// 可以分页查询

                ScoreDoc scoreDocs[] = docs.scoreDocs;

                // BUGFIX: iterate over scoreDocs.length, not totalHits — search(…, 25)
                // returns at most 25 ScoreDocs, so using totalHits threw
                // ArrayIndexOutOfBoundsException whenever more than 25 documents matched.
                for (int i = 0; i < scoreDocs.length; i++)
                {
                    Document targetDoc = searcher.doc(scoreDocs[i].doc);
                    String path = targetDoc.get("path");
                    System.out.println("path:" + path);
                }
            }
        }
        finally
        {
            // Release index files and file handles.
            reader.close();
        }
    }
}

 

PS:对于数据库操作时,相信大家都有相关的方法去更新或者删除索引,比如及时更新或者使用定时扫描表的方法。数据库本身也具有全文索引的特性,比如Oracle和MSSQL。

 

对于文件的操作,我的解决方法是:可以采用 利用JNA对文件进行监听之观察者模式 这里给出的方法来更新或者删除索引。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值