Lucene6入门教程（二）索引的创建

最新推荐文章于 2022-05-28 16:27:58 发布

Macropodus

最新推荐文章于 2022-05-28 16:27:58 发布

阅读量1.4k

点赞数

分类专栏： lucene6 文章标签： lucene 索引教程 6 创建

本文链接：https://blog.csdn.net/rensihui/article/details/77450491

版权

lucene6 专栏收录该内容

6 篇文章 1 订阅

订阅专栏

（一）索引的创建步骤：
学习Lucene,最重要的一点在于索引的建立，这是一切搜索等的基础，Lucene6创建索引的步骤如下：
（1）创建目录（Directory），（即多线程支持创建）；
（2）创建分析器（Analyzer）（要注意使用的是哪种Analyzer，搜索的时候也要使用与创建索引时相同的Analyzer）；
（3）IndexWriterConfig对象创建,获取IndexWriter对象，判断覆盖/追加索引；
（4）遍历待索引的对象列表，创建文档对象（Document），添加域（Field）等；
（5）通过IndexWriter将文档添加到索引中；
（6）结束索引创建过程，IndexWriter执行close()结束。

（二）代码示例：

pom.xml的配置：我用的是lucene6.4.1,用其他的也可以
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>IDC</groupId>
    <artifactId>luc</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Lucene 6.x and JUnit Jupiter both require Java 8 or newer. -->
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <properties>
        <!-- Single place to change the version of every Lucene module below. -->
        <lucene.version>6.4.1</lucene.version>
    </properties>
    <dependencies>
        <!-- lucene start -->
        <!-- lucene-core is declared once only; a duplicate declaration triggers a Maven model warning. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-memory</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-demo</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- lucene end -->

        <dependency>
            <groupId>com.google.zxing</groupId>
            <artifactId>core</artifactId>
            <version>3.2.0</version>
        </dependency>
        <dependency>
            <groupId>com.chenlb.mmseg4j</groupId>
            <artifactId>mmseg4j-analysis</artifactId>
            <version>1.9.1</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <!-- Pinned instead of the "RELEASE" meta-version so that builds are reproducible. -->
            <version>5.0.0</version>
        </dependency>

    </dependencies>
</project>

**java(IDEA)代码：**
package com.Licene6;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;

/***Created by Mo
 *On 2017/8/18  ***13:39.
 ******/
/**
 * Builds a Lucene index from the files found directly inside one data directory.
 *
 * <p>Usage: construct the object with the directory that holds (or will hold) the index,
 * call {@link #index(String, FileFilter)} for the data directory, then {@link #close()}.
 * The class implements {@link AutoCloseable}, so it can be used in try-with-resources.
 */
public class Index implements AutoCloseable {
    /** Index storage; kept so it can be released in {@link #close()}. */
    private final Directory directory;
    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /** {@link FileFilter} that accepts only files whose name ends with {@code .txt} (case-insensitive). */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Opens the index stored under the given path, creating it when it does not exist yet.
     *
     * <p>The writer is opened in {@code CREATE_OR_APPEND} mode, so documents are appended to
     * an existing index rather than replacing it.
     *
     * @param indexdirectory file-system path of the index directory
     * @throws IOException if the directory or the index writer cannot be opened
     */
    public Index(String indexdirectory) throws IOException {
        Directory dir = FSDirectory.open(Paths.get(indexdirectory));
        IndexWriter indexWriter;
        try {
            IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(dir, config);
        } catch (IOException | RuntimeException e) {
            // Do not leak the already opened directory when the writer cannot be created.
            try {
                dir.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.directory = dir;
        this.writer = indexWriter;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws IOException if closing the writer or the directory fails
     */
    @Override
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            // The directory was opened by this class, so this class has to close it as well.
            directory.close();
        }
    }

    /**
     * Indexes every regular, visible, readable file directly inside {@code dataDir} that is
     * accepted by {@code filter}. Sub-directories are not traversed.
     *
     * @param dataDir path of the directory whose files are indexed
     * @param filter  selects the files to index; {@code null} accepts every file
     * @return the value of {@code writer.numDocs()} after indexing; because the index is
     *         opened in append mode this includes documents that were already in the index
     * @throws IOException if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    public int index(String dataDir, FileFilter filter) throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list data directory: " + dataDir);
        }
        for (File file : files) {
            // The filter test is parenthesised as a whole: '&&' binds tighter than '||', so
            // without the parentheses the filter alone could accept directories or unreadable
            // files, and a null filter could be dereferenced.
            boolean indexable = !file.isDirectory()
                    && !file.isHidden()
                    && file.exists()
                    && file.canRead()
                    && (filter == null || filter.accept(file));
            if (indexable) {
                indexFile(file);
            }
        }
        return writer.numDocs();
    }

    /**
     * Converts one file into a {@link Document} and adds it to the index.
     *
     * @param file the file to index
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    private void indexFile(File file) throws IOException {
        System.out.println("indexing..."+file.getCanonicalPath());
        Document doc = getDocument(file);
        writer.addDocument(doc);
    }

    /**
     * Creates the {@link Document} for one file. A document bundles the fields of a file:
     * {@code contents} (analyzed, not stored), {@code filename} and {@code fullpath}
     * (both analyzed and stored).
     *
     * @param file the file to describe
     * @return the document to add to the index
     * @throws IOException if the file cannot be opened or its canonical path cannot be resolved
     */
    protected Document getDocument(File file) throws IOException {
        Document doc = new Document();
        // FileReader decodes with the platform default charset.
        // NOTE(review): the reader is handed over to Lucene, which presumably closes it once
        // the field has been analyzed - confirm.
        doc.add(new Field("contents", new FileReader(file), TextField.TYPE_NOT_STORED));
        doc.add(new Field("filename", file.getName(), TextField.TYPE_STORED));
        doc.add(new Field("fullpath", file.getCanonicalPath(), TextField.TYPE_STORED));
        return doc;
    }

    /**
     * Indexes the {@code .txt} files of the data directory and prints the document count
     * followed by the elapsed time in milliseconds.
     *
     * @param args optional: {@code args[0]} = index directory, {@code args[1]} = data
     *             directory; the built-in sample paths are used for missing arguments
     * @throws IOException if indexing fails
     */
    public static void main(String[] args) throws IOException {
        // Index directory; it may be empty.
        String indexDir = args.length > 0
                ? args[0]
                : "D:\\workspace\\lucene6.4.1\\learing2017.8\\0818\\index";
        // Data directory; it has to contain .txt files.
        String dataDir = args.length > 1
                ? args[1]
                : "D:\\workspace\\lucene6.4.1\\learing2017.8\\0818\\data";

        long start = System.currentTimeMillis();
        int numberIndexed;
        // try-with-resources closes the index even when indexing throws.
        try (Index index = new Index(indexDir)) {
            numberIndexed = index.index(dataDir, new TextFilesFilter());
        }
        long end = System.currentTimeMillis();
        System.out.println(numberIndexed);
        System.out.println(end - start); // elapsed indexing time in milliseconds
    }
}