Lucene6入门教程(二)索引的创建

(一)索引的创建步骤:
学习Lucene,最重要的一点在于索引的建立,这是搜索等一切功能的基础。Lucene6创建索引的步骤如下:
(1)创建目录(Directory),(即多线程支持创建);
(2)创建词库分析器(Analyzer)(要注意使用的是哪种Analyzer,搜索时也要使用与创建索引时相同的Analyzer);
(3)IndexWriterConfig对象创建,获取IndexWriter对象,判断覆盖/追加索引;
(4)遍历待索引的对象列表,创建文档对象(Document),添加域(Field)等;
(5)通过IndexWriter将文档添加到索引中;
(6)结束索引创建过程,IndexWriter执行close()结束。

(二)代码示例:

pom.xml的配置:我用的是lucene6.4.1,用其他的也可以
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Lucene 6 indexing tutorial project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>IDC</groupId>
    <artifactId>luc</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <!-- Compile with Java 7 language level and bytecode target. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <properties>
        <!-- Single place to change the version shared by all Lucene modules below. -->
        <lucene.version>6.4.1</lucene.version>
    </properties>
    <dependencies>
        <!-- lucene-core is declared exactly once; a second identical declaration
             makes Maven emit a duplicate-dependency warning. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>com.google.zxing</groupId>
            <artifactId>core</artifactId>
            <version>3.2.0</version>
        </dependency>
        <dependency>
            <groupId>com.chenlb.mmseg4j</groupId>
            <artifactId>mmseg4j-analysis</artifactId>
            <version>1.9.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-memory</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-demo</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- Pinned to a fixed version: the floating "RELEASE" keyword resolves to
             whatever is newest at build time, which makes builds non-reproducible. -->
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <version>5.0.0</version>
        </dependency>

    </dependencies>
    <!-- lucene end -->
</project>
**java(IDEA)代码:**
package com.Licene6;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;

/***Created by Mo
 *On 2017/8/18  ***13:39.
 ******/
/**
 * Minimal Lucene indexer: walks one data directory (non-recursively) and adds
 * every accepted file to an on-disk index as a {@link Document}.
 *
 * <p>The instance owns an {@link IndexWriter} and the {@link Directory} it
 * writes to; call {@link #close()} (or use try-with-resources) when done so
 * pending changes are committed and resources are released.
 */
public class Index implements AutoCloseable {
    /** Directory holding the index files; released in {@link #close()}. */
    private final Directory directory;
    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /** {@link FileFilter} that accepts files whose name ends with ".txt" (case-insensitive). */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Opens (or creates) the index stored at the given path.
     *
     * @param indexdirectory file-system path where the index is stored
     * @throws IOException if the directory or the index writer cannot be opened
     */
    public Index(String indexdirectory) throws IOException {
        Directory dir = FSDirectory.open(Paths.get(indexdirectory));
        IndexWriter indexWriter;
        try {
            IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
            // Append to an existing index if there is one, otherwise create a new one.
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(dir, config);
        } catch (IOException | RuntimeException e) {
            // Do not leak the already-opened directory when the writer cannot be created.
            dir.close();
            throw e;
        }
        directory = dir;
        writer = indexWriter;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws IOException if closing either resource fails
     */
    @Override
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            // The directory was opened by this class, so it is released here as well.
            directory.close();
        }
    }

    /**
     * Indexes every eligible file directly inside {@code dataDir}.
     *
     * <p>A file is eligible when it is not a directory, is not hidden, exists,
     * is readable, and is accepted by {@code filter} (a {@code null} filter
     * accepts everything). Sub-directories are not traversed.
     *
     * @param dataDir directory whose files should be indexed
     * @param filter  optional filter selecting the files to index; may be {@code null}
     * @return the value of {@code writer.numDocs()} after indexing (with the
     *         CREATE_OR_APPEND open mode this presumably also counts documents
     *         added by earlier runs)
     * @throws IOException if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    public int index(String dataDir, FileFilter filter) throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File file : files) {
            boolean readableRegularFile = !file.isDirectory()
                    && !file.isHidden()
                    && file.exists()
                    && file.canRead();
            // The filter test is parenthesised as a unit: "&&" binds tighter than "||",
            // so without the parentheses the filter would bypass the checks above.
            if (readableRegularFile && (filter == null || filter.accept(file))) {
                indexFile(file);
            }
        }
        return writer.numDocs();
    }

    /**
     * Builds a {@link Document} for the file and adds it to the index.
     *
     * @param file file to index
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    private void indexFile(File file) throws IOException {
        System.out.println("indexing..." + file.getCanonicalPath());
        Document doc = getDocument(file);
        writer.addDocument(doc);
    }

    /**
     * Creates the {@link Document} describing one file: its contents, its name
     * and its canonical path.
     *
     * @param file file to describe
     * @return a new document with the fields "contents", "filename" and "fullpath"
     * @throws IOException if the file cannot be opened or its canonical path cannot be resolved
     */
    protected Document getDocument(File file) throws IOException {
        Document doc = new Document();
        // Analyzed but not stored. NOTE(review): FileReader decodes with the platform
        // default charset; confirm the data files use that encoding.
        doc.add(new Field("contents", new FileReader(file), TextField.TYPE_NOT_STORED));
        // Stored and analyzed.
        doc.add(new Field("filename", file.getName(), TextField.TYPE_STORED));
        // Stored and analyzed.
        doc.add(new Field("fullpath", file.getCanonicalPath(), TextField.TYPE_STORED));
        return doc;
    }

    /**
     * Indexes the ".txt" files of a hard-coded data directory and prints the
     * document count followed by the elapsed time in milliseconds.
     *
     * @param args unused
     * @throws IOException if indexing fails
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "D:\\workspace\\lucene6.4.1\\learing2017.8\\0818\\index"; // index location; may start empty
        String dataDir = "D:\\workspace\\lucene6.4.1\\learing2017.8\\0818\\data"; // must contain .txt files

        long start = System.currentTimeMillis();
        int numberIndexed;
        // try-with-resources closes the writer even when indexing throws.
        try (Index index = new Index(indexDir)) {
            numberIndexed = index.index(dataDir, new TextFilesFilter());
        }
        long end = System.currentTimeMillis();
        System.out.println(numberIndexed);
        System.out.println(end - start); // elapsed indexing time in milliseconds
    }
}

参考:
1. 一步:http://blog.csdn.net/wuyinggui10000/article/details/45502445
2. Lucene教程:http://blog.csdn.net/zpf336/article/details/45097975

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值