基于lucene7.5创建索引的demo

最新推荐文章于 2021-04-09 11:09:13 发布

浮华2017

最新推荐文章于 2021-04-09 11:09:13 发布

阅读量210

点赞数

分类专栏： Lucene

本文链接：https://blog.csdn.net/LoveMin2017/article/details/91040525

版权

Lucene 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

索引目录下的所有文本文件。
此案例来自 Lucene 7.5.0 官方 demo：http://lucene.apache.org/core/7_5_0/demo/index.html

package com.sunfeng.lucene.meetlucene;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;


/**
 * 索引目录下的所有文本文件。
 * 这是一个命令行应用程序，演示了简单的Lucene索引。
 * 在没有命令行参数的情况下运行它以获取用法信息。
 */
public class IndexFiles {

    private IndexFiles() {
    }

    public static void main(String[] args) {
        final String indexPath = "d://lucene//demo01";
        boolean create = true;
        final Path docDir = Paths.get("D:\\upload\\standard");
        if (!Files.isReadable(docDir)) {
            System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }
        Date start = new Date();
        try {
            //  打印要索引的目录
            System.out.println("Indexing to directory '" + indexPath + "'...");
            // 读取需要索引的文件到Lucene的目录类中，新版的Lucene只支持IO2中的Path类型的变量了。
            Directory dir = FSDirectory.open(Paths.get(indexPath));
            // 创建分词器，这里使用的是SmartChineseAnalyzer分词器，可以根据具体需要使用其他分词器。
            Analyzer analyzer = new SmartChineseAnalyzer();
            // 新版的Lucene中索引创建类只接收IndexWriterConfig配置。
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            // 是否是新建
            if (create) {
                //  创建新索引或覆盖现有索引。
                iwc.setOpenMode(OpenMode.CREATE);
            } else {
                // 如果不存在创建一个新的索引
                // 否则，它将打开索引并追加文档。
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            }
            //获取IndexWriter对象
            IndexWriter writer = new IndexWriter(dir, iwc);
            indexDocs(writer, docDir);
            writer.close();
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() +
                    "\n with message: " + e.getMessage());
        }
    }
    static void indexDocs(final IndexWriter writer, Path path) throws IOException {
        // 如果是目录 进行下面的处理
        if (Files.isDirectory(path)) {
            Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                    try {
                        indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
                    } catch (IOException ignore) {
                    }
                    return FileVisitResult.CONTINUE;
                }
            });
        } else {
//            如果是文件
            indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
        }
    }
    /**
     * 索引一个文档
     */
    static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
        try (InputStream stream = Files.newInputStream(file)) {
            // 创建一个空的文档
            Document doc = new Document();
            //将文件路径添加为名为“path”的字段。使用
            //索引（即可搜索）但不标记化的字段
            //字段分成单独的单词，不索引词频
            //或位置信息：
            Field pathField = new StringField("path", file.toString(), Field.Store.YES);
            doc.add(pathField);
            //将文件的上次修改日期添加到名为“modified”的字段中。
            //使用索引的长点（即高效筛选
            doc.add(new LongPoint("modified", lastModified));
            doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
            // 创建或者更新
            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                System.out.println("updating " + file);
                //  更新文档    根据文档的名称
                writer.updateDocument(new Term("path", file.toString()), doc);
            }
        }
    }
}

浮华2017

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
基于lucene7.5创建索引的demo

索引目录下的所有文本文件。此案例来自 http://lucene.apache.org/core/7_5_0/demo/index.html 。 package com.sunfeng.lucene.meetlucene; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; ...
复制链接

扫一扫

专栏目录