Lucene5学习之创建索引入门示例

最新推荐文章于 2018-11-28 18:24:31 发布

sc736031305

最新推荐文章于 2018-11-28 18:24:31 发布

阅读量159

点赞数

分类专栏： Lucene 文章标签： Lucene Maven

本文链接：https://blog.csdn.net/sc736031305/article/details/84706652

版权

Lucene 专栏收录该内容

38 篇文章 5 订阅

订阅专栏

Lucene更新实在太快了，只好紧跟脚步开始学习Lucene 5。我花了点时间写了一个demo：程序根据用户提供的文件夹，遍历该文件夹下的所有文件，读取每个文件的内容并写入索引。读取文件部分采用的是NIO.2 API（java.nio.file），因此JDK必须使用1.7及以上版本。Lucene 5开发压缩包请在Lucene官网下载。不多说了，对于码农来说，最直接的就是上代码。

package com.yida.framework.lucene5.core;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Reads files from disk and writes their contents into a Lucene index.
 * 
 * <p>Each file becomes one document with three fields: {@code path}
 * (stored, not tokenized), {@code modified} (last-modified time in
 * milliseconds, not stored) and {@code contents} (tokenized file text,
 * decoded as UTF-8).
 * 
 * @author Lanxiaowei
 * 
 */
public class IndexFile {
	/**
	 * Entry point.
	 * 
	 * @param args
	 *            optional overrides: {@code args[0]} is the directory to
	 *            read documents from, {@code args[1]} is the directory the
	 *            index is written to; missing arguments fall back to the
	 *            built-in defaults
	 * @throws IOException
	 *             if reading the documents or writing the index fails
	 */
	public static void main(String[] args) throws IOException {
		String dirPath = args.length > 0 ? args[0] : "D:/docPath";
		String indexPath = args.length > 1 ? args[1] : "D:/lucenedir";
		createIndex(dirPath, indexPath);
	}
	
	/**
	 * Creates the index, appending to an existing index if there is one.
	 * 
	 * @param dirPath       directory containing the files to read
	 * @param indexPath     directory in which the index is stored
	 * @throws IOException  if reading the documents or writing the index fails
	 */
	public static void createIndex(String dirPath, String indexPath) throws IOException {
		createIndex(dirPath, indexPath, false);
	}
	
	/**
	 * Creates the index and prints the elapsed time to standard output.
	 * 
	 * @param dirPath         directory (or single file) to read
	 * @param indexPath       directory in which the index is stored
	 * @param createOrAppend  {@code true} to always rebuild the index
	 *                        ({@code OpenMode.CREATE}); {@code false} to open
	 *                        an existing index or create it if absent
	 *                        ({@code OpenMode.CREATE_OR_APPEND})
	 * @throws IOException    if reading the documents or writing the index fails
	 */
	public static void createIndex(String dirPath, String indexPath,
			boolean createOrAppend) throws IOException {
		long start = System.currentTimeMillis();
		Path docDirPath = Paths.get(dirPath);

		// try-with-resources closes writer, directory and analyzer (in that
		// order) even when indexing fails, so a failed run does not keep the
		// index directory or its write lock open.
		try (Analyzer analyzer = new StandardAnalyzer();
				Directory dir = FSDirectory.open(Paths.get(indexPath))) {
			IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
			indexWriterConfig.setOpenMode(createOrAppend
					? IndexWriterConfig.OpenMode.CREATE
					: IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

			try (IndexWriter writer = new IndexWriter(dir, indexWriterConfig)) {
				indexDocs(writer, docDirPath);
			}
		}

		long end = System.currentTimeMillis();
		System.out.println("Time consumed:" + (end - start) + " ms");
	}

	/**
	 * Indexes a single file, or every file found by walking a directory tree.
	 * 
	 * @param writer
	 *            index writer to add the documents to
	 * @param path
	 *            file or directory to index
	 * @throws IOException
	 *             if a file cannot be read or the index cannot be written
	 */
	public static void indexDocs(final IndexWriter writer, Path path)
			throws IOException {
		// For a directory, visit every file beneath it.
		if (Files.isDirectory(path)) {
			System.out.println("directory");
			Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
				@Override
				public FileVisitResult visitFile(Path file,
						BasicFileAttributes attrs) throws IOException {
					System.out.println(file.getFileName());
					indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
					return FileVisitResult.CONTINUE;
				}
			});
		} else {
			indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
		}
	}

	/**
	 * Reads one file and adds it to (or updates it in) the index.
	 * 
	 * @param writer
	 *            index writer to add the document to
	 * @param file
	 *            path of the file to index
	 * @param lastModified
	 *            last-modified time of the file, in milliseconds
	 * @throws IOException
	 *             if the file cannot be read or the index cannot be written
	 */
	public static void indexDoc(IndexWriter writer, Path file, long lastModified)
			throws IOException {
		// The reader handed to TextField is consumed while the document is
		// being added, so the stream must stay open until that call returns
		// and must be closed afterwards to avoid leaking a file handle per
		// indexed file.
		try (InputStream stream = Files.newInputStream(file)) {
			Document doc = new Document();

			// Exact, untokenized path; also used as the key for updates.
			Field pathField = new StringField("path", file.toString(),
					Field.Store.YES);
			doc.add(pathField);

			doc.add(new LongField("modified", lastModified, Field.Store.NO));
			doc.add(new TextField("contents", new BufferedReader(
					new InputStreamReader(stream, StandardCharsets.UTF_8))));

			if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
				// Fresh index: no earlier copy of this file can exist.
				System.out.println("adding " + file);
				writer.addDocument(doc);
			} else {
				// Existing index: replace any document with the same path.
				System.out.println("updating " + file);
				writer.updateDocument(new Term("path", file.toString()), doc);
			}
			// NOTE(review): committing after every document is kept because
			// callers of this public method may rely on it; it is costly for
			// large directories - consider a single commit per run.
			writer.commit();
		}
	}
}

项目采用的是Maven构建，怎么创建Maven Project就不用介绍了吧，我就贴下pom配置吧。

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.yida.framework</groupId>
	<artifactId>lucene5</artifactId>
	<packaging>war</packaging>
	<version>1.0</version>
	<name>lucene5 Maven Webapp</name>
	<url>http://maven.apache.org</url>
	
	<properties>
	    <!-- Single place to change the version of all Lucene modules below. -->
	    <lucene.version>5.0.0</lucene.version>
	    <!-- The indexing code uses java.nio.file (NIO.2), so Java 7 is the
	         minimum; declaring it keeps the build independent of the
	         compiler plugin's default language level. -->
	    <maven.compiler.source>1.7</maven.compiler.source>
	    <maven.compiler.target>1.7</maven.compiler.target>
	    <!-- Fixed source encoding so the build does not depend on the
	         platform default charset. -->
	    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>
	
	<dependencies>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>3.8.1</version>
			<scope>test</scope>
		</dependency>
		<!-- Lucene modules; all share ${lucene.version}. -->
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analyzers-common</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-highlighter</artifactId>
			<version>${lucene.version}</version>
		</dependency>
	</dependencies>
	<build>
		<finalName>lucene5</finalName>
	</build>
</project>

项目结构图如图：

运行之前，先在D盘新建两个文件夹，如图：