Lucene5 一个简单的Demo
Lucene 不知不觉就更新到 5.1 版本了。下面是使用 Lucene 5.1 编写的一个简单的索引创建示例。
package com.jasonware.lucene5;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
/**
 * Minimal Lucene 5.1 indexing demo: indexes the path, last-modified time and
 * content of every file under a directory.
 *
 * Created by justin on 15/5/5.
 */
public class SimpleDemo {

    public static void main(String[] args) throws IOException {
        String dirPath = "/Users/justin/Documents/java/lucene5/docDir";
        String indexPath = "/Users/justin/Documents/java/lucene5/indexDir";
        createIndex(dirPath, indexPath, false);
    }

    /**
     * Builds an index over the given file or directory and prints the elapsed
     * time in milliseconds to standard output.
     *
     * @param dirPath        file or directory whose content should be indexed
     * @param indexPath      directory in which the index is stored
     * @param createOrAppend {@code true} opens the index in
     *                       {@code CREATE_OR_APPEND} mode; {@code false} opens
     *                       it in {@code CREATE} mode
     * @throws IOException if the index cannot be opened or a file cannot be read
     */
    public static void createIndex(String dirPath, String indexPath, boolean createOrAppend) throws IOException {
        long startTime = System.currentTimeMillis();
        Path docDirPath = Paths.get(dirPath);

        // Resources are closed in reverse order (writer, directory, analyzer),
        // also when indexing fails, so neither the index write lock nor file
        // handles stay open after an exception.
        try (Analyzer analyzer = new StandardAnalyzer();
             Directory directory = FSDirectory.open(Paths.get(indexPath))) {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            indexWriterConfig.setOpenMode(createOrAppend
                    ? IndexWriterConfig.OpenMode.CREATE_OR_APPEND
                    : IndexWriterConfig.OpenMode.CREATE);
            try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
                indexDocs(indexWriter, docDirPath);
            }
        }
        System.out.println(System.currentTimeMillis() - startTime);
    }

    /**
     * Indexes the file at {@code path}; if {@code path} is a directory, every
     * file found by walking the tree below it is indexed. The writer is
     * committed once after all files have been processed.
     *
     * @param indexWriter writer that receives the documents
     * @param path        file or directory to index
     * @throws IOException if a file cannot be read or the index cannot be written
     */
    public static void indexDocs(final IndexWriter indexWriter, Path path) throws IOException {
        if (Files.isDirectory(path)) {
            Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                    addOrUpdate(indexWriter, file, attrs.lastModifiedTime().toMillis());
                    return FileVisitResult.CONTINUE;
                }
            });
        } else {
            addOrUpdate(indexWriter, path, Files.getLastModifiedTime(path).toMillis());
        }
        // One commit for the whole batch instead of one per file.
        indexWriter.commit();
    }

    /**
     * Indexes a single file and commits the writer.
     *
     * @param indexWriter  writer that receives the document
     * @param path         file to index
     * @param lastModified last-modified time of the file in milliseconds
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    public static void indexDoc(IndexWriter indexWriter, Path path, long lastModified) throws IOException {
        addOrUpdate(indexWriter, path, lastModified);
        indexWriter.commit();
    }

    /** Adds (CREATE mode) or replaces by path (other modes) the document for one file, without committing. */
    private static void addOrUpdate(IndexWriter indexWriter, Path path, long lastModified) throws IOException {
        // The reader is consumed while the document is added, so it can be
        // closed as soon as addDocument/updateDocument returns.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(Files.newInputStream(path), StandardCharsets.UTF_8))) {
            Document document = new Document();
            document.add(new StringField("path", path.toString(), Field.Store.YES));
            document.add(new LongField("modified", lastModified, Field.Store.NO));
            document.add(new TextField("content", reader));

            if (indexWriter.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
                indexWriter.addDocument(document);
            } else {
                // An older copy of this file may already be indexed: replace
                // the document whose "path" term matches.
                indexWriter.updateDocument(new Term("path", path.toString()), document);
            }
        }
    }
}
使用的POM配置如下:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>jasonware</groupId>
    <artifactId>lucene5</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- Single place to change the Lucene version; all Lucene artifacts below reference it. -->
        <lucene.version>5.1.0</lucene.version>
        <!-- Pin the source encoding so the build does not depend on the platform default charset. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
</project>