(一)索引的创建步骤:
学习Lucene,最重要的一点在于索引的建立,这是一切搜索等的基础,Lucene6创建索引的步骤如下:
(1)创建索引目录(Directory),即指定索引的存放位置(例如用FSDirectory.open打开磁盘上的目录);
(2)创建分词器(Analyzer)(要注意使用的是哪种Analyzer,创建索引和之后搜索时要使用同一种Analyzer);
(3)IndexWriterConfig对象创建,获取IndexWriter对象,判断覆盖/追加索引;
(4)遍历待索引的对象列表,为每个对象创建文档对象(Document),并添加域(Field)等;
(5)通过IndexWriter将文档添加到索引中;
(6)结束索引创建过程,IndexWriter执行close()结束。
(二)代码示例:
pom.xml的配置:我用的是lucene6.4.1,用其他的也可以
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>IDC</groupId>
    <artifactId>luc</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <!-- Compile for Java 7 (try-with-resources and java.nio.file are available). -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <properties>
        <!-- Single place to change the version of every Lucene module below. -->
        <lucene.version>6.4.1</lucene.version>
    </properties>
    <dependencies>
        <!-- lucene-core is declared exactly once; Maven requires each
             groupId:artifactId:type:classifier to be unique within <dependencies>. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>com.google.zxing</groupId>
            <artifactId>core</artifactId>
            <version>3.2.0</version>
        </dependency>
        <dependency>
            <groupId>com.chenlb.mmseg4j</groupId>
            <artifactId>mmseg4j-analysis</artifactId>
            <version>1.9.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-memory</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-demo</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- NOTE(review): RELEASE is a floating version, so the resolved artifact can
             change between builds; pin a concrete version once one is chosen. -->
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <version>RELEASE</version>
        </dependency>
    </dependencies>
    <!-- lucene end -->
</project>
**java(IDEA)代码:**
package com.Licene6;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Typical use: construct with the index location, call
 * {@link #index(String, FileFilter)} and then {@link #close()} (or use
 * try-with-resources, since the class is {@link AutoCloseable}).
 *
 * <p>Created by Mo on 2017/8/18 13:39.
 */
public class Index implements AutoCloseable {
    /** Directory that stores the index; kept so that {@link #close()} can release it. */
    private final Directory directory;
    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /** {@link FileFilter} that accepts files whose name ends with ".txt", ignoring case. */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Opens (or creates) the index stored at the given path.
     *
     * @param indexdirectory file-system path where the index is stored
     * @throws IOException if the directory or the index writer cannot be opened
     */
    public Index(String indexdirectory) throws IOException {
        Directory dir = FSDirectory.open(Paths.get(indexdirectory));
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        // Append to an existing index, or create a new one if none exists yet.
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        IndexWriter w;
        try {
            w = new IndexWriter(dir, config);
        } catch (IOException | RuntimeException e) {
            // Do not leak the directory when the writer cannot be created.
            try {
                dir.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        directory = dir;
        writer = w;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws IOException if closing either resource fails
     */
    @Override
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes every regular, visible, readable file directly inside {@code dataDir}
     * that is accepted by {@code filter}. Sub-directories are not descended into.
     *
     * @param dataDir directory whose files should be indexed
     * @param filter  selects the files to index; {@code null} accepts every file
     * @return the document count reported by {@link IndexWriter#numDocs()} after indexing
     *         (NOTE(review): since the index is opened in CREATE_OR_APPEND mode this
     *         presumably also counts documents added by earlier runs - confirm)
     * @throws IOException if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    public int index(String dataDir, FileFilter filter) throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File file : files) {
            boolean indexable = !file.isDirectory()
                    && !file.isHidden()
                    && file.exists()
                    && file.canRead();
            // The filter check is parenthesised so that it only applies to indexable
            // files and a null filter is never dereferenced.
            if (indexable && (filter == null || filter.accept(file))) {
                indexFile(file);
            }
        }
        return writer.numDocs();
    }

    /**
     * Converts one file into a {@link Document} and adds it to the index.
     *
     * @param file the file to index
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    private void indexFile(File file) throws IOException {
        System.out.println("indexing..." + file.getCanonicalPath());
        Document doc = getDocument(file);
        writer.addDocument(doc);
    }

    /**
     * Builds the {@link Document} for a file with three fields: {@code contents},
     * {@code filename} and {@code fullpath}.
     *
     * @param file the file to describe
     * @return the document to be added to the index
     * @throws IOException if the file cannot be opened or its canonical path resolved
     */
    protected Document getDocument(File file) throws IOException {
        Document doc = new Document();
        // File contents: analyzed but not stored.
        // NOTE(review): FileReader decodes with the platform default charset - confirm
        // that this matches the encoding of the data files.
        doc.add(new Field("contents", new FileReader(file), TextField.TYPE_NOT_STORED));
        // File name and canonical path: analyzed and stored.
        doc.add(new Field("filename", file.getName(), TextField.TYPE_STORED));
        doc.add(new Field("fullpath", file.getCanonicalPath(), TextField.TYPE_STORED));
        return doc;
    }

    /**
     * Indexes the .txt files of a hard-coded data directory and prints the resulting
     * document count followed by the elapsed time in milliseconds.
     *
     * @param args unused
     * @throws IOException if indexing fails
     */
    public static void main(String[] args) throws IOException {
        // Index location; the directory may be empty.
        String indexDir = "D:\\workspace\\lucene6.4.1\\learing2017.8\\0818\\index";
        // Data location; must contain the .txt files to index.
        String dataDir = "D:\\workspace\\lucene6.4.1\\learing2017.8\\0818\\data";
        long start = System.currentTimeMillis();
        int numberIndexed;
        // try-with-resources closes the writer and directory even when indexing fails.
        try (Index index = new Index(indexDir)) {
            numberIndexed = index.index(dataDir, new TextFilesFilter());
        }
        long end = System.currentTimeMillis();
        System.out.println(numberIndexed);
        System.out.println(end - start); // elapsed indexing time in milliseconds
    }
}
参考:
1. 一步:http://blog.csdn.net/wuyinggui10000/article/details/45502445
2. Lucene教程:http://blog.csdn.net/zpf336/article/details/45097975