package com.cheea.lucene;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Builds a Lucene full-text index over the {@code .txt} files found under a
 * fixed data directory, tokenizing with {@link IKAnalyzer}.
 *
 * <p>Each text file becomes one document with two fields: {@code text}
 * (file content, decoded as GBK, tokenized but not stored) and
 * {@code filename} (canonical path, stored and analyzed).
 */
public class FileIndexer {

    /** Root directory that is scanned recursively for files to index. */
    private final File baseDir = new File("F:\\dateDir");

    /** Directory in which the Lucene index is written. */
    private final File indexDir = new File("F:\\indexDir");

    /**
     * Creates an indexer for the built-in data and index directories.
     * Directory validation happens in {@link #createIndex()}, where a missing
     * data directory can actually stop the run.
     */
    public FileIndexer() {
    }

    /**
     * Rebuilds the index from scratch: every readable {@code .txt} file under
     * the data directory is added, the index is optimized (segments merged)
     * and the writer is closed.
     *
     * <p>If the data directory does not exist nothing is opened, so an
     * existing index is not replaced by an empty one. I/O failures are
     * reported on standard error; the writer and the directory are always
     * closed so the index write lock is not left behind.
     */
    @SuppressWarnings("deprecation")
    public void createIndex() {
        // Check before opening the writer: it is opened in "create" mode and
        // would overwrite whatever index is already there.
        if (!baseDir.isDirectory()) {
            System.err.println("Data directory not found, index left untouched: " + baseDir);
            return;
        }
        try {
            FSDirectory directory = FSDirectory.open(indexDir);
            try {
                IndexWriter writer = new IndexWriter(
                        directory,
                        new IKAnalyzer(false),
                        true,
                        IndexWriter.MaxFieldLength.LIMITED);
                try {
                    indexDirectory(writer, baseDir);
                    writer.optimize(); // merge segments
                } finally {
                    // Close on every path so the write lock is released even
                    // when indexing or optimizing fails.
                    writer.close();
                }
            } finally {
                directory.close();
            }
            System.out.println("索引完毕");
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException,
            // which are IOException subclasses.
            System.err.println("Failed to build index in " + indexDir);
            e.printStackTrace();
        }
    }

    /**
     * Recursively walks {@code dir}, indexing every regular file it contains.
     *
     * @param writer open index writer that receives the documents
     * @param dir    directory to scan; anything that is not an existing
     *               directory is ignored
     */
    private void indexDirectory(IndexWriter writer, File dir) {
        if (!dir.isDirectory()) {
            return;
        }
        // listFiles() returns null (not an empty array) when the directory
        // cannot be read, e.g. because of an I/O error or missing permission.
        File[] files = dir.listFiles();
        if (files == null) {
            System.err.println("Cannot list directory, skipping: " + dir);
            return;
        }
        for (File file : files) {
            if (file.isDirectory()) {
                indexDirectory(writer, file);
            } else {
                indexFile(writer, file);
            }
        }
    }

    /**
     * Adds a single {@code .txt} file to the index. Hidden, missing or
     * unreadable files and files with any other extension are skipped.
     * A failure on one file is reported and does not abort the whole run.
     *
     * @param writer open index writer that receives the document
     * @param file   candidate file
     */
    private void indexFile(IndexWriter writer, File file) {
        if (file.isHidden() || !file.exists() || !file.canRead()) {
            return;
        }
        FileInputStream in = null;
        try {
            String path = file.getCanonicalPath();
            if (!path.endsWith(".txt")) {
                return;
            }
            System.out.println("正在索引:" + path);
            in = new FileInputStream(file);
            Document doc = new Document();
            // File content: tokenized from the reader, not stored.
            doc.add(new Field("text", new InputStreamReader(in, "GBK")));
            // File path: stored so it can be shown in results, and analyzed.
            doc.add(new Field("filename", path, Field.Store.YES, Field.Index.ANALYZED));
            // addDocument() consumes the reader and builds the index entries.
            writer.addDocument(doc);
        } catch (IOException e) {
            System.err.println("Failed to index file: " + file);
            e.printStackTrace();
        } finally {
            // Closing again after Lucene has already closed the reader is
            // harmless; this covers the paths where it never got that far.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Command-line entry point: rebuilds the index once.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        FileIndexer lucene = new FileIndexer();
        lucene.createIndex();
    }
}
// Lucene 3.4 结合 IKAnalyzer 3.2 建立索引
// 最新推荐文章于 2019-12-06 16:15:30 发布