package com.lin.util;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Builds a Lucene index over the files found directly inside one data directory.
 *
 * <p>Each accepted file becomes one document with three fields: {@code contents}
 * (read from the file), {@code filename} and {@code fullpath}. Sub-directories are
 * not descended into.
 */
public class Indexer {
    /** Index storage opened by the constructor; released again in {@link #close()}. */
    private final Directory directory;
    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /**
     * Main routine: indexes every {@code .txt} file of {@code dataDir} into
     * {@code indexDir} and prints the document count and elapsed time.
     *
     * @param indexDir
     *            location of the index
     * @param dataDir
     *            directory holding the data to index
     * @throws IllegalArgumentException
     *             if either argument is {@code null}
     * @throws Exception
     *             if the index cannot be opened, written or closed
     */
    public static void index(String indexDir, String dataDir) throws Exception {
        if (indexDir == null || dataDir == null) {
            throw new IllegalArgumentException("请检查你的参数是否正确");
        }
        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } finally {
            // Always release the writer and the directory, even when indexing fails.
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");
    }

    /**
     * Initializes the writer used to build the index.
     *
     * @param indexDir
     *            location of the index
     * @throws IOException
     *             if the index directory or the writer cannot be opened
     */
    private Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        IndexWriter created;
        boolean opened = false;
        try {
            IndexWriterConfig config = new IndexWriterConfig(
                    Version.LUCENE_4_10_2, new IKAnalyzer());
            created = new IndexWriter(dir, config);
            opened = true;
        } finally {
            if (!opened) {
                // Writer creation failed: do not leak the directory we just opened.
                try {
                    dir.close();
                } catch (IOException ignored) {
                    // Keep the original failure; the close error is secondary.
                }
            }
        }
        directory = dir;
        writer = created;
    }

    /**
     * Indexes the regular, visible, readable files located directly in
     * {@code dataDir} that the filter accepts.
     *
     * @param dataDir
     *            directory whose files are indexed
     * @param filter
     *            selects the files to index; {@code null} accepts every file
     * @return the value of {@code writer.numDocs()} after indexing.
     *         NOTE(review): this is the writer's own count, so it may include
     *         documents from earlier runs if the index already existed - confirm
     *         that this is the intended "number of files indexed".
     * @throws IOException
     *             if {@code dataDir} cannot be listed (it does not exist, is not
     *             a directory, or an I/O error occurs) or a file cannot be indexed
     */
    public int index(String dataDir, FileFilter filter) throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() yields null instead of throwing when the path is not a
            // listable directory; report that explicitly rather than via an NPE.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            if (!f.isDirectory() && !f.isHidden() && f.canRead() && f.exists()
                    && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Logs the file's canonical path and adds its document to the index.
     *
     * @param f
     *            file to index
     * @throws IOException
     *             if the file cannot be read or the document cannot be added
     */
    private void indexFile(File f) throws IOException {
        System.out.println("indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Creates the document describing one file.
     *
     * @param f
     *            file to describe
     * @return a document with the fields {@code contents}, {@code filename}
     *         and {@code fullpath}
     * @throws IOException
     *             if the file cannot be opened or its canonical path resolved
     */
    @SuppressWarnings("deprecation")
    protected Document getDocument(File f) throws IOException {
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset -
        // confirm that this matches the encoding of the data files.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getName(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * File filter accepting only names that end with {@code .txt}
     * (compared in lower case).
     *
     * @author zan
     */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File f) {
            return f.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Closes the writer and then the index directory opened by the constructor.
     *
     * @throws IOException
     *             if closing the writer or the directory fails
     */
    public void close() throws IOException {
        try {
            if (writer != null) {
                writer.close();
            }
        } finally {
            // The directory was opened by this object, so it is released here too,
            // even if closing the writer failed.
            if (directory != null) {
                directory.close();
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args
     *            optional: {@code args[0]} is the index directory and
     *            {@code args[1]} the data directory; missing values fall back
     *            to the built-in defaults
     * @throws Exception
     *             if indexing fails
     */
    public static void main(String[] args) throws Exception {
        String indexDir = (args != null && args.length > 0) ? args[0] : "d:\\index";
        String dataDir = (args != null && args.length > 1)
                ? args[1] : "D:\\Program Files\\TortoiseSVN";
        Indexer.index(indexDir, dataDir);
    }
}
// Lucene in Action: creating an index with Indexer
// (Page footer: latest recommended article published on 2022-01-24 13:49:04)