Lucene实战-Indexer索引创建

package com.lin.util;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Builds a Lucene index from the plain-text files found directly inside a
 * data directory (sub-directories are not traversed).
 *
 * <p>
 * Each accepted file becomes one document with three fields: the tokenized
 * {@code contents}, plus the stored, un-analyzed {@code filename} and
 * {@code fullpath}.
 */
public class Indexer {

	/** Index storage; kept so that {@link #close()} can release it. */
	private final Directory directory;

	/** Writer used to add documents to the index. */
	private final IndexWriter writer;

	/**
	 * Indexes every {@code .txt} file in {@code dataDir} into the index located
	 * at {@code indexDir} and prints the document count and elapsed time.
	 * 
	 * @param indexDir
	 *            location of the index
	 * @param dataDir
	 *            directory containing the files to index
	 * @throws IllegalArgumentException
	 *             if either argument is {@code null}
	 * @throws Exception
	 *             if the index cannot be opened, written or closed
	 */
	public static void index(String indexDir, String dataDir) throws Exception {
		if (indexDir == null || dataDir == null) {
			throw new IllegalArgumentException("请检查你的参数是否正确");
		}
		long start = System.currentTimeMillis();
		Indexer indexer = new Indexer(indexDir);
		int numIndexed;
		try {
			numIndexed = indexer.index(dataDir, new TextFilesFilter());
		} finally {
			// Always release the writer, even when indexing fails.
			indexer.close();
		}
		long end = System.currentTimeMillis();
		System.out.println("Indexing " + numIndexed + " files took "
				+ (end - start) + " milliseconds");
	}

	/**
	 * Opens the index directory and creates the writer used to build the index.
	 * 
	 * @param indexDir
	 *            location of the index
	 * @throws IOException
	 *             if the directory or the writer cannot be opened
	 */
	private Indexer(String indexDir) throws IOException {
		Directory dir = FSDirectory.open(new File(indexDir));
		IndexWriter opened;
		boolean success = false;
		try {
			IndexWriterConfig config = new IndexWriterConfig(
					Version.LUCENE_4_10_2, new IKAnalyzer());
			opened = new IndexWriter(dir, config);
			success = true;
		} finally {
			if (!success) {
				// The writer could not be created: do not leak the directory.
				try {
					dir.close();
				} catch (IOException ignored) {
					// The failure that brought us here is the one worth
					// reporting; a secondary close error must not mask it.
				}
			}
		}
		directory = dir;
		writer = opened;
	}

	/**
	 * Indexes the regular, visible, readable files directly inside
	 * {@code dataDir} that are accepted by {@code filter}.
	 * 
	 * @param dataDir
	 *            directory containing the files to index
	 * @param filter
	 *            selects which files are indexed; {@code null} accepts all
	 * @return the number of documents in the index as reported by the writer
	 *         (this includes documents that were already in the index before
	 *         this call)
	 * @throws IOException
	 *             if {@code dataDir} cannot be listed or a file cannot be
	 *             indexed
	 */
	public int index(String dataDir, FileFilter filter) throws IOException {
		File[] files = new File(dataDir).listFiles();
		if (files == null) {
			// listFiles() returns null when the path is not a directory or an
			// I/O error occurs; report that instead of failing with an NPE.
			throw new IOException("Cannot list files in data directory: "
					+ dataDir);
		}
		for (File f : files) {
			if (!f.isDirectory() && !f.isHidden() && f.canRead() && f.exists()
					&& (filter == null || filter.accept(f))) {
				indexFile(f);
			}
		}
		return writer.numDocs();
	}

	/**
	 * Adds a single file to the index.
	 * 
	 * @param f
	 *            file to index
	 * @throws IOException
	 *             if the file cannot be read or the document cannot be added
	 */
	private void indexFile(File f) throws IOException {
		System.out.println("indexing " + f.getCanonicalPath());
		Document doc = getDocument(f);
		writer.addDocument(doc);
	}

	/**
	 * Builds the document that represents {@code f} in the index.
	 * 
	 * @param f
	 *            file to convert
	 * @return a document with {@code contents}, {@code filename} and
	 *         {@code fullpath} fields
	 * @throws IOException
	 *             if the file cannot be opened or its path cannot be resolved
	 */
	@SuppressWarnings("deprecation")
	protected Document getDocument(File f) throws IOException {
		// Resolve the path before opening the reader so that a failure here
		// cannot leave an open file handle behind.
		String fullPath = f.getCanonicalPath();
		Document doc = new Document();
		doc.add(new Field("contents", new FileReader(f)));
		doc.add(new Field("filename", f.getName(), Field.Store.YES,
				Field.Index.NOT_ANALYZED));
		doc.add(new Field("fullpath", fullPath, Field.Store.YES,
				Field.Index.NOT_ANALYZED));
		return doc;
	}

	/**
	 * File filter that accepts only files whose name ends with {@code .txt}
	 * (case-insensitive).
	 */
	private static class TextFilesFilter implements FileFilter {

		@Override
		public boolean accept(File f) {
			return f.getName().toLowerCase().endsWith(".txt");
		}

	}

	/**
	 * Closes the index writer and then the underlying directory.
	 * 
	 * @throws IOException
	 *             if either resource fails to close
	 */
	public void close() throws IOException {
		try {
			writer.close();
		} finally {
			// Closing the writer does not close the directory it writes to.
			directory.close();
		}
	}

	/**
	 * Command-line entry point.
	 * 
	 * @param args
	 *            optional: {@code args[0]} is the index directory and
	 *            {@code args[1]} the data directory; when fewer than two
	 *            arguments are given the built-in default paths are used
	 * @throws Exception
	 *             if indexing fails
	 */
	public static void main(String[] args) throws Exception {
		boolean hasPaths = args != null && args.length >= 2;
		String indexDir = hasPaths ? args[0] : "d:\\index";
		String dataDir = hasPaths ? args[1] : "D:\\Program Files\\TortoiseSVN";
		Indexer.index(indexDir, dataDir);
	}
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值