IndexWriter基础使用及注意事项

这是关于3.0版本的原文

http://www.cnblogs.com/huangfox/archive/2010/10/15/1852371.html


照上述文章的例子,试了下Lucene 4.6版本


//注意点2:field实例在多次添加的时候可以重用,节约构造field实例的时间。

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;


/**
 * Indexing benchmark (baseline): adds 500,000 two-field documents to a
 * Lucene 4.6 index, allocating new {@code StringField} instances for every
 * document instead of reusing them.
 *
 * <p>Prints the elapsed indexing time in milliseconds, then {@code over}.
 */
public class ceshi0303 {
	/**
	 * Runs the benchmark against the index directory {@code d:\20140303index}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		IndexWriter writer = null;
		FSDirectory dir = null;
		try {
			// Instantiate SimpleFSDirectory directly: SimpleFSDirectory.open(...) is
			// really the inherited static FSDirectory.open(...), which would not
			// force the SimpleFSDirectory implementation.
			dir = new SimpleFSDirectory(new File("d:\\20140303index"));
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
			IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
			writer = new IndexWriter(dir, iwConfig);
			long s = System.currentTimeMillis();
			for (int i = 0; i < 500000; i++) {
				Document doc = new Document();
				// New Field objects per document; the value is built by real string
				// concatenation so each document gets a distinct value.
				doc.add(new StringField("f1", "f1 hello doc" + i, Store.YES));
				doc.add(new StringField("f2", "f2 hello doc" + i, Store.YES));
				writer.addDocument(doc);
			}
			System.out.println(System.currentTimeMillis() - s + "ms");
			System.out.println("over");
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// writer stays null when opening the directory or the writer failed.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			// Release the directory after the writer that uses it.
			if (dir != null) {
				try {
					dir.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;


/**
 * Indexing benchmark (field reuse): adds 500,000 two-field documents to a
 * Lucene 4.6 index, reusing the same two {@code Field} instances and only
 * updating their string values for each document.
 *
 * <p>Prints the elapsed indexing time in milliseconds, then {@code over}.
 */
public class ceshi0303 {
	/**
	 * Runs the benchmark against the index directory {@code d:\20140303index}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		IndexWriter writer = null;
		FSDirectory dir = null;
		try {
			// Instantiate SimpleFSDirectory directly: SimpleFSDirectory.open(...) is
			// really the inherited static FSDirectory.open(...), which would not
			// force the SimpleFSDirectory implementation.
			dir = new SimpleFSDirectory(new File("d:\\20140303index"));
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
			IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
			writer = new IndexWriter(dir, iwConfig);
			// Created once and reused for every document.
			Field f1 = new StringField("f1", "", Store.YES);
			Field f2 = new StringField("f2", "", Store.YES);
			long s = System.currentTimeMillis();
			for (int i = 0; i < 500000; i++) {
				Document doc = new Document();
				f1.setStringValue("f1 hello doc" + i);
				doc.add(f1);
				f2.setStringValue("f2 world doc" + i);
				doc.add(f2);
				writer.addDocument(doc);
			}
			System.out.println(System.currentTimeMillis() - s + "ms");
			System.out.println("over");
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// writer stays null when opening the directory or the writer failed.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			// Release the directory after the writer that uses it.
			if (dir != null) {
				try {
					dir.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}

上面2份代码我分别跑了5次,取平均值,第一份的时间是4008.6ms,第二份的时间是3892.8ms。原因很简单,重用Field实例节省了大量的GC cost。


而对于注意点1,

//注意点1:在Windows系统中我们通常使用SimpleFSDirectory,而其他操作系统则使用NIOFSDirectory。

我测试了如下代码(就是把SimpleFSDirectory改成了FSDirectory):

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;


/**
 * Indexing benchmark (FSDirectory.open variant): adds 500,000 two-field
 * documents to a Lucene 4.6 index, obtaining the directory through
 * {@code FSDirectory.open} and reusing the same two {@code Field} instances.
 *
 * <p>Prints the elapsed indexing time in milliseconds, then {@code over}.
 */
public class ceshi0303 {
	/**
	 * Runs the benchmark against the index directory {@code d:\20140303index}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		IndexWriter writer = null;
		FSDirectory dir = null;
		try {
			// Per the Lucene documentation, FSDirectory.open picks a concrete
			// FSDirectory implementation suited to the current environment.
			dir = FSDirectory.open(new File("d:\\20140303index"));
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
			IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
			writer = new IndexWriter(dir, iwConfig);
			// Created once and reused for every document.
			Field f1 = new StringField("f1", "", Store.YES);
			Field f2 = new StringField("f2", "", Store.YES);
			long s = System.currentTimeMillis();
			for (int i = 0; i < 500000; i++) {
				Document doc = new Document();
				f1.setStringValue("f1 hello doc" + i);
				doc.add(f1);
				f2.setStringValue("f2 world doc" + i);
				doc.add(f2);
				writer.addDocument(doc);
			}
			System.out.println(System.currentTimeMillis() - s + "ms");
			System.out.println("over");
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// writer stays null when opening the directory or the writer failed.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			// Release the directory after the writer that uses it.
			if (dir != null) {
				try {
					dir.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}
运行平均时间变成了3903ms。个人认为FSDirectory.open会自动根据平台来选择具体的实现(SimpleFSDirectory、NIOFSDirectory或MMapDirectory),比较方便。


对于注意点3,目前没有遇到过,感触不深。


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值