代码主体:
package com.test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.packed.PackedInts.Writer;
/**
 * Small Lucene demo helper: writes six hard-coded sample documents into a
 * file-system index and reports how many documents the index contains.
 */
public class IndexUtil {
    // Parallel sample arrays: position i of each array describes sample document i.
    private final String[] ids = {"1","2","3","4","5","6"};
    private final String[] emails = {"aa@tom.com","bb@edu.com","cc@sina.com","dd@yaho.com","ee@qq.com","ff@163.com"};
    private final String[] content = {"welcome to 1 room","welcome to 2 room","welcome to 3 room","welcome to 4 room","welcome to 5 room","welcome to 6 room",};
    // Simulated attachment counts; declared for the sample data but not indexed by index().
    private final int[] attachs = {1,4,3,2,5,1};
    private final String[] names = {"张三","李四","王五","马六","赵七","刘八"};

    // Index location shared by index() and query().
    private final Directory directory;

    /**
     * Opens the file-system directory that holds the index.
     *
     * @throws Exception if the directory cannot be opened
     */
    public IndexUtil() throws Exception {
        directory = FSDirectory.open(new File("D:/lucene-file/index02"));
    }

    /**
     * Adds one document per sample id, with the fields id, email, content and name.
     *
     * <p>The writer is always released: it is closed (which commits) when every
     * document was added, and rolled back when anything fails, so a failure no
     * longer leaves the writer open and the index write lock held.
     *
     * <p>NOTE(review): the writer config keeps its default open mode, so calling
     * this method repeatedly appears to add the same six documents again —
     * confirm whether OpenMode.CREATE is wanted.
     *
     * @throws Exception if the writer cannot be opened or a document cannot be added
     */
    public void index() throws Exception {
        IndexWriterConfig config =
                new IndexWriterConfig(Version.LATEST, new StandardAnalyzer(Version.LATEST));
        IndexWriter writer = new IndexWriter(directory, config);
        try {
            for (int i = 0; i < ids.length; i++) {
                writer.addDocument(buildDocument(i));
            }
            writer.close();
        } catch (Exception e) {
            // Discard the partial batch and release the write lock; keep the
            // original failure as the primary exception.
            try {
                writer.rollback();
            } catch (Exception rollbackFailure) {
                e.addSuppressed(rollbackFailure);
            }
            throw e;
        }
    }

    /**
     * Builds the Lucene document for sample row {@code i}.
     *
     * <p>Field.Store options:
     * <ul>
     *   <li>Store.YES - the original value is kept in the index and can be read
     *       back with doc.get(...).</li>
     *   <li>Store.NO - the value is not kept; the field can still be indexed and
     *       searched, but its text cannot be restored from the index.</li>
     * </ul>
     *
     * <p>Field.Index options:
     * <ul>
     *   <li>ANALYZED - tokenized and indexed; suits titles and body text.</li>
     *   <li>NOT_ANALYZED - indexed as a single term without tokenizing; suits
     *       exact-match values such as ids, names, or identity numbers.</li>
     *   <li>ANALYZED_NO_NORMS - tokenized, but norms are not stored. Norms carry
     *       index-time boost and field-length normalization.</li>
     *   <li>NOT_ANALYZED_NO_NORMS - neither tokenized nor stored with norms.</li>
     *   <li>NO - not indexed at all.</li>
     * </ul>
     */
    private Document buildDocument(int i) {
        Document doc = new Document();
        doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
        // content is searchable but not stored, so it cannot be read back from a hit.
        doc.add(new Field("content", content[i], Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        return doc;
    }

    /**
     * Prints the number of documents currently in the index to standard output.
     * The reader is closed before the method returns.
     *
     * @throws Exception if the index cannot be opened or read
     */
    public void query() throws Exception {
        try (IndexReader indexReader = IndexReader.open(directory)) {
            System.out.println("文档数量是-----" + indexReader.numDocs());
        }
    }
}
测试类
package com.junittest;
import static org.junit.Assert.*;
import org.junit.Test;
import com.test.IndexUtil;
/**
 * JUnit smoke tests for {@link IndexUtil}. Neither test asserts on a result;
 * each one passes as long as the call completes without throwing.
 */
public class testIndex {

    /** Creates an IndexUtil and writes the sample documents into the index. */
    @Test
    public void test() throws Exception {
        new IndexUtil().index();
    }

    /**
     * Creates an IndexUtil and prints the document count of the index.
     *
     * NOTE(review): presumably needs the index to exist already; JUnit does not
     * guarantee that test() runs before this method — confirm.
     */
    @Test
    public void testquery() throws Exception {
        final IndexUtil util = new IndexUtil();
        util.query();
    }
}