前一阵帮朋友写了一个读取 Lucene 数据文件的小应用,在这里分享给大家。这段代码基于 Lucene 3.6 版本。
package com.pushine;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import sun.nio.cs.ext.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
//import org.apache.lucene.document.Fieldable;
//import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Dumps a Lucene (3.6) index to tabular form: {@link #headers} holds the
 * column names ("rowNum" plus every indexed field), {@link #datas} holds one
 * row of stringified field values per document. {@code main} exports the
 * result to an Excel file via the project's {@code ExportExcel} helper.
 */
public class AnalyzeCFS {
    // Column names, filled by analyze(): "rowNum" followed by field names.
    public ArrayList<String> headers = new ArrayList<String>();
    // One row per document: [rowNum, field1 value, field2 value, ...].
    public ArrayList<ArrayList<String>> datas = new ArrayList<ArrayList<String>>();
    // Project helper used to probe candidate character sets for a value.
    public CheckCode cc = new CheckCode();

    /**
     * Opens the Lucene index directory and extracts column names and all
     * document data into {@link #headers} and {@link #datas}.
     *
     * @param filedir1 path of the Lucene index directory
     * @throws IOException if the index cannot be opened or read
     */
    public void analyze(String filedir1) throws IOException {
        // Open the Lucene index directory.
        Directory dir = FSDirectory.open(new File(filedir1));
        IndexReader indexReader = IndexReader.open(dir);
        try {
            // Number of documents in the index.
            int max = indexReader.maxDoc();
            // Column names, then the full data set.
            headers = this.getColName(indexReader);
            datas = this.getData(indexReader, max);
        } finally {
            // FIX: reader and directory were previously never closed (leak).
            indexReader.close();
            dir.close();
        }
    }

    /**
     * Reads every document from the index and converts it into a row of
     * strings. The first cell of each row is the document number.
     *
     * @param indexReader open reader over the index
     * @param max number of documents to read (normally maxDoc())
     * @return all rows, in document order
     * @throws IOException on index read failure
     */
    public ArrayList<ArrayList<String>> getData(IndexReader indexReader, int max) throws IOException {
        ArrayList<ArrayList<String>> rows = new ArrayList<ArrayList<String>>();
        // Walk every document and extract its field values.
        for (int n = 0; n < max; n++) {
            ArrayList<String> row = new ArrayList<String>();
            Document document = indexReader.document(n);
            FieldInfos fieldInfos = indexReader.getFieldInfos();
            // First column: the document (row) number.
            row.add(Integer.toString(n));
            // Visit every field declared in the index.
            for (Iterator iterator = fieldInfos.iterator(); iterator.hasNext();) {
                String fieldName = (String) iterator.next();
                String value = null;
                if (document.get(fieldName) != null) {
                    ArrayList<String> cnlist = cc.getcnlist();
                    for (String cn : cnlist) {
                        cc.check(document.get(fieldName));
                        // Re-decode the stored string assuming its bytes are in
                        // charset `cn`; NOTE(review): the LAST charset in cnlist
                        // wins — presumably intended, confirm with CheckCode.
                        value = new String(document.get(fieldName).getBytes(cn), "GBK");
                        System.out.println("char set :" + cn);
                        System.out.println("change value:" + value);
                    }
                    // FIX: when cnlist is empty, value stays null; the old code
                    // called value.startsWith(...) unguarded and threw an NPE.
                    // Strip a leading "^" marker when present.
                    if (value != null && value.startsWith("^")) {
                        value = value.substring(1);
                    }
                }
                row.add(value);
                System.out.print(fieldName + ":" + value + ",");
            }
            rows.add(row);
        }
        return rows;
    }

    /**
     * Builds the column-name list: "rowNum" followed by every field name known
     * to the index. Also stores the result in {@link #headers}.
     *
     * @param indexReader open reader over the index
     * @return the column names
     * @throws IOException on index read failure
     */
    public ArrayList<String> getColName(IndexReader indexReader) throws IOException {
        ArrayList<String> colNames = new ArrayList<String>();
        // FIX: dropped the unused document(0)/getFields() read — it was dead
        // code and threw on an empty index before any column could be listed.
        FieldInfos fieldInfos = indexReader.getFieldInfos();
        // Synthetic first column for the document number.
        colNames.add("rowNum");
        for (Iterator iterator = fieldInfos.iterator(); iterator.hasNext();) {
            colNames.add((String) iterator.next());
        }
        this.headers = colNames;
        return colNames;
    }

    /** @return the column names captured by the last analyze()/getColName() call */
    public ArrayList<String> getColNames() {
        return this.headers;
    }

    /** @return the rows captured by the last analyze() call */
    public ArrayList<ArrayList<String>> getDatas() {
        return datas;
    }

    /**
     * Entry point: {@code java -jar AnalyzeCFS <indexDir>}. Reads the index
     * and writes the data to an Excel file via ExportExcel.
     */
    public static void main(String[] args) {
        // FIX: validate args BEFORE touching args[0]/args[1]; the old code
        // printed args[0] and args[1] inside this branch and threw
        // ArrayIndexOutOfBoundsException instead of showing the usage text.
        if (args.length < 1) {
            System.out.println("please give file directory ");
            System.out.println(" eg java -jar AnalyzeCFS g:\\index");
            System.exit(0);
        }
        ExportExcel expExcel = new ExportExcel();
        AnalyzeCFS analyzeCFS = new AnalyzeCFS();
        expExcel.createFile();
        String filedir = args[0];
        // FIX: sheetindex was referenced but never declared (compile error).
        int sheetindex = 0;
        try {
            analyzeCFS.analyze(filedir);
            expExcel.createSheet(sheetindex);
            expExcel.insertHeaders(analyzeCFS.getColNames());
            for (int i = 0; i < analyzeCFS.getDatas().size(); i++) {
                System.out.println("row number : " + i);
                // NOTE(review): this creates the same sheet index (sheetindex+2)
                // and re-inserts the headers on EVERY iteration — looks
                // suspicious, but ExportExcel's semantics are not visible here;
                // kept as-is pending confirmation.
                expExcel.createSheet(sheetindex + 2);
                expExcel.insertHeaders(analyzeCFS.getColNames());
                expExcel.insertRow(analyzeCFS.getDatas().get(i));
            }
            System.out.println("summary Lines : " + analyzeCFS.getDatas().size());
        } catch (IOException e) {
            // Best-effort tool: report and still attempt to save what we have.
            e.printStackTrace();
        }
        expExcel.saveFile();
    }
}