读取Lucene 索引数据

前一阵帮朋友写了一个读取 Lucene 数据文件的小应用,在这里分享给大家。这段代码基于 Lucene 3.6 版本。

package com.pushine;


import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
//import org.apache.lucene.document.Fieldable;
//import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import sun.nio.cs.ext.*;


/**
 * Reads every document of a Lucene index directory into an in-memory table
 * (a header row plus one row of strings per document) and, when run from the
 * command line, hands that table to {@code ExportExcel}.
 *
 * <p>Written against the Lucene 3.6 API.
 */
public class AnalyzeCFS {
	/** Column names of the last analyzed index: {@code "rowNum"} followed by the index field names. */
	public ArrayList<String> headers = new ArrayList<String>();
	/** Rows of the last analyzed index; each row starts with the document number. */
	public ArrayList<ArrayList<String>> datas = new ArrayList<ArrayList<String>>();
	/** Project helper that supplies the charset names used when re-decoding field values. */
	public CheckCode cc = new CheckCode();

	/**
	 * Opens the index stored in the given directory and loads its column names
	 * and rows into {@link #headers} and {@link #datas}.
	 *
	 * @param filedir1 path of the Lucene index directory
	 * @throws IOException if the index cannot be opened or read
	 */
	public void analyze(String filedir1) throws IOException {
		Directory dir = FSDirectory.open(new File(filedir1));
		try {
			IndexReader indexReader = IndexReader.open(dir);
			try {
				headers = this.getColName(indexReader);
				datas = this.getData(indexReader, indexReader.maxDoc());
			} finally {
				// Release the reader even when reading fails part-way through.
				indexReader.close();
			}
		} finally {
			dir.close();
		}
	}

	/**
	 * Extracts one row per live document with a document number below {@code max}.
	 *
	 * <p>Each row holds the document number followed by one value per index
	 * field, in the same order as {@link #getColName(IndexReader)}; a field the
	 * document does not store yields {@code null}. Every field is also echoed
	 * to standard output as {@code name:value,}.
	 *
	 * @param indexReader reader to pull documents from
	 * @param max exclusive upper bound of the document numbers to visit
	 * @return the extracted rows
	 * @throws IOException if a document cannot be read or a charset name is not supported
	 */
	public ArrayList<ArrayList<String>> getData(IndexReader indexReader,int max) throws  IOException{
		ArrayList<ArrayList<String>> rows = new ArrayList<ArrayList<String>>();
		// The field list is the same for every document, so resolve it once.
		ArrayList<String> fieldNames = fieldNames(indexReader);

		for (int n = 0; n < max; n++) {
			// Document numbers of deleted documents are still below maxDoc();
			// skip them instead of reading stale or unspecified content.
			if (indexReader.isDeleted(n)) {
				continue;
			}
			Document document = indexReader.document(n);

			ArrayList<String> row = new ArrayList<String>();
			row.add(Integer.toString(n));
			for (String name : fieldNames) {
				String value = decode(document.get(name));
				row.add(value);
				System.out.print( name + ":" + value + ",");
			}
			rows.add(row);
		}
		return rows;
	}

	/**
	 * Builds the header row: {@code "rowNum"} followed by the name of every
	 * field known to the reader. The result is also stored in {@link #headers}.
	 *
	 * @param indexReader reader whose field infos are listed
	 * @return the column names
	 * @throws IOException declared for compatibility with existing callers
	 */
	public ArrayList<String> getColName(IndexReader indexReader) throws IOException {
		ArrayList<String> colNames = new ArrayList<String>();
		colNames.add("rowNum");
		colNames.addAll(fieldNames(indexReader));

		this.headers = colNames;
		return colNames;
	}

	/** Returns the header row produced by the last call to {@code analyze} or {@code getColName}. */
	public ArrayList<String> getColNames(){
		return this.headers;
	}

	/** Returns the rows produced by the last call to {@code analyze}. */
	public ArrayList<ArrayList<String>> getDatas() {
		return datas;
	}

	/**
	 * Lists the reader's field names in {@code FieldInfos} iteration order.
	 * Iterating {@code FieldInfos} yields {@code FieldInfo} objects, not
	 * strings, so the name has to be read from each entry.
	 */
	private ArrayList<String> fieldNames(IndexReader indexReader) {
		ArrayList<String> names = new ArrayList<String>();
		FieldInfos fieldInfos = indexReader.getFieldInfos();
		for (FieldInfo info : fieldInfos) {
			names.add(info.name);
		}
		return names;
	}

	/**
	 * Re-decodes a stored field value and strips one leading {@code '^'}.
	 *
	 * <p>For every charset name returned by {@code cc.getcnlist()} the raw value
	 * is encoded with that charset and decoded as GBK; the result for the last
	 * charset in the list is the one that is kept. When the list is empty the
	 * raw value is used unchanged.
	 *
	 * @param raw stored field value, or {@code null} when the field is absent
	 * @return the converted value, or {@code null} when {@code raw} is {@code null}
	 */
	private String decode(String raw) throws IOException {
		if (raw == null) {
			return null;
		}
		String value = raw;
		for (String cn : cc.getcnlist()) {
			// NOTE(review): the result of check() is not used here; the call is
			// kept in case CheckCode relies on it for side effects — confirm.
			cc.check(raw);
			value = new String(raw.getBytes(cn), "GBK");
			System.out.println("char set :" + cn);
			System.out.println("change value:" + value);
		}
		if (value.startsWith("^")) {
			value = value.substring(1);
		}
		return value;
	}

	/**
	 * Command-line entry point: reads the index directory given as the first
	 * argument and writes its content through {@code ExportExcel}.
	 *
	 * @param args {@code args[0]} is the path of the Lucene index directory
	 */
	public static void main(String[] args) {
		// Validate the arguments before anything is created on disk, and never
		// index into args when it is known to be empty.
		if (args.length < 1) {
			System.out.println(args.length);
			System.out.println("please give file directory ");
			System.out.println(" eg  java -jar AnalyzeCFS  g:\\index");
			System.exit(0);
		}

		String filedir = args[0];
		// NOTE(review): sheetindex was referenced but never declared; 0 is
		// assumed to address the first sheet — confirm against ExportExcel.
		int sheetindex = 0;

		ExportExcel expExcel = new ExportExcel();
		AnalyzeCFS analyzeCFS = new AnalyzeCFS();
		expExcel.createFile();

		try {
			analyzeCFS.analyze(filedir);
			expExcel.createSheet(sheetindex);
			expExcel.insertHeaders(analyzeCFS.getColNames());

			ArrayList<ArrayList<String>> rows = analyzeCFS.getDatas();
			for (int i = 0; i < rows.size(); i++) {
				System.out.println("row number : " + i );
				// NOTE(review): a sheet is (re)created and the headers are
				// re-inserted for every row; kept as in the original because
				// ExportExcel's semantics are not visible here — confirm.
				expExcel.createSheet(sheetindex +2);
				expExcel.insertHeaders(analyzeCFS.getColNames());
				expExcel.insertRow(rows.get(i));
			}
			System.out.println("summary Lines : " + rows.size() );
		} catch (IOException e) {
			// Report the failure but still save whatever was written so far.
			e.printStackTrace();
		}

		expExcel.saveFile();
	}

}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值