前一阵帮朋友写了一个读取 Lucene 数据文件的小应用,在这里分享给大家。这段代码基于 Lucene 3.6 版本。
package com.pushine;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import sun.nio.cs.ext.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
//import org.apache.lucene.document.Fieldable;
//import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Dumps a Lucene (3.6) index to tabular form: {@link #headers} holds the
 * column names ("rowNum" plus every indexed field), {@link #datas} holds one
 * row of stringified field values per document. {@code main} exports the
 * result to an Excel file via the project's {@code ExportExcel} helper.
 */
public class AnalyzeCFS {
    // Column names, filled by analyze(): "rowNum" followed by field names.
    public ArrayList<String> headers = new ArrayList<String>();
    // One row per document: [rowNum, field1 value, field2 value, ...].
    public ArrayList<ArrayList<String>> datas = new ArrayList<ArrayList<String>>();
    // Project helper used to probe candidate character sets for a value.
    public CheckCode cc = new CheckCode();

    /**
     * Opens the Lucene index directory and extracts column names and all
     * document data into {@link #headers} and {@link #datas}.
     *
     * @param filedir1 path of the Lucene index directory
     * @throws IOException if the index cannot be opened or read
     */
    public void analyze(String filedir1) throws IOException {
        // Open the Lucene index directory.
        Directory dir = FSDirectory.open(new File(filedir1));
        IndexReader indexReader = IndexReader.open(dir);
        try {
            // Number of documents in the index.
            int max = indexReader.maxDoc();
            // Column names, then the full data set.
            headers = this.getColName(indexReader);
            datas = this.getData(indexReader, max);
        } finally {
            // FIX: reader and directory were previously never closed (leak).
            indexReader.close();
            dir.close();
        }
    }

    /**
     * Reads every document from the index and converts it into a row of
     * strings. The first cell of each row is the document number.
     *
     * @param indexReader open reader over the index
     * @param max number of documents to read (normally maxDoc())
     * @return all rows, in document order
     * @throws IOException on index read failure
     */
    public ArrayList<ArrayList<String>> getData(IndexReader indexReader, int max) throws IOException {
        ArrayList<ArrayList<String>> rows = new ArrayList<ArrayList<String>>();
        // Walk every document and extract its field values.
        for (int n = 0; n < max; n++) {
            ArrayList<String> row = new ArrayList<String>();
            Document document = indexReader.document(n);
            FieldInfos fieldInfos = indexReader.getFieldInfos();
            // First column: the document (row) number.
            row.add(Integer.toString(n));
            // Visit every field declared in the index.
            for (Iterator iterator = fieldInfos.iterator(); iterator.hasNext();) {
                String fieldName = (String) iterator.next();
                String value = null;
                if (document.get(fieldName) != null) {
                    ArrayList<String> cnlist = cc.getcnlist();
                    for (String cn : cnlist) {
                        cc.check(document.get(fieldName));
                        // Re-decode the stored string assuming its bytes are in
                        // charset `cn`; NOTE(review): the LAST charset in cnlist
                        // wins — presumably intended, confirm with CheckCode.
                        value = new String(document.get(fieldName).getBytes(cn), "GBK");
                        System.out.println("char set :" + cn);
                        System.out.println("change value:" + value);
                    }
                    // FIX: when cnlist is empty, value stays null; the old code
                    // called value.startsWith(...) unguarded and threw an NPE.
                    // Strip a leading "^" marker when present.
                    if (value != null && value.startsWith("^")) {
                        value = value.substring(1);
                    }
                }
                row.add(value);
                System.out.print(fieldName + ":" + value + ",");
            }
            rows.add(row);
        }
        return rows;
    }

    /**
     * Builds the column-name list: "rowNum" followed by every field name known
     * to the index. Also stores the result in {@link #headers}.
     *
     * @param indexReader open reader over the index
     * @return the column names
     * @throws IOException on index read failure
     */
    public ArrayList<String> getColName(IndexReader indexReader) throws IOException {
        ArrayList<String> colNames = new ArrayList<String>();
        // FIX: dropped the unused document(0)/getFields() read — it was dead
        // code and threw on an empty index before any column could be listed.
        FieldInfos fieldInfos = indexReader.getFieldInfos();
        // Synthetic first column for the document number.
        colNames.add("rowNum");
        for (Iterator iterator = fieldInfos.iterator(); iterator.hasNext();) {
            colNames.add((String) iterator.next());
        }
        this.headers = colNames;
        return colNames;
    }

    /** @return the column names captured by the last analyze()/getColName() call */
    public ArrayList<String> getColNames() {
        return this.headers;
    }

    /** @return the rows captured by the last analyze() call */
    public ArrayList<ArrayList<String>> getDatas() {
        return datas;
    }

    /**
     * Entry point: {@code java -jar AnalyzeCFS <indexDir>}. Reads the index
     * and writes the data to an Excel file via ExportExcel.
     */
    public static void main(String[] args) {
        // FIX: validate args BEFORE touching args[0]/args[1]; the old code
        // printed args[0] and args[1] inside this branch and threw
        // ArrayIndexOutOfBoundsException instead of showing the usage text.
        if (args.length < 1) {
            System.out.println("please give file directory ");
            System.out.println(" eg java -jar AnalyzeCFS g:\\index");
            System.exit(0);
        }
        ExportExcel expExcel = new ExportExcel();
        AnalyzeCFS analyzeCFS = new AnalyzeCFS();
        expExcel.createFile();
        String filedir = args[0];
        // FIX: sheetindex was referenced but never declared (compile error).
        int sheetindex = 0;
        try {
            analyzeCFS.analyze(filedir);
            expExcel.createSheet(sheetindex);
            expExcel.insertHeaders(analyzeCFS.getColNames());
            for (int i = 0; i < analyzeCFS.getDatas().size(); i++) {
                System.out.println("row number : " + i);
                // NOTE(review): this creates the same sheet index (sheetindex+2)
                // and re-inserts the headers on EVERY iteration — looks
                // suspicious, but ExportExcel's semantics are not visible here;
                // kept as-is pending confirmation.
                expExcel.createSheet(sheetindex + 2);
                expExcel.insertHeaders(analyzeCFS.getColNames());
                expExcel.insertRow(analyzeCFS.getDatas().get(i));
            }
            System.out.println("summary Lines : " + analyzeCFS.getDatas().size());
        } catch (IOException e) {
            // Best-effort tool: report and still attempt to save what we have.
            e.printStackTrace();
        }
        expExcel.saveFile();
    }
}