将多个Excel文件合并成一个的源码(转)

源码是在poi-3.1-FINAL-20080629版本下编译通过。

 

本源码修正了获取sheet中数据行数的BUG,详情看getRows方法。

 

public class ExcelMergeUtil {  
	public static void merge(List<String> sourceFiles, String destFile)throws Exception{
		InputStream[] inputs = new InputStream[sourceFiles.size()]; 
	    for(int i=0; i<sourceFiles.size(); i++) {   
	      inputs[i] = new FileInputStream(sourceFiles.get(i));   
	    }   
	    
	    OutputStream out = new FileOutputStream(destFile); 
	    
	    merge(inputs, out);
	}
	
	public static void merge(InputStream[] inputs, OutputStream out)throws Exception{  
		Map map = null;
		try{
			if(inputs == null || inputs.length <= 1) {   
				throw new IllegalArgumentException("没有传入输入流数组或只有一个输入流!");   
			}   
			System.out.println("需要合并的文件数为:" + inputs.length);
			
			//第一个文档
			List<Record> rootRecords = getRecords(inputs[0]); 
			Workbook workbook = Workbook.createWorkbook(rootRecords); 
			List<Sheet> sheets = getSheets(workbook, rootRecords);   
			if(sheets == null || sheets.size() == 0) {   
				throw new IllegalArgumentException("第一个文档格式错误,必须至少有一个sheet!");   
			}   
			
			//以第一个文档的最后一个sheet为根,以后的数据都追加在这个sheet后面   
			Sheet rootSheet = sheets.get(sheets.size() - 1);    
			int rootRows = getRows(rootRecords); //记录第一篇文档的行数,以后的行数在此基础上增加   
			rootSheet.setLoc(rootSheet.getDimsLoc());   
			map = new HashMap(1000);
			
			for(int i = 1; i < inputs.length; i++){ //从第二篇开始遍历   
				List<Record> records = getRecords(inputs[i]);   
				int rowsOfCurXls = 0;   
				//遍历当前文档的每一个record   
				for(Iterator it = records.iterator(); it.hasNext();){   
			        Record record = (Record) it.next();   
			        if(record.getSid() == RowRecord.sid){ //如果是RowRecord   
			        	RowRecord rowRecord = (RowRecord) record;   
			        	rowRecord.setRowNumber(rootRows + rowRecord.getRowNumber()); //调整行号   
			        	rootSheet.addRow(rowRecord); //追加Row   
			        	rowsOfCurXls++; //记录当前文档的行数   
			        }   
			        //SST记录,SST保存xls文件中唯一的String,各个String都是对应着SST记录的索引   
			        else if (record.getSid() == SSTRecord.sid){   
			        	SSTRecord sstRecord = (SSTRecord) record;   
			        	for (int j = 0; j < sstRecord.getNumUniqueStrings(); j++) {   
			        		int index = workbook.addSSTString(sstRecord.getString(j));   
			        		//记录原来的索引和现在的索引的对应关系   
			        		map.put(Integer.valueOf(j), Integer.valueOf(index));   
			        	}   
			        }
			        else if (record.getSid() == LabelSSTRecord.sid){   
			        	LabelSSTRecord label = (LabelSSTRecord) record;   
			        	//调整SST索引的对应关系   
			        	label.setSSTIndex( ((Integer)map.get(Integer.valueOf(label.getSSTIndex()))).intValue() );   
			        }   
			        
			        //追加ValueCell   
			        if(record instanceof CellValueRecordInterface){   
			        	CellValueRecordInterface cell = (CellValueRecordInterface) record;   
			        	int cellRow = cell.getRow() + rootRows;   
			        	cell.setRow(cellRow);   
			        	rootSheet.addValueRecord(cellRow, cell);   
			        }   
				}   
	      		rootRows += rowsOfCurXls;  
	    	}   
			
	    	byte[] data = getBytes(workbook, sheets.toArray(new Sheet[0]));  
	    	
	    	write(out, data); 
	    	
	    	System.out.println("合并完成");
		}finally{
			if(map!=null){
				map.clear();
				map = null;
			}
		}
	}   
  
	static void write(OutputStream out, byte[] data)throws Exception{   
		POIFSFileSystem fs = new POIFSFileSystem();     
		try{   
			fs.createDocument(new ByteArrayInputStream(data), "Workbook");   
			fs.writeFilesystem(out);   
			out.flush();   
		}finally{   
			try{   
				out.close();   
			}catch(IOException e){   
				e.printStackTrace();   
			}   
		}   
	}   
  
	/**
	 * 获取Sheet列表
	 */
  	static List<Sheet> getSheets(Workbook workbook, List<Record> records)throws Exception{   
  		int recOffset = workbook.getNumRecords();   
  		int sheetNum = 0;   
    
  		convertLabelRecords(records, recOffset, workbook);   
    
  		List<Sheet> sheets = new ArrayList<Sheet>();   
  		while(recOffset < records.size()){   
  			Sheet sheet = Sheet.createSheet(records, sheetNum++, recOffset);
  			recOffset = sheet.getEofLoc() + 1;   
  			if(recOffset == 1) break;  
  			sheets.add(sheet);   
  		}   
    	return sheets;   
  	} 

  	/**
  	 * 取得一个sheet中数据的行数
  	 */
  	static int getRows(List<Record> records) {   
  	    int row = 0;   
  	    for(Iterator it = records.iterator(); it.hasNext();){   
  	      Record record = (Record) it.next();
  	      if(record.getSid() == DimensionsRecord.sid){   
  	    	  DimensionsRecord dr = (DimensionsRecord)record;
  	    	  row = dr.getLastRow();
  	    	  break;
  	      }   
  	    }   
  	    return row;   
  	}  
     
  	/**
  	 * 获取Excel文档的记录集
   	*/
  	static List<Record> getRecords(InputStream input) {   
  		try{   
  			POIFSFileSystem poifs = new POIFSFileSystem(input); 
  			InputStream stream = poifs.getRoot().createDocumentInputStream("Workbook");   
  			return RecordFactory.createRecords(stream);
  		}catch(IOException e){   
  			System.out.println("ExcelMergeUtil.getRecords: " + e.toString());
  			e.printStackTrace();   
  		}   
  		return Collections.EMPTY_LIST;   
  	}   
  
  	static void convertLabelRecords(List<Record> records, int offset, Workbook workbook)throws Exception{   
  		for(int k = offset; k < records.size(); k++){   
  			Record rec = (Record) records.get(k);   
  
  			if (rec.getSid() == LabelRecord.sid) {   
  				LabelRecord oldrec = (LabelRecord) rec;   
  				
		        records.remove(k);     
		        int stringid = workbook.addSSTString(new UnicodeString(oldrec.getValue()));   

		        LabelSSTRecord newrec = new LabelSSTRecord(); 
		        newrec.setRow(oldrec.getRow());   
		        newrec.setColumn(oldrec.getColumn());   
		        newrec.setXFIndex(oldrec.getXFIndex());   
		        newrec.setSSTIndex(stringid);   
		        records.add(k, newrec);   
  			}
  		}   
  	}   
  
  	static byte[] getBytes(Workbook workbook, Sheet[] sheets) { 
  		int nSheets = sheets.length;   
     
  		for(int i = 0; i < nSheets; i++){   
  			sheets[i].preSerialize();   
  		}   
  		
  		int totalsize = workbook.getSize();   
    
  		int[] estimatedSheetSizes = new int[nSheets];   
  		for(int k = 0; k < nSheets; k++){   
  			workbook.setSheetBof(k, totalsize);   
  			int sheetSize = sheets[k].getSize();   
  			estimatedSheetSizes[k] = sheetSize;   
  			totalsize += sheetSize;
  		} 
  
  		byte[] retval = new byte[totalsize];   
  		int pos = workbook.serialize(0, retval);   
  
  		for(int k = 0; k < nSheets; k++){   
  			int serializedSize = sheets[k].serialize(pos, retval);   
  			if(serializedSize != estimatedSheetSizes[k]){   
  				throw new IllegalStateException("Actual serialized sheet size (" + serializedSize   
  						+ ") differs from pre-calculated size (" + estimatedSheetSizes[k] + ") for sheet (" + k   
  						+ ")"); 
  			}   
  			pos += serializedSize;   
  		}   
    	return retval;   
  	} 
  
} 

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
要使用Python将多个Excel文件合并,可以使用pandas库中的read_excel函数读取每个Excel文件,并使用concat函数将它们合并一个数据帧。以下是一个示例代码: ```python import pandas as pd import os def merge_excel_files(path): file_list = os.listdir(path) # 获取文件夹中的所有文件名 frames = \[\] # 建立一个空列表,用于存储数据帧 for file in file_list: file_with_path = os.path.join(path, file) # 获取文件的完整路径 df = pd.read_excel(file_with_path) # 读取Excel文件为数据帧 frames.append(df) # 将数据帧添加到列表中 merged_df = pd.concat(frames, ignore_index=True) # 使用concat函数将所有数据帧合并一个 return merged_df # 调用函数并指定需要合并Excel文件所在的文件夹路径 merged_data = merge_excel_files("文件夹路径") # 可以将合并后的数据保存为一个新的Excel文件 merged_data.to_excel("合并后的文件路径", index=False) ``` 请将代码中的"文件夹路径"替换为包含要合并Excel文件文件夹的实际路径,并将"合并后的文件路径"替换为您希望保存合并后数据的Excel文件的路径。 #### 引用[.reference_title] - *1* [如何使用python合并多个excel文件](https://blog.csdn.net/xili1342/article/details/124983308)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v91^control_2,239^v3^insert_chatgpt"}} ] [.reference_item] - *2* *3* [Python Excel教程之如何将多个 excel 文件合并一个文件(教程含源码)](https://blog.csdn.net/iCloudEnd/article/details/125107326)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v91^control_2,239^v3^insert_chatgpt"}} ] [.reference_item] [ .reference_list ]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值