poi读取excel的使用

使用POI相关API来读取Excel,代码如下:

package com.pan.utils;

import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.WorkbookUtil;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import com.excel.entity.SheetResult;

/**
 * 读取Excel的工具类
 * <br/>
 * 需要做的几件事:<br/>
 * 1. 获取Excel文件的Workbook对象;<br/>
 * 2. 读取指定Sheet页的所有内容;<br/>
 * 3. 读取指定Sheet页中某些有用数据;<br/>
 * 4. 读取Sheet页数据封装成对象;<br/>
 * 
 * @since Excel Study 1.0
 */
public class ReadExcelUtil {
	
	public final static int START = 0;
	
	/**
	 * 获取Excel文件的Workbook对象
	 * @param fileName
	 * @return
	 * @throws Exception
	 */
	public static Workbook getWorkBook(String fileName) throws Exception {
		long startTime = System.currentTimeMillis();
//		Workbook wb = WorkbookFactory.create(new File(fileName));
//		Workbook wb = WorkbookFactory.create(new FileInputStream(fileName));
		
//		NPOIFSFileSystem fs = new NPOIFSFileSystem(new FileInputStream(fileName));
//		HSSFWorkbook wb = new HSSFWorkbook(fs.getRoot(), true);
		
		Workbook wb = null;
		try {
//			NPOIFSFileSystem fs = new NPOIFSFileSystem(new FileInputStream(fileName));
//			wb = new HSSFWorkbook(fs.getRoot(), true);
			wb = new HSSFWorkbook(new FileInputStream(fileName));
		} catch (Exception e) {
			wb = new XSSFWorkbook(new FileInputStream(fileName));
//			wb = WorkbookFactory.create(new FileInputStream(fileName));
		}
		  
//		  OPCPackage pkg = OPCPackage.open(new FileInputStream(fileName));
//		  XSSFWorkbook wb = new XSSFWorkbook(pkg);
		  
		  System.out.println(System.currentTimeMillis() - startTime);
		return wb;
	}
	
	/**
	 * 读取指定Sheet页的所有内容
	 * @param sheet
	 * @return
	 */
	public static List<List<String>> readFromSheet(Sheet sheet) {
		if(sheet == null) {
			throw new NullPointerException("sheet not be null");
		}
		
		//保存所有读取到的数据
		List<List<String>> dataList = new ArrayList<List<String>>();
		
		//遍历行
		for (Row row : sheet) {
			List<String> rowDataList = new ArrayList<String>();
			
			//遍历列
	        for (Cell cell : row) {
	        	
	            rowDataList.add(getCellValue(cell));
	        }
	        
	        dataList.add(rowDataList);
	    }
		return dataList;
	}
	
	/**
	 * 读取指定Sheet页的所有内容
	 * @param wb
	 * @param sheetName
	 * @return
	 */
	public static List<List<String>> readFromSheet(Workbook wb, String sheetName) {
		Sheet sheet = wb.getSheet(sheetName);
		return readFromSheet(sheet);
	}
	
	/**
	 * 读取指定Sheet页中某些有用数据
	 * @param sheet
	 * @param includeColNameSet			有用数据开始的列名集合,用于有用数据所在位置的可信度判断
	 * @return
	 */
	public static List<List<String>> readFromSheet(Sheet sheet, Set<String> includeColNameSet) {
		if(sheet == null) {
			throw new NullPointerException("sheet not be null");
		}
		
		//保存所有读取到的数据
		List<List<String>> dataList = new ArrayList<List<String>>();
		
		//存储临时数据的列表,用于定位有用数据所在的位置
		List<String> tempValueList = new ArrayList<String>();
		
		String tempValue = null;
		int startCellNum = START;
		boolean isStart = false; 
		
		//可能是有用数据,可能为true,否则为false,可能的依据是:包含可用数据
		boolean isPossible = false;
		int rowIndex = 0;
		
		//遍历行
		for(Row row : sheet) {
			isPossible = false;
			List<String> rowDataList = null;
			if(startCellNum == START) {
				tempValueList.clear();
			} else {
				rowDataList = new ArrayList<String>();
			}
			
			//遍历列
	        for (int j = startCellNum, cellLen = row.getLastCellNum() ; j < cellLen; j++) {
	        	Cell cell = row.getCell(j);
	        	tempValue = getCellValue(cell);
	        	
	            
	            if(!isStart || startCellNum == START) {
            		tempValueList.add(tempValue);
            	} else {
            		rowDataList.add(tempValue);
            	}
	            
	            if(!isStart && includeColNameSet.contains(tempValue)) {
	            	isPossible = true;
	            }
	        }
	        
            if(startCellNum == START && isPossible) {
            	//获取可用数据所在的列
            	startCellNum = getColIndex(tempValueList, includeColNameSet);
            }
	        
	        //可信位置存在,则判断其可信度
	        if(!isStart && startCellNum != START) {
        		rowDataList = new ArrayList<String>();
        		
        		//添加有用数据
        		for(int k = startCellNum, cellLen = row.getLastCellNum(); k < cellLen; k++) {
        			rowDataList.add(tempValueList.get(k));
        		}
        		dataList.add(rowDataList);
        		rowIndex++;
        		isStart = true;
	        } else if(startCellNum != START) {
	        	if(isEquals(dataList.get(rowIndex-1), rowDataList)) {
	        		break;
	        	} else {
	        		dataList.add(rowDataList);
	        		rowIndex++;
	        	}
	        	
	        }
	        
	    }
		return dataList;
	}
	
	/**
	 * 读取指定Sheet页中某些有用数据
	 * @param wb
	 * @param sheetName
	 * @param includeColNameSet
	 * @return
	 */
	public static List<List<String>> readFromSheet(Workbook wb,
							String sheetName, Set<String> includeColNameSet) {
		Sheet sheet = wb.getSheet(sheetName);
		return readFromSheet(sheet, includeColNameSet);
	}
	

	/**
	 * 读取Sheet页数据封装成对象
	 * @param sheet
	 * @param includeColNameSet
	 * @param colLineNum			列名行数
	 * @return
	 */
	public static SheetResult readFromSheet(Sheet sheet,  Set<String> includeColNameSet, 
									int colLineNum) {
		if(sheet == null) {
			throw new NullPointerException("sheet not be null");
		}
		
		//保存所有读取到的数据
		List<List<String>> dataList = new ArrayList<List<String>>();
		
		//记录列名与数据索引
		Map<String, Integer> colNameMap = null;
		
		//存储临时数据的列表,用于定位有用数据所在的位置
		List<String> tempValueList = new ArrayList<String>(sheet.getLastRowNum() / 2);
		
		String tempValue = null;
		int maxCellNum = Integer.MAX_VALUE;
		int cellNum = Integer.MAX_VALUE;
		int startCellNum = START;
		boolean isStart = false; 
		
		//可能是有用数据,可能为true,否则为false,可能的依据是:包含可用数据
		boolean isPossible = false;
		int rowIndex = 0;
		int index = 0;
		
		//遍历行
		for(Row row : sheet) {
			isPossible = false;
			List<String> rowDataList = null;
			if(startCellNum == START) {
				tempValueList.clear();
				cellNum = Math.min(row.getLastCellNum(), maxCellNum);
			} else {
				rowDataList = new ArrayList<String>(maxCellNum - startCellNum);
				cellNum = maxCellNum;
			}
			
			//遍历列
	        for (int j = startCellNum; j < cellNum; j++) {
	        	Cell cell = row.getCell(j);
	        	tempValue = getCellValue(cell);
	        	
	            
	            if(!isStart || startCellNum == START) {
            		tempValueList.add(tempValue);
            	} else {
            		rowDataList.add(tempValue);
            	}
	            
	            if(!isStart && includeColNameSet.contains(tempValue)) {
	            	isPossible = true;
	            }
	        }
	        
            if(startCellNum == START && isPossible) {
            	//获取可用数据所在的列
            	startCellNum = getColIndex(tempValueList, includeColNameSet);
            }
	        
	        //可信位置存在,则判断其可信度
	        if(!isStart && startCellNum != START) {
        		rowDataList = new ArrayList<String>();
        		
        		colNameMap = getColNameMap(sheet, index, startCellNum, colLineNum);
        		maxCellNum = startCellNum + colNameMap.size();
        		//添加有用数据
        		for(int k = startCellNum; k < maxCellNum; k++) {
        			rowDataList.add(tempValueList.get(k));
        		}
        		dataList.add(rowDataList);
        		rowIndex++;
        		isStart = true;
	        } else if(startCellNum != START) {
	        	if(isEquals(dataList.get(rowIndex-1), rowDataList)) {
	        		break;
	        	} else {
	        		dataList.add(rowDataList);
	        		rowIndex++;
	        	}
	        	
	        }
	        
	        index++;
	    }
		SheetResult sheetResult = new SheetResult();
		sheetResult.setColNameMap(colNameMap);
		sheetResult.setDataList(dataList);
		
		return sheetResult;
	}
	
	/**
	 * 读取Sheet页数据封装成对象
	 * @param wb
	 * @param sheetName
	 * @param includeColNameSet
	 * @param colLineNum			列名行数
	 * @return
	 */
	public static SheetResult readFromSheet(Workbook wb, String sheetName,
						Set<String> includeColNameSet, int colLineNum) {
		Sheet sheet = wb.getSheet(sheetName);
		return readFromSheet(sheet, includeColNameSet, colLineNum);
	}
	
	/**
	 * 获取列名与数据索引映射
	 * @param sheet
	 * @param rowNo
	 * @param cellNo
	 * @param colLineNum
	 * @return
	 */
	private static Map<String, Integer> getColNameMap(Sheet sheet, int rowNo, 
													int cellNo, int colLineNum) {
		//TODO 获取列名信息
		Map<String, Integer> colNameMap = new TreeMap<String, Integer>();
		colNameMap.put("1", 1);
		colNameMap.put("2", 1);
		colNameMap.put("3", 1);
		colNameMap.put("4", 1);
		colNameMap.put("5", 1);
		colNameMap.put("6", 1);
		return colNameMap;
	}
	
	
	/**
	 * 获取Cell对象表示的值
	 * @param cell
	 * @return
	 */
	private static String getCellValue(Cell cell) {
		if(cell == null) {
			return "";
		}
		String tempValue = null;
		//判断Cell的类型
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_STRING:
            	tempValue = cell.getRichStringCellValue().getString();
                break;
                
            case Cell.CELL_TYPE_NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                	tempValue = cell.getDateCellValue().toString();
                } else {
                	tempValue = String.valueOf(cell.getNumericCellValue());
                }
                break;
                
            case Cell.CELL_TYPE_BOOLEAN:
            	tempValue = String.valueOf(cell.getBooleanCellValue());
                break;
                
            case Cell.CELL_TYPE_FORMULA:
            	tempValue = cell.getCellFormula();
                break;
                
            default:
            	tempValue = "";
            	break;
        }
        return tempValue;
	}
	
	/**
	 * 获取有用数据所在的列
	 * @param tempValueList
	 * @param includeColNameSet
	 * @return
	 */
	private static int getColIndex(List<String> tempValueList, 
										Set<String> includeColNameSet) {
		for(int i = 0, len = tempValueList.size(); i < len; i++) {
			//如果包含指定的列名
			if(includeColNameSet.contains(tempValueList.get(i))) {
				if(isTruePosition(tempValueList, i, includeColNameSet)) {
					return i;
				}
			}
		}
		return START;
	}
	
	/**
	 * 判断当前行所在数据是否是有用数据所在的行
	 * @param tempValueList
	 * @param start
	 * @param includeColNameSet
	 * @return
	 */
	private static boolean isTruePosition(List<String> tempValueList, int start,
										Set<String> includeColNameSet) {
		boolean isPosition = true;
		if(tempValueList.size() < start + includeColNameSet.size()) {
			isPosition = false;
			return isPosition;
		}
		for(int i = start, len = start + includeColNameSet.size(); i < len; i++) {
			if(!includeColNameSet.contains(tempValueList.get(i))) {
				isPosition = false;
				break;
			}
		}
		return isPosition;
	}
	
	/**
	 * 判断两行数据是否相同
	 * @param valueList
	 * @param compareValueList
	 * @return
	 */
	private static boolean isEquals(List<String> valueList, List<String> compareValueList) {
		boolean isEnd = true;
		int len = valueList.size() > compareValueList.size() ? compareValueList.size() : valueList.size();
		for(int i = 0; i < len; i++) {
			if(!compareValueList.get(i).equals("") 
					&& !valueList.get(i).equals(compareValueList.get(i))) {
				isEnd = false;
			}
		}
		return isEnd;
	}
	
	public static void main(String[] args) throws Exception {
		long startTime = System.currentTimeMillis();
		
		//Name: /xl/workbook.xml - Content Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml
		Workbook wb = getWorkBook("J:\\MyEclipse2014\\studyworkspace\\MicroftOffice\\temp\\test.xlsx");
		//Name: /xl/workbook.xml - Content Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml
		//org.apache.poi.hssf.usermodel.HSSFWorkbook@77d0bef0
//		wb = getWorkBook("J:\\MyEclipse2014\\studyworkspace\\MicroftOffice\\temp\\test2007.xls");
		
		System.out.println(System.currentTimeMillis() - startTime);
		System.out.println(wb);
		
		
		
		readFromSheet(wb, "type");
		
		Set<String> includeColNameSet = new HashSet<String>();
		includeColNameSet.add("START");
		includeColNameSet.add("VOL");
		includeColNameSet.add("VOH");
		includeColNameSet.add("DFS");
		includeColNameSet.add("FG");
		readFromSheet(wb, "type", includeColNameSet);
		
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		readFromSheet(wb, "type", includeColNameSet, 1);
		
		
		long endTime = System.currentTimeMillis();
		
		System.out.println(endTime - startTime);
	}

}

发现主要的时间还是在获取Workbook对象上,而对于97-03的文件的读取速度会比07的文件快很多

excel内容如下图所示:


  • 2
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论
android 使用poi读取高版本excel, 解决以下这两个错误 java.lang.NoClassDefFoundError: Failed resolution of: Ljavax/xml/stream/XMLEventFactory; at org.apache.poi.openxml4j.opc.internal.marshallers.PackagePropertiesMarshaller.(PackagePropertiesMarshaller.java:41) at org.apache.poi.openxml4j.opc.OPCPackage.init(OPCPackage.java:161) at org.apache.poi.openxml4j.opc.OPCPackage.(OPCPackage.java:141) at org.apache.poi.openxml4j.opc.ZipPackage.(ZipPackage.java:97) at org.apache.poi.openxml4j.opc.OPCPackage.open(OPCPackage.java:324) at org.apache.poi.ss.usermodel.WorkbookFactory.create(WorkbookFactory.java:184) at org.apache.poi.ss.usermodel.WorkbookFactory.create(WorkbookFactory.java:149) javax.xml.stream.FactoryConfigurationError: Provider com.sun.xml.internal.stream.events.XMLEventFactoryImpl not found at javax.xml.stream.FactoryFinder.newInstance(Unknown Source) at javax.xml.stream.FactoryFinder.newInstance(Unknown Source) at javax.xml.stream.FactoryFinder.find(Unknown Source) at javax.xml.stream.FactoryFinder.find(Unknown Source) at javax.xml.stream.XMLEventFactory.newInstance(Unknown Source) at org.apache.poi.openxml4j.opc.internal.marshallers.PackagePropertiesMarshaller.(PackagePropertiesMarshaller.java:41) at org.apache.poi.openxml4j.opc.OPCPackage.init(OPCPackage.java:161) at org.apache.poi.openxml4j.opc.OPCPackage.(OPCPackage.java:141) at org.apache.poi.openxml4j.opc.ZipPackage.(ZipPackage.java:97) at org.apache.poi.openxml4j.opc.OPCPackage.open(OPCPackage.java:324) at org.apache.poi.ss.usermodel.WorkbookFactory.create(WorkbookFactory.java:184) at org.apache.poi.ss.usermodel.WorkbookFactory.create(WorkbookFactory.java:149)

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

来了就走下去

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值