如何操作Excel

最新推荐文章于 2024-07-13 13:55:40 发布

weixin_34401479

最新推荐文章于 2024-07-13 13:55:40 发布

阅读量95

点赞数

文章标签： java python

原文链接：https://my.oschina.net/u/1398304/blog/315869

版权

2019独角兽企业重金招聘Python工程师标准>>>

2003excel直接采用的jxl进行解析，2007excel其实是个压缩文件，我们用解压缩工具可以打开，里面有对于这个excel结构的完整描述。开始采用的解析方法是直接解析压缩文件里面的excel结构来进行解析，但是处理小数的时候有点问题，最后还是换成poi的方式进行，但是处理小数仍然有问题，还是通过结构里面的描述来解决。各种数据类型基本都测试通过，可以直接使用，jxl和poi的jar包网上可以直接下。废话不多说，直接贴代码。

代码包含2个文件：ExcelParser（解析类），EpointSheet（解析后对于excel封装的对象类）

package com.epoint.utility.excel;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;

import jxl.Cell;
import jxl.CellType;
import jxl.DateCell;
import jxl.NumberCell;
import jxl.Sheet;
import jxl.Workbook;

import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTCellImpl;

import com.epoint.utility.string.StringUtil;

/**
* excel解析工具类(针对2003版本的excel采用jxl进行解析,2007的采用poi解析,
* 解析之后会封装成EpointSheet对象集合
* ,每个EpointSheet代表一个excel的一个sheet,具体属性可以参看EpointSheet类的说明)
*
* @作者 komojoemary
* @version [版本号, 2011-4-7]
* @see [相关类/方法]
* @since [产品/模块版本]
*/
public class ExcelParser
{
/**
 * Parses an Excel file and returns one {@link EpointSheet} per worksheet.
 *
 * <p>Files whose name ends with {@code .xlsx} (compared case-insensitively) are
 * read with POI; every other file is treated as an Excel 2003 workbook and read
 * with jxl.
 *
 * @param fileName
 *            full path of the Excel file
 * @return the parsed sheets; may be {@code null} or incomplete when the
 *         underlying parser fails (the parsers print the stack trace instead
 *         of rethrowing)
 */
public static List<EpointSheet> parseExcel(String fileName) {
    String suffix = ".xlsx";
    String name = StringUtil.getFileName(fileName);
    // Test the extension only. A plain indexOf("xlsx") also matched names such as
    // "xlsx_export.xls" and missed upper-case extensions such as "REPORT.XLSX".
    // regionMatches returns false (instead of throwing) when the name is shorter
    // than the suffix.
    boolean isXlsx = name.regionMatches(true, name.length() - suffix.length(), suffix, 0, suffix.length());
    if (isXlsx) {
        return parse2007ExcelPoi(fileName);
    }
    return parse2003ExcelJxl(fileName);
}

/**
 * Parses an Excel 2003 workbook with jxl.
 *
 * <p>Every sheet becomes one {@link EpointSheet}. Date cells are returned as
 * {@code java.util.Date}, number cells as {@code Double}, and every other cell
 * as its trimmed text content.
 *
 * @param fileName
 *            path of the workbook
 * @return the parsed sheets; {@code null} when the workbook has no sheets or
 *         could not be opened; a partial list when parsing fails part-way
 *         (the exception is printed, not rethrown)
 */
private static List<EpointSheet> parse2003ExcelJxl(String fileName) {
    List<EpointSheet> result = null;
    Workbook rwb = null;
    try {
        // Open the workbook
        rwb = Workbook.getWorkbook(new File(fileName));
        // Fetch all sheets
        Sheet[] allSheet = rwb.getSheets();
        if (allSheet != null && allSheet.length > 0) {
            result = new ArrayList<EpointSheet>();
            for (int i = 0; i < allSheet.length; i++) {
                Sheet rs = rwb.getSheet(i);
                int row = rs.getRows();
                int column = rs.getColumns();
                List<Object[]> excelValue = new ArrayList<Object[]>();
                String sheetName = rs.getName();
                // Read the sheet row by row
                for (int j = 0; j < row; j++) {
                    Object[] rowValue = new Object[column];
                    for (int m = 0; m < column; m++) {
                        Cell cell = rs.getCell(m, j);
                        if (cell.getType() == CellType.DATE) {
                            // Date cell
                            rowValue[m] = ((DateCell) cell).getDate();
                        }
                        else if (cell.getType() == CellType.NUMBER) {
                            // Number cell
                            rowValue[m] = ((NumberCell) cell).getValue();
                        }
                        else {
                            rowValue[m] = cell.getContents().trim();
                        }
                    }
                    excelValue.add(rowValue);
                }
                // Build the sheet model; the first row becomes its header
                result.add(new EpointSheet(sheetName, excelValue));
            }
        }
    }
    catch (Exception e) {
        e.printStackTrace();
    }
    finally {
        // Release the workbook; previously it was never closed.
        if (rwb != null) {
            rwb.close();
        }
    }
    return result;
}

/**
 * Parses an Excel 2007 (.xlsx) workbook with POI.
 *
 * <p>Every sheet becomes one {@link EpointSheet}. Rows that do not exist in
 * the sheet are skipped; cells that do not exist stay {@code null} in the row
 * array.
 *
 * @param fileName
 *            path of the workbook
 * @return the parsed sheets; an empty or partial list when an
 *         {@code IOException} occurs (it is printed, not rethrown)
 */
private static List<EpointSheet> parse2007ExcelPoi(String fileName) {
    List<EpointSheet> result = new ArrayList<EpointSheet>();
    XSSFWorkbook xwb = null;
    try {
        // Open the workbook from the file path
        xwb = new XSSFWorkbook(fileName);
        // Iterate over the sheets
        for (int numSheet = 0; numSheet < xwb.getNumberOfSheets(); numSheet++) {
            XSSFSheet xSheet = xwb.getSheetAt(numSheet);
            if (xSheet == null) {
                continue;
            }
            int lastRow = xSheet.getLastRowNum();
            List<Object[]> excelValue = new ArrayList<Object[]>();
            // Iterate over the rows
            for (int rowNum = 0; rowNum <= lastRow; rowNum++) {
                XSSFRow xRow = xSheet.getRow(rowNum);
                if (xRow == null) {
                    continue;
                }
                // getLastCellNum() is -1 for a row without cells; clamp it so the
                // array allocation cannot throw NegativeArraySizeException.
                int column = Math.max(xRow.getLastCellNum(), 0);
                Object[] rowValue = new Object[column];
                for (int cellNum = 0; cellNum < column; cellNum++) {
                    rowValue[cellNum] = readPoiCell(xRow.getCell(cellNum));
                }
                excelValue.add(rowValue);
            }
            // Build the sheet model; the first row becomes its header
            result.add(new EpointSheet(xSheet.getSheetName(), excelValue));
        }
    }
    catch (IOException e) {
        e.printStackTrace();
    }
    finally {
        // Release the underlying package without writing anything back;
        // previously the file stayed open after parsing.
        if (xwb != null) {
            xwb.getPackage().revert();
        }
    }
    return result;
}

/**
 * Reads one POI cell: booleans as {@code Boolean}, numeric cells via
 * {@link #readPoiNumericCell(XSSFCell)}, everything else as text.
 *
 * @param xCell
 *            the cell, or {@code null} when the row has no cell at that index
 * @return the cell value, or {@code null} for a missing cell
 */
private static Object readPoiCell(XSSFCell xCell) {
    if (xCell == null) {
        return null;
    }
    if (xCell.getCellType() == XSSFCell.CELL_TYPE_BOOLEAN) {
        return xCell.getBooleanCellValue();
    }
    if (xCell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) {
        return readPoiNumericCell(xCell);
    }
    try {
        return xCell.getStringCellValue();
    }
    catch (IllegalStateException e) {
        // Cells that cannot be read as text (e.g. error cells, formulas with a
        // non-text result) used to abort the whole parse; fall back to the
        // cell's own string form instead.
        return xCell.toString();
    }
}

/**
 * Reads a numeric cell. The value is returned as the text of the double,
 * unless the cell's style index marks it as a date, in which case
 * {@code getDateFromStr} converts it. If that inspection fails, the raw
 * {@code Double} is returned.
 *
 * @param xCell
 *            a cell of type {@code CELL_TYPE_NUMERIC}
 * @return a {@code String}, a {@code Date}, or a {@code Double} as described
 */
private static Object readPoiNumericCell(XSSFCell xCell) {
    double numeric = xCell.getNumericCellValue();
    try {
        // Pull the style index out of the cell XML, i.e. the value of s="...".
        // NOTE(review): a style index of 1 or 2 is treated as a date downstream;
        // that presumably matches the workbooks this was written for, but a style
        // index is not a number format. Confirm, or switch to POI's date check.
        String styleIndex = null;
        CTCellImpl impl = (CTCellImpl) xCell.getCTCell();
        String xml = impl.toString();
        String sign = "s=\"";
        int startIndex = xml.indexOf(sign);
        if (startIndex > -1) {
            startIndex += sign.length();
            int endIndex = xml.indexOf("\"", startIndex);
            styleIndex = xml.substring(startIndex, endIndex);
        }
        return getDateFromStr(styleIndex, Double.toString(numeric));
    }
    catch (Exception e) {
        return numeric;
    }
}

// /**
// * 解析2007版本的excel,采用最原始的java解析,根据excel文件内部的信息
// * 这种方法在处理小数时有些问题,比如5位小数,它会取数补足后面的位数,所以改成用POI来解析2007excel
// * @param fileName
// * 文件名字
// * @return List<EpointSheet> 解析好的sheet对象集合
// */
// private static List<EpointSheet> parse2007Excel(String fileName) {
// List<EpointSheet> result = null;
// try {
// // 解压.xlsx
// ZipFile xlsxFile = new ZipFile(new File(fileName));
// DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
// // 先读取sharedStrings.xml这个文件备用
// String[] sharedStrings = null;
// ZipEntry sharedStringXML = xlsxFile.getEntry("xl/sharedStrings.xml");
// if (sharedStringXML != null) {
// InputStream sharedStringXMLIS = xlsxFile.getInputStream(sharedStringXML);
// Document sharedString =
// dbf.newDocumentBuilder().parse(sharedStringXMLIS);
// NodeList str = sharedString.getElementsByTagName("t");
// sharedStrings = new String[str.getLength()];
// for (int n = 0; n < str.getLength(); n++) {
// Element element = (Element) str.item(n);
// sharedStrings[n] = element.getTextContent();
// }
// }
// // 找到解压文件夹里的workbook.xml,此文件中包含了这张工作表中有几个sheet
// ZipEntry workbookXML = xlsxFile.getEntry("xl/workbook.xml");
// InputStream workbookXMLIS = xlsxFile.getInputStream(workbookXML);
// Document doc = dbf.newDocumentBuilder().parse(workbookXMLIS);
// // 获取一共有几个sheet
// NodeList nl = doc.getElementsByTagName("sheet");
// result = new ArrayList<EpointSheet>();
// // 遍历所有的sheet
// for (int i = 0; i < nl.getLength(); i++) {
// Element element = (Element) nl.item(i);
// // 接着就要到解压文件夹里找到对应的name值的xml文件，比如在workbook.xml中有<sheet
// // name="Sheet1" sheetId="1" r:id="rId1" /> 节点
// // 那么就可以在解压文件夹里的xl/worksheets下找到sheet1.xml,这个xml文件夹里就是包含的表格的内容
// String sheetName = element.getAttribute("name").toLowerCase();
// // 解压获取某个sheet的信息
// ZipEntry sheetXML = xlsxFile.getEntry("xl/worksheets/" + sheetName +
// ".xml");
// if (sheetXML != null) {
// InputStream sheetXMLIS = xlsxFile.getInputStream(sheetXML);
// Document sheetdoc = dbf.newDocumentBuilder().parse(sheetXMLIS);
// // "dimension",解析行列总定义,获取总列数
// NodeList define = sheetdoc.getElementsByTagName("dimension");
// Element totalDefine = (Element) define.item(0);
// String lay = totalDefine.getAttribute("ref");
// int columnNumber = getColumnIndex(lay);
// NodeList rowdata = sheetdoc.getElementsByTagName("row");
// List<Object[]> excelValue = new ArrayList<Object[]>();
// // 解析sheet的行数据
// for (int j = 0; j < rowdata.getLength(); j++) {
// // 得到每个行
// // 行的格式：
// /*
// * <row r="1" spans="1:3">r表示第一行,spans表示有几列 <c r="A1"
// * t="s">/
// * /r表示该列的列表，t="s"个人认为是表示这个单元格的内容可以在sharedStrings
// * .xml这个文件里找到，对应的节点下标就是v节点的值，即0，若没有t属性，则v的值就是该单元格的内容
// * <v>0</v> </c> <c r="B1" t="s"> <v>1</v> </c> <c
// * r="C1" t="s"> <v>2</v> </c> </row>
// */
// // <row r="3" spans="1:5">
// // <c r="E3" t="s">
// // <v>2</v>
// // </c>
// // </row>
// // <row r="5" spans="1:5">
// // <c r="D5">
// // <v>222</v>
// // </c>
// // </row>
// Element row = (Element) rowdata.item(j);
// // 根据行得到每个行中的列
// NodeList columndata = row.getElementsByTagName("c");
// Object[] rowValue = new Object[columnNumber];
// for (int k = 0; k < columndata.getLength(); k++) {
// Element column = (Element) columndata.item(k);
// NodeList values = column.getElementsByTagName("v");
// Element value = (Element) values.item(0);
// if (value != null) {
// int trueColumn = getColumnIndex(column.getAttribute("r")) - 1;
// // 如果是共享字符串则在sharedstring.xml里查找该列的值
// if (column.getAttribute("t") != null &
// column.getAttribute("t").equals("s")) {
// if (sharedStrings != null) {
// rowValue[trueColumn] =
// sharedStrings[Integer.parseInt(value.getTextContent())];
// }
// }
// else {
// Object cellValue = value.getTextContent();
// // 对于小数,2007
// cellValue = getDateFromStr(column.getAttribute("s"),
// cellValue.toString());
// rowValue[trueColumn] = cellValue;
// }
// }
// }
// excelValue.add(rowValue);
// }
// // 构建1个sheet模型
// EpointSheet sheet = new EpointSheet(sheetName, excelValue);
// result.add(sheet);
// }
// }
// }
// catch (Exception e) {
// e.printStackTrace();
// }
// return result;
// }

/**
 * Converts the text of a numeric cell according to the cell's style index.
 *
 * @param s
 *            style index of the cell as text; may be {@code null}
 * @param cellValue
 *            the cell value as text
 * @return a {@code Date} when {@code s} is {@code "2"} (long date) or
 *         {@code "1"} (short date); otherwise {@code cellValue} unchanged
 */
private static Object getDateFromStr(String s, String cellValue) {
    // Long date
    if ("2".equals(s)) {
        return doubleDate2Str(cellValue, 1);
    }
    // Short date
    if ("1".equals(s)) {
        return doubleDate2Str(cellValue, 0);
    }
    // Any other style: hand the text back untouched
    return cellValue;
}

// /**
// * 根据excel的列定义来解析列数:A,B,C,D-----AA,AB,AC---AAA,AAB---
// *
// * @param columnDefine
// * 列定义
// * @return int具体的index
// */
// private static int getColumnIndex(String columnDefine) {
// String column = columnDefine;
// if (columnDefine.indexOf(":") != -1) {
// column = columnDefine.substring(columnDefine.indexOf(":") + 1,
// columnDefine.length());
// }
// // 找到第一个数字出现的位置
// int index = column.length();
// for (int i = 0; i < column.length(); i++) {
// if (Character.isDigit(column.charAt(i))) {
// index = i;
// break;
// }
// }
// // 截掉数字,成为真正的列定义
// column = column.substring(0, index);
// char[] text = column.toCharArray();
// int count = 0;
// int length = text.length;
// for (int i = 0; i < length; i++) {
// // 26的指数次幂*该字符所在位置
// switch (text[i]) {
// case 'A':
// count += Math.pow(26, length - 1 - i);
// break;
// case 'B':
// count += Math.pow(26, (length - 1 - i)) * 2;
// break;
// case 'C':
// count += Math.pow(26, (length - 1 - i)) * 3;
// break;
// case 'D':
// count += Math.pow(26, (length - 1 - i)) * 4;
// break;
// case 'E':
// count += Math.pow(26, (length - 1 - i)) * 5;
// break;
// case 'F':
// count += Math.pow(26, (length - 1 - i)) * 6;
// break;
// case 'G':
// count += Math.pow(26, (length - 1 - i)) * 7;
// break;
// case 'H':
// count += Math.pow(26, (length - 1 - i)) * 8;
// break;
// case 'I':
// count += Math.pow(26, (length - 1 - i)) * 9;
// break;
// case 'J':
// count += Math.pow(26, (length - 1 - i)) * 10;
// break;
// case 'K':
// count += Math.pow(26, (length - 1 - i)) * 11;
// break;
// case 'L':
// count += Math.pow(26, (length - 1 - i)) * 12;
// break;
// case 'M':
// count += Math.pow(26, (length - 1 - i)) * 13;
// break;
// case 'N':
// count += Math.pow(26, (length - 1 - i)) * 14;
// break;
// case 'O':
// count += Math.pow(26, (length - 1 - i)) * 15;
// break;
// case 'P':
// count += Math.pow(26, (length - 1 - i)) * 16;
// break;
// case 'Q':
// count += Math.pow(26, (length - 1 - i)) * 17;
// break;
// case 'R':
// count += Math.pow(26, (length - 1 - i)) * 18;
// break;
// case 'S':
// count += Math.pow(26, (length - 1 - i)) * 19;
// break;
// case 'T':
// count += Math.pow(26, (length - 1 - i)) * 20;
// break;
// case 'U':
// count += Math.pow(26, (length - 1 - i)) * 21;
// break;
// case 'V':
// count += Math.pow(26, (length - 1 - i)) * 22;
// break;
// case 'W':
// count += Math.pow(26, (length - 1 - i)) * 23;
// break;
// case 'X':
// count += Math.pow(26, (length - 1 - i)) * 24;
// break;
// case 'Y':
// count += Math.pow(26, (length - 1 - i)) * 25;
// break;
// case 'Z':
// count += Math.pow(26, (length - 1 - i)) * 26;
// break;
// default:
// break;
// }
// }
// return count;
// }

/**
 * Converts an Excel serial date (1900 date system) given as text into a
 * {@code Date} in the default time zone.
 *
 * <p>The integer part of the serial counts days, with serial 1 being
 * 1900-01-01; the fractional part is the time of day, rounded here to the
 * nearest second.
 *
 * @param datestr
 *            the serial value as text, e.g. {@code "40000.5"}; scientific
 *            notation as produced by {@code Double.toString} is accepted
 * @param type
 *            0 for a short date, 1 for a long date; kept for compatibility,
 *            the resulting {@code Date} is the same for both
 * @return the corresponding date
 * @throws NumberFormatException
 *             if {@code datestr} is not a number
 */
private static Date doubleDate2Str(String datestr, int type) {
    // Parse as a double rather than splitting on '.', which mis-read values
    // such as "3.47E-4" as 3 days.
    double serial = Double.parseDouble(datestr);
    int wholeDays = (int) Math.floor(serial);
    // Round instead of truncating so that e.g. 0.9999999 s does not become 0 s.
    long seconds = Math.round((serial - wholeDays) * 24 * 3600);

    Calendar c = new java.util.GregorianCalendar(1900, Calendar.JANUARY, 1);
    // Serial 1 is 1900-01-01, so one day must always be subtracted. Excel also
    // counts a non-existent 1900-02-29 (serial 60), so serials from 61 onwards
    // are one further day ahead. This is where the unexplained "-2" came from;
    // applying it to serials below 61 was off by one day.
    int dayOffset = wholeDays < 61 ? 1 : 2;
    c.add(Calendar.DAY_OF_MONTH, wholeDays - dayOffset);
    c.add(Calendar.SECOND, (int) seconds);
    return c.getTime();
}

/**
 * Manual smoke test: parses a workbook and prints every header cell and every
 * data cell to standard output.
 *
 * @param args
 *            optional; {@code args[0]} is the workbook path. Without
 *            arguments the original default {@code d:\test.xlsx} is used.
 */
public static void main(String[] args) {
    String path = (args != null && args.length > 0) ? args[0] : "d:\\test.xlsx";
    List<EpointSheet> allSheet = ExcelParser.parseExcel(path);
    if (allSheet == null) {
        return;
    }
    for (EpointSheet sheet : allSheet) {
        List<Object[]> excelValue = sheet.getColumnValue();
        if (excelValue == null) {
            continue;
        }
        // Header cells. A header cell can be null (missing cell), so guard it
        // the same way the data cells are guarded.
        for (Object item : sheet.getHeader()) {
            System.out.println("表头----------》" + (item == null ? "" : item.toString()));
        }
        // Data cells, row by row
        for (Object[] item : excelValue) {
            for (Object value : item) {
                System.out.println("列值----------》" + (value == null ? "" : value.toString()));
            }
        }
    }
}
}

package com.epoint.utility.excel;

import java.util.List;

/**
 * Model of one Excel sheet: its name, its header row, its data rows, and
 * paging settings.
 *
 * @author komojoemary
 * @version 2010-12-2
 */
public class EpointSheet
{

    /**
     * Sheet name.
     */
    private String sheetName = null;

    /**
     * Header row (the first row of the sheet).
     */
    private Object[] header = null;

    /**
     * Data rows (every row after the header).
     */
    private List<Object[]> columnValue = null;

    /**
     * Whether all data is exported.
     */
    private Boolean pagesAll = null;

    /**
     * Page number.
     */
    private int pageNumber = 0;

    /**
     * Number of records per page.
     */
    private int pageSize = 0;

    /**
     * Builds a sheet from its raw rows. The first row becomes the header and
     * the remaining rows become the data rows. When {@code excelValue} is
     * {@code null} or empty, both header and data rows stay {@code null}.
     *
     * <p>The rows are copied into a new list, so the list passed in is not
     * modified and may be unmodifiable.
     *
     * @param sheetName
     *            name of the sheet
     * @param excelValue
     *            all rows of the sheet, header first; may be {@code null}
     */
    public EpointSheet(String sheetName, List<Object[]> excelValue) {
        this.sheetName = sheetName;
        if (excelValue != null && !excelValue.isEmpty()) {
            header = excelValue.get(0);
            columnValue = new java.util.ArrayList<Object[]>(excelValue.subList(1, excelValue.size()));
        }
    }

    public Object[] getHeader() {
        return header;
    }

    public void setHeader(Object[] header) {
        this.header = header;
    }

    public List<Object[]> getColumnValue() {
        return columnValue;
    }

    public void setColumnValue(List<Object[]> columnValue) {
        this.columnValue = columnValue;
    }

    public Boolean getPagesAll() {
        return pagesAll;
    }

    public void setPagesAll(Boolean pagesAll) {
        this.pagesAll = pagesAll;
    }

    public int getPageNumber() {
        return pageNumber;
    }

    public void setPageNumber(int pageNumber) {
        this.pageNumber = pageNumber;
    }

    public int getPageSize() {
        return pageSize;
    }

    public void setPageSize(int pageSize) {
        this.pageSize = pageSize;
    }

    public String getSheetName() {
        return sheetName;
    }

    public void setSheetName(String sheetName) {
        this.sheetName = sheetName;
    }
}

bug:

公司原来在导入 Excel 文件时，是一个一个导入的；也即，每一个导入文件都要有对应的 Java 代码实现，所以在读取 Excel 文件的时候没出现什么问题。这两天，准备把以前的做法废掉，统一封装一下，以达到只要根据配置文件就能解析所有的 Excel 文件并将结果返回。

在实现过程中，刚开始时用 demo 写的，所以 Excel 文件是全路径的（类似于 c://test/XXX.xls），解析的时候需要两次打开同一个 Excel 文件，不过都不会出现问题。后来移植到项目中去的时候，由于是直接通过网页导入的，只能获取其 InputStream 流，不能获取其绝对路径。由于需要两次用到同一个 InputStream，故就出现了 Unable to read entire header; 0 bytes read; expected 512 bytes 这样的问题。

网上找了好久，很多解决方案都不行，后来才想到 InputStream 流只能读取一次，第二次读取时已经没有数据了，才导致了以上的问题。解决的方法有两种（参考 http://simpleframework.net/bbs/835/27640.html），我选择首先将这个 InputStream 流用字节数组存起来，要用的时候再用 ByteArrayInputStream 封装（参考 http://hiok.blog.sohu.com/65555266.html），具体如下：

[java] view plain copy print ?

//一开始，就把流对象用字节数字保存起来
byte[] buf = org.apache.commons.io.IOUtils.toByteArray(execelIS)//execelIS为InputStream流
/
//在需要用到InputStream的地方再封装成InputStream
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(buf);
Workbook wb = new HSSFWorkbook(byteArrayInputStream);//byteArrayInputStream 继承了InputStream，故这样用并没有问题
///其它要用到的地方也是同样的原理
//主：如果只需要用到一次inputstream流，就不用这样啦，直接用就OK

转载于:https://my.oschina.net/u/1398304/blog/315869