最近几天忙于Excel的操作,用到一些东西,记录下来。
第一步直奔http://search.maven.org/下载一些POI的jar,其中包括poi-3.8、poi-ooxml-3.8、poi-ooxml-schemas-3.8。
先建立2个对象一个行对象,一个列对象
package com.zskx.pem.standalone.excel;
/**
* Excel列对象
* @author yuzhang
*/
public class XCell {
private int rowIndex;
private int columnIndex;
private String value;
public int getRowIndex() {
return rowIndex;
}
public void setRowIndex(int rowIndex) {
this.rowIndex = rowIndex;
}
public int getColumnIndex() {
return columnIndex;
}
public void setColumnIndex(int columnIndex) {
this.columnIndex = columnIndex;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
}
package com.zskx.pem.standalone.excel;
import java.util.ArrayList;
import java.util.List;
/**
* Excel行对象
* @author yuzhang
*/
public class XRow {
private int rowIndex;
private List<XCell> cells=new ArrayList<XCell>();
public int getRowIndex() {
return rowIndex;
}
public void setRowIndex(int rowIndex) {
this.rowIndex = rowIndex;
}
public int getCellsSize() {
return cells.size();
}
public XRow addCell(XCell cell){
this.cells.add(cell);
return this;
}
public XCell getCell(int cellIndex){
return cells.get(cellIndex);
}
}
然后建立处理行数据的接口
package com.zskx.pem.standalone.excel;
import java.io.IOException;
/**
* 行数据处理接口
* @author yuzhang
*/
public interface ExcelRowProcessor{
public void process() throws Exception;
public void stop() throws IOException;
}
建立建立个2003处理器
package com.zskx.pem.standalone.excel;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public abstract class Excel2003RowProcessor implements ExcelRowProcessor {
private MyHSSFListener hssfListener;
private File file;
private InputStream is;
private POIFSFileSystem fs;
private int lastRowNumber;
private int lastColumnNumber;
/** Should we output the formula, or the value it has? */
private boolean outputFormulaValues = true;
/** For parsing Formulas */
private SheetRecordCollectingListener workbookBuildingListener;
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
/** So we known which sheet we're on */
private int sheetIndex = -1;
private int optSheetIndex = -1;
private BoundSheetRecord[] orderedBSRs;
private ArrayList<BoundSheetRecord> boundSheetRecords = new ArrayList<BoundSheetRecord>();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
private XRow row = new XRow();
/**
* 构造Excel-2003行级解析器
*
* @param filename
* excel完整文件名
* @throws IOException
*/
public Excel2003RowProcessor(File file) throws Exception {
if (file.getName().endsWith(".xlsx")) {
throw new Exception("Excel板式与解析器不匹配,解析器仅支持Excel-2003及以下版本。");
}
this.file = file;
this.hssfListener = new MyHSSFListener();
}
/**
* 处理指定索引的sheet,-1则表示处理所有sheet
*
* @param optSheetIndex
* @throws IOException
*/
private void process(int optSheetIndex) throws IOException {
this.optSheetIndex = optSheetIndex;
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(hssfListener);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
}
/**
* 处理所有sheet
*/
public void process() throws IOException {
this.is = new FileInputStream(file);
this.fs = new POIFSFileSystem(is);
sheetIndex = -1;
this.process(-1);
}
/**
* 处理指定索引的sheet
*/
public void processByRow(int optSheetIndex) throws Exception {
this.is = new FileInputStream(file);
this.fs = new POIFSFileSystem(is);
sheetIndex = -1;
if (optSheetIndex < 1) {
throw new Exception("目标sheet索引至少要从1开始。");
}
this.process(optSheetIndex);
}
public void stop() throws IOException {
if (is != null) {
is.close();
}
}
/**
* 处理行数据的策略
*/
public abstract void processRow(XRow row);
/**
* 辅助实现类,HSSF监听器
*
* @author zhangchaofeng
* @version 1.0
* @date Sep 28, 2011
*/
private class MyHSSFListener implements HSSFListener {
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add((BoundSheetRecord) record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// Create sub workbook if required
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
// Output the worksheet name
// Works by ordering the BSRs by the location of
// their BOFRecords, and then knowing that we
// process BOFRecords in byte offset order
sheetIndex++;
// System.out.println("锁定一个sheet:"+sheetIndex+",要操作的sheet:"+optSheetIndex);
/*
* if(sheetIndex!=optSheetIndex){
* System.out.println("不是要操作的sheet。"); return; }
*/
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
break;
case BoolErrRecord.sid:
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = "";
break;
case FormulaRecord.sid:
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression());
}
break;
case StringRecord.sid:
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
thisStr = lrec.getValue();
break;
case LabelSSTRecord.sid:
LabelSSTRecord lsrec = (LabelSSTRecord) record;
thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
thisStr = '"' + "(No SST Record, can't identify string)" + '"';
} else {
thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString();
}
break;
case NoteRecord.sid:
NoteRecord nrec = (NoteRecord) record;
thisRow = nrec.getRow();
thisColumn = nrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
case NumberRecord.sid:
NumberRecord numrec = (NumberRecord) record;
thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
// Format
thisStr = formatListener.formatNumberDateCell(numrec);
break;
case RKRecord.sid:
RKRecord rkrec = (RKRecord) record;
thisRow = rkrec.getRow();
thisColumn = rkrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
default:
break;
}
// 如果不是要操作的sheet,跳过
if ((sheetIndex + 1) != optSheetIndex && optSheetIndex != -1) {
// System.out.println("不是要操作的sheet。");
return;
}
// Handle new row
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// Handle missing column
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
thisRow = mc.getRow();
thisColumn = mc.getColumn();
thisStr = "";
}
// If we got something to print out, do so
if (thisStr != null) {
if (thisColumn > 0) {
// output.print(',');
}
row.setRowIndex(thisRow);
XCell cell = new XCell();
cell.setValue(thisStr);
cell.setColumnIndex(thisColumn);
row.addCell(cell);
// output.print(thisStr);
}
// Update column and row count
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
// Handle end of row
if (record instanceof LastCellOfRowDummyRecord) {
// Print out any missing commas if needed
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
// We're onto a new row
lastColumnNumber = -1;
// End the row
if (!isBlankRow(row)) {
processRow(row);
}
row = new XRow();
// output.println();
}
}
private boolean isBlankRow(XRow row) {
boolean b = true;
for (int i = 0; i < row.getCellsSize(); i++) {
XCell cell = row.getCell(i);
if (cell.getValue().equals("") || cell.getValue() == null) {
b = false;
}
}
return b;
}
}
}
再建立个2007处理器
package com.zskx.pem.standalone.excel;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public abstract class Excel2007RowProcessor extends DefaultHandler implements ExcelRowProcessor {
private SharedStringsTable sst;
private OPCPackage pkg;
private String lastContents;
private boolean nextIsString;
private int sheetIndex = -1;
private List<String> rowlist = new ArrayList<String>();
private int curRow = 0;
private int curCol = 0;
public Excel2007RowProcessor(OPCPackage pkg) {
this.pkg = pkg;
}
// excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
// public abstract void optRows(int curRow, List<String> rowlist) throws
// SQLException ;
// excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
public abstract void optRows(XRow row) throws Exception;
// 只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
public void processOneSheet(int sheet) throws Exception {
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
// 默认以rId+加sheet下标
InputStream sheet2 = r.getSheet("rId" + sheet);
sheetIndex++;
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
/**
* 遍历 excel 文件
*/
public void process() throws Exception {
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while (sheets.hasNext()) {
curRow = 0;
sheetIndex++;
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
return parser;
}
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
// c => 单元格
if (name.equals("c")) {
// 如果下一个元素是 SST 的索引,则将nextIsString标记为true
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// 置空
lastContents = "";
}
public void endElement(String uri, String localName, String name) throws SAXException {
// 根据SST的索引值的到单元格的真正要存储的字符串
// 这时characters()方法可能会被调用多次
if (nextIsString) {
try {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
} catch (Exception e) {
}
}
// t 是数据标签
// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
if (name.equals("v")) {
String value = lastContents.trim();
value = value.equals("") ? " " : value;
rowlist.add(curCol, value);
curCol++;
} else if (name.equals("t")) {
String value = lastContents.trim();
value = value.equals("") ? " " : value;
rowlist.add(curCol, value);
curCol++;
} else {
// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
if (name.equals("row")) {
try {
XRow row = new XRow();
for (int i = 0; i < rowlist.size(); i++) {
XCell cell = new XCell();
cell.setColumnIndex(i);
cell.setRowIndex(curRow);
cell.setValue((String) rowlist.get(i));
row.addCell(cell);
}
if (curRow > 0)
optRows(row);
} catch (Exception e) {
e.printStackTrace();
}
rowlist.clear();
curRow++;
curCol = 0;
}
}
}
public void characters(char[] ch, int start, int length) throws SAXException {
// 得到单元格内容的值
lastContents += new String(ch, start, length);
}
public void stop() throws IOException {
if (pkg != null) {
pkg.close();
}
}
}
解析来就是重点了,找到合适的处理器处理,然后建立一个读取行数据的抽象方法
package com.zskx.pem.standalone.excel;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.poi.openxml4j.opc.OPCPackage;
/**
* excel处理器
* @author yuzhang
*/
public abstract class ExcelProcessor implements ExcelRowProcessor{
private ExcelRowProcessor processor;
public ExcelProcessor(File file) throws Exception{
if(file.getName()==null||file.getName().equals("")){
throw new Exception("构造Excel导入器失败,未指定文件全名。");
}
if(!file.exists()){
throw new Exception("构造Excel导入器失败,指定的文件不存在:"+file.getName());
}
try {
OPCPackage op=OPCPackage.open(new FileInputStream(file));
processor=new MyExcel2007RowProcessor(op);
} catch (Exception e1) {
processor=new MyExcel2003RowProcessor(file);
e1.printStackTrace();
}
stop();
}
@Override
public void process() throws Exception {
processor.process();
}
@Override
public void stop() throws IOException {
processor.stop();
}
public abstract void processRow(XRow row);
private class MyExcel2003RowProcessor extends Excel2003RowProcessor{
public MyExcel2003RowProcessor(File file) throws Exception {
super(file);
}
@Override
public void processRow(XRow row) {
ExcelProcessor.this.processRow(row);
}
}
private class MyExcel2007RowProcessor extends Excel2007RowProcessor{
public MyExcel2007RowProcessor(OPCPackage pkg) throws Exception {
super(pkg);
}
@Override
public void optRows(XRow row) {
ExcelProcessor.this.processRow(row);
}
}
}
最后就是建立读取行数据的类了
package com.zskx.pem.standalone.excel;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class ReadExcel extends ExcelProcessor {
public ReadExcel(File file) throws Exception {
super(file);
}
/**
* 在这里处理数据
*/
@Override
public void processRow(XRow row) {
for (int i = 0; i < row.getCellsSize(); i++) {
System.out.print("[" + row.getRowIndex() + "," + row.getCell(i).getColumnIndex() + ","
+ row.getCell(i).getValue() + "]");
setUserInfoByIndex(row.getCell(i).getColumnIndex(), row.getCell(i).getValue());
}
System.out.println();
}
public static void readExcel(File file) {
try {
ReadExcel reader = new ReadExcel(file);
reader.process();// 处理所有的sheet
} catch (Exception e) {
e.printStackTrace();
}
// reader.stop();//运行一半需要停止调用此方法,释放文件流,正常运行完毕会自动释放
}
}
打工搞成。