excel2007文件格式与之前版本不同,之前版本采用的是微软自己的存储格式。07版内容的存储采用XML格式,所以,理所当然的,对大数据量的xlsx文件的读取采用的也是XML的处理方式SAX。
同之前的版本一样,大数据量文件的读取采用的是事件模型eventusermodel。usermodel模式需要将文件一次性全部读到内存中,07版的既然采用的存储模式是xml,解析用的DOM方式也是如此,这种模式操作简单,容易上手,但是对于大量数据占用的内存也是相当可观,在Eclipse中经常出现内存溢出。
下面就是采用eventusermodel对07excel文件读取。
同上篇,我将当前行的单元格数据存储到List中,抽象出 optRows 方法,该方法会在每行末尾时调用,方法参数为当前行索引curRow(int型)及存有行内单元格数据的List。继承类只需实现该行级方法即可。
补充:今天发现 读取2007的脚本存在存在一处问题,在遇到空单元格时会跳过该单元格,由于工作紧张没有时间去解决该问题,这里给出一个暂时的处理办法。打开文件,在开始菜单中选择"查找和选择","定位条件",选择"空值",确定,这时会找出所有的空单元格,直接按空格,然后Ctrl+enter,就会将所有空单元格填入一个空格,保存即可。
2010.6.10补充:空单元格的问题已经解决,在2007的文档中空单元格是不存储的,单元格的r属性是单元格位置信息,格式为[A-Z]+[0-9]+。字母部分是列索引,数字部分是行索引。
抽象类:XxlsAbstract ,作用:遍历excel文件,提供行级操作方法 optRows
- package com.gaosheng.util.xls;
- import java.io.InputStream;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- import org.apache.poi.xssf.eventusermodel.XSSFReader;
- import org.apache.poi.xssf.model.SharedStringsTable;
- import org.apache.poi.xssf.usermodel.XSSFRichTextString;
- import org.apache.poi.openxml4j.opc.OPCPackage;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.DefaultHandler;
- import org.xml.sax.helpers.XMLReaderFactory;
- /**
- * XSSF and SAX (Event API)
- */
- public abstract class XxlsAbstract extends DefaultHandler {
- private SharedStringsTable sst;
- private String lastContents;
- private boolean nextIsString;
- private int sheetIndex = -1;
- private List<String> rowlist = new ArrayList<String>();
- private int curRow = 0; //当前行
- private int curCol = 0; //当前列索引
- private int preCol = 0; //上一列列索引
- private int titleRow = 0; //标题行,一般情况下为0
- private int rowsize = 0; //列数
- //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
- // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;
- //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
- public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
- //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
- public void processOneSheet(String filename,int sheetId) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- // rId2 found by processing the Workbook
- // 根据 rId# 或 rSheet# 查找sheet
- InputStream sheet2 = r.getSheet("rId"+sheetId);
- sheetIndex++;
- InputSource sheetSource = new InputSource(sheet2);
- parser.parse(sheetSource);
- sheet2.close();
- }
- /**
- * 遍历 excel 文件
- */
- public void process(String filename) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- Iterator<InputStream> sheets = r.getSheetsData();
- while (sheets.hasNext()) {
- curRow = 0;
- sheetIndex++;
- InputStream sheet = sheets.next();
- InputSource sheetSource = new InputSource(sheet);
- parser.parse(sheetSource);
- sheet.close();
- }
- }
- public XMLReader fetchSheetParser(SharedStringsTable sst)
- throws SAXException {
- XMLReader parser = XMLReaderFactory
- .createXMLReader("org.apache.xerces.parsers.SAXParser");
- this.sst = sst;
- parser.setContentHandler(this);
- return parser;
- }
- public void startElement(String uri, String localName, String name,
- Attributes attributes) throws SAXException {
- // c => 单元格
- if (name.equals("c")) {
- // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
- String cellType = attributes.getValue("t");
- String rowStr = attributes.getValue("r");
- curCol = this.getRowIndex(rowStr);
- if (cellType != null && cellType.equals("s")) {
- nextIsString = true;
- } else {
- nextIsString = false;
- }
- }
- // 置空
- lastContents = "";
- }
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- // 根据SST的索引值的到单元格的真正要存储的字符串
- // 这时characters()方法可能会被调用多次
- if (nextIsString) {
- try {
- int idx = Integer.parseInt(lastContents);
- lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
- .toString();
- } catch (Exception e) {
- }
- }
- // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
- // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
- if (name.equals("v")) {
- String value = lastContents.trim();
- value = value.equals("")?" ":value;
- int cols = curCol-preCol;
- if (cols>1){
- for (int i = 0;i < cols-1;i++){
- rowlist.add(preCol,"");
- }
- }
- preCol = curCol;
- rowlist.add(curCol-1, value);
- }else {
- //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
- if (name.equals("row")) {
- int tmpCols = rowlist.size();
- if(curRow>this.titleRow && tmpCols<this.rowsize){
- for (int i = 0;i < this.rowsize-tmpCols;i++){
- rowlist.add(rowlist.size(), "");
- }
- }
- try {
- optRows(sheetIndex,curRow,rowlist);
- } catch (SQLException e) {
- e.printStackTrace();
- }
- if(curRow==this.titleRow){
- this.rowsize = rowlist.size();
- }
- rowlist.clear();
- curRow++;
- curCol = 0;
- preCol = 0;
- }
- }
- }
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- //得到单元格内容的值
- lastContents += new String(ch, start, length);
- }
- //得到列索引,每一列c元素的r属性构成为字母加数字的形式,字母组合为列索引,数字组合为行索引,
- //如AB45,表示为第(A-A+1)*26+(B-A+1)*26列,45行
- public int getRowIndex(String rowStr){
- rowStr = rowStr.replaceAll("[^A-Z]", "");
- byte[] rowAbc = rowStr.getBytes();
- int len = rowAbc.length;
- float num = 0;
- for (int i=0;i<len;i++){
- num += (rowAbc[i]-'A'+1)*Math.pow(26,len-i-1 );
- }
- return (int) num;
- }
- public int getTitleRow() {
- return titleRow;
- }
- public void setTitleRow(int titleRow) {
- this.titleRow = titleRow;
- }
- }
继承类:XxlsBig,作用:将数据转出到数据库临时表
- package com.gaosheng.util.examples.xls;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.List;
- import java.util.Properties;
- import com.gaosheng.util.xls.XxlsAbstract;
- public class XxlsBig extends XxlsAbstract {
- public static void main(String[] args) throws Exception {
- XxlsBig howto = new XxlsBig("temp_table");
- howto.processOneSheet("F:/new.xlsx",1);
- howto.process("F:/new.xlsx");
- howto.close();
- }
- public XxlsBig(String tableName) throws SQLException{
- this.conn = getNew_Conn();
- this.statement = conn.createStatement();
- this.tableName = tableName;
- }
- private Connection conn = null;
- private Statement statement = null;
- private PreparedStatement newStatement = null;
- private String tableName = "temp_table";
- private boolean create = true;
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- if (sheetIndex == 0 && curRow == 0) {
- StringBuffer preSql = new StringBuffer("insert into " + tableName
- + " values(");
- StringBuffer table = new StringBuffer("create table " + tableName
- + "(");
- int c = rowlist.size();
- for (int i = 0; i < c; i++) {
- preSql.append("?,");
- table.append(rowlist.get(i));
- table.append(" varchar2(100) ,");
- }
- table.deleteCharAt(table.length() - 1);
- preSql.deleteCharAt(preSql.length() - 1);
- table.append(")");
- preSql.append(")");
- if (create) {
- statement = conn.createStatement();
- try{
- statement.execute("drop table "+tableName);
- }catch(Exception e){
- }finally{
- System.out.println("表 "+tableName+" 删除成功");
- }
- if (!statement.execute(table.toString())) {
- System.out.println("创建表 "+tableName+" 成功");
- // return;
- } else {
- System.out.println("创建表 "+tableName+" 失败");
- return;
- }
- }
- conn.setAutoCommit(false);
- newStatement = conn.prepareStatement(preSql.toString());
- } else if(curRow>0) {
- // 一般行
- int col = rowlist.size();
- for (int i = 0; i < col; i++) {
- newStatement.setString(i + 1, rowlist.get(i).toString());
- }
- newStatement.addBatch();
- if (curRow % 1000 == 0) {
- newStatement.executeBatch();
- conn.commit();
- }
- }
- }
- private static Connection getNew_Conn() {
- Connection conn = null;
- Properties props = new Properties();
- FileInputStream fis = null;
- try {
- fis = new FileInputStream("D:/database.properties");
- props.load(fis);
- DriverManager.registerDriver(new oracle.jdbc.driver.OracleDriver());
- // String jdbcURLString =
- // "jdbc:oracle:thin:@192.168.0.28:1521:orcl";
- StringBuffer jdbcURLString = new StringBuffer();
- jdbcURLString.append("jdbc:oracle:thin:@");
- jdbcURLString.append(props.getProperty("host"));
- jdbcURLString.append(":");
- jdbcURLString.append(props.getProperty("port"));
- jdbcURLString.append(":");
- jdbcURLString.append(props.getProperty("database"));
- conn = DriverManager.getConnection(jdbcURLString.toString(), props
- .getProperty("user"), props.getProperty("password"));
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- try {
- fis.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return conn;
- }
- public int close() {
- try {
- newStatement.executeBatch();
- conn.commit();
- System.out.println("数据写入完毕");
- this.newStatement.close();
- this.statement.close();
- this.conn.close();
- return 1;
- } catch (SQLException e) {
- return 0;
- }
- }
- }
继承类:XxlsPrint,作用:将数据输出到控制台
- package com.gaosheng.util.examples.xls;
- import java.sql.SQLException;
- import java.util.List;
- import com.gaosheng.util.xls.XxlsAbstract;
- public class XxlsPrint extends XxlsAbstract {
- @Override
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- for (int i = 0; i < rowlist.size(); i++) {
- System.out.print("'" + rowlist.get(i) + "',");
- }
- System.out.println();
- }
- public static void main(String[] args) throws Exception {
- XxlsPrint howto = new XxlsPrint();
- howto.processOneSheet("F:/new.xlsx",1);
- // howto.processAllSheets("F:/new.xlsx");
- }
- }
源代码在附件中,还包含了说明文件、数据库配置文件、以及整合xls文件和xlsx文件读取的类:Xls2Do。
小弟刚进公司,目前有一项任务要把客户数据迁移到数据库中,由于客户提供的数据都存储在excel中,有些文件数据量还很大,在usermodel模式下经常内存溢出,于是只能采用直接通过pl/sql往数据库复制或是用eventusermodel模式读取。直接复制倒是简单,但是速度太慢,一次复制的太多也会内存溢出,我没那耐心,没办法,只能用第二种办法了。在google上找,发现相关示例太少了,幸好在docjar找到了一个示例,自己又改了一下,把原来的例子改为抽象类,提供了一个 optRows() 方法来对行级数据进行操作。
usermodel模式对excel操作前需要将文件全部转入内存,对较大文件来说内存开销很大。但是其使用简单。
eventusermodel模式采用事件模型,对文件边读取边处理,内存消耗较低,效率高,因为不用等待文件全部装入内存。但使用较复杂。
补充:今天发现 读取2007的脚本存在存在一处问题,在遇到空单元格时会跳过该单元格,由于工作紧张没有时间去解决该问题,这里给出一个暂时的处理办法。打开文件,在开始菜单中选择"查找和选择","定位条件",选择"空值",确定,这时会找出所有的空单元格,直接按空格,然后Ctrl+enter,就会将所有空单元格填入一个空格,保存即可。
下面展示的是excel2003及其之前版本的大文件读取方法。
抽象类 HxlsAbstract:
- package com.gaosheng.util.xls;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.List;
- import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
- import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
- import org.apache.poi.hssf.eventusermodel.HSSFListener;
- import org.apache.poi.hssf.eventusermodel.HSSFRequest;
- import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
- import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
- import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
- import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
- import org.apache.poi.hssf.model.HSSFFormulaParser;
- import org.apache.poi.hssf.record.BOFRecord;
- import org.apache.poi.hssf.record.BlankRecord;
- import org.apache.poi.hssf.record.BoolErrRecord;
- import org.apache.poi.hssf.record.BoundSheetRecord;
- import org.apache.poi.hssf.record.FormulaRecord;
- import org.apache.poi.hssf.record.LabelRecord;
- import org.apache.poi.hssf.record.LabelSSTRecord;
- import org.apache.poi.hssf.record.NoteRecord;
- import org.apache.poi.hssf.record.NumberRecord;
- import org.apache.poi.hssf.record.RKRecord;
- import org.apache.poi.hssf.record.Record;
- import org.apache.poi.hssf.record.SSTRecord;
- import org.apache.poi.hssf.record.StringRecord;
- import org.apache.poi.hssf.usermodel.HSSFWorkbook;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
- public abstract class HxlsAbstract implements HSSFListener {
- private int minColumns;
- private POIFSFileSystem fs;
- private PrintStream output;
- private int lastRowNumber;
- private int lastColumnNumber;
- /** Should we output the formula, or the value it has? */
- private boolean outputFormulaValues = true;
- /** For parsing Formulas */
- private SheetRecordCollectingListener workbookBuildingListener;
- private HSSFWorkbook stubWorkbook;
- // Records we pick up as we process
- private SSTRecord sstRecord;
- private FormatTrackingHSSFListener formatListener;
- /** So we known which sheet we're on */
- private int sheetIndex = -1;
- private BoundSheetRecord[] orderedBSRs;
- @SuppressWarnings("unchecked")
- private ArrayList boundSheetRecords = new ArrayList();
- // For handling formulas with string results
- private int nextRow;
- private int nextColumn;
- private boolean outputNextStringRecord;
- private int curRow;
- private List<String> rowlist;
- @SuppressWarnings( "unused")
- private String sheetName;
- public HxlsAbstract(POIFSFileSystem fs)
- throws SQLException {
- this.fs = fs;
- this.output = System.out;
- this.minColumns = -1;
- this.curRow = 0;
- this.rowlist = new ArrayList<String>();
- }
- public HxlsAbstract(String filename) throws IOException,
- FileNotFoundException, SQLException {
- this(new POIFSFileSystem(new FileInputStream(filename)));
- }
- //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
- // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;
- //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
- public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
- /**
- * 遍历 excel 文件
- */
- public void process() throws IOException {
- MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
- this);
- formatListener = new FormatTrackingHSSFListener(listener);
- HSSFEventFactory factory = new HSSFEventFactory();
- HSSFRequest request = new HSSFRequest();
- if (outputFormulaValues) {
- request.addListenerForAllRecords(formatListener);
- } else {
- workbookBuildingListener = new SheetRecordCollectingListener(
- formatListener);
- request.addListenerForAllRecords(workbookBuildingListener);
- }
- factory.processWorkbookEvents(request, fs);
- }
- /**
- * HSSFListener 监听方法,处理 Record
- */
- @SuppressWarnings("unchecked")
- public void processRecord(Record record) {
- int thisRow = -1;
- int thisColumn = -1;
- String thisStr = null;
- String value = null;
- switch (record.getSid()) {
- case BoundSheetRecord.sid:
- boundSheetRecords.add(record);
- break;
- case BOFRecord.sid:
- BOFRecord br = (BOFRecord) record;
- if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
- // Create sub workbook if required
- if (workbookBuildingListener != null && stubWorkbook == null) {
- stubWorkbook = workbookBuildingListener
- .getStubHSSFWorkbook();
- }
- // Works by ordering the BSRs by the location of
- // their BOFRecords, and then knowing that we
- // process BOFRecords in byte offset order
- sheetIndex++;
- if (orderedBSRs == null) {
- orderedBSRs = BoundSheetRecord
- .orderByBofPosition(boundSheetRecords);
- }
- sheetName = orderedBSRs[sheetIndex].getSheetname();
- }
- break;
- case SSTRecord.sid:
- sstRecord = (SSTRecord) record;
- break;
- case BlankRecord.sid:
- BlankRecord brec = (BlankRecord) record;
- thisRow = brec.getRow();
- thisColumn = brec.getColumn();
- thisStr = "";
- break;
- case BoolErrRecord.sid:
- BoolErrRecord berec = (BoolErrRecord) record;
- thisRow = berec.getRow();
- thisColumn = berec.getColumn();
- thisStr = "";
- break;
- case FormulaRecord.sid:
- FormulaRecord frec = (FormulaRecord) record;
- thisRow = frec.getRow();
- thisColumn = frec.getColumn();
- if (outputFormulaValues) {
- if (Double.isNaN(frec.getValue())) {
- // Formula result is a string
- // This is stored in the next record
- outputNextStringRecord = true;
- nextRow = frec.getRow();
- nextColumn = frec.getColumn();
- } else {
- thisStr = formatListener.formatNumberDateCell(frec);
- }
- } else {
- thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
- frec.getParsedExpression()) + '"';
- }
- break;
- case StringRecord.sid:
- if (outputNextStringRecord) {
- // String for formula
- StringRecord srec = (StringRecord) record;
- thisStr = srec.getString();
- thisRow = nextRow;
- thisColumn = nextColumn;
- outputNextStringRecord = false;
- }
- break;
- case LabelRecord.sid:
- LabelRecord lrec = (LabelRecord) record;
- curRow = thisRow = lrec.getRow();
- thisColumn = lrec.getColumn();
- value = lrec.getValue().trim();
- value = value.equals("")?" ":value;
- this.rowlist.add(thisColumn, value);
- break;
- case LabelSSTRecord.sid:
- LabelSSTRecord lsrec = (LabelSSTRecord) record;
- curRow = thisRow = lsrec.getRow();
- thisColumn = lsrec.getColumn();
- if (sstRecord == null) {
- rowlist.add(thisColumn, " ");
- } else {
- value = sstRecord
- .getString(lsrec.getSSTIndex()).toString().trim();
- value = value.equals("")?" ":value;
- rowlist.add(thisColumn,value);
- }
- break;
- case NoteRecord.sid:
- NoteRecord nrec = (NoteRecord) record;
- thisRow = nrec.getRow();
- thisColumn = nrec.getColumn();
- // TODO: Find object to match nrec.getShapeId()
- thisStr = '"' + "(TODO)" + '"';
- break;
- case NumberRecord.sid:
- NumberRecord numrec = (NumberRecord) record;
- curRow = thisRow = numrec.getRow();
- thisColumn = numrec.getColumn();
- value = formatListener.formatNumberDateCell(numrec).trim();
- value = value.equals("")?" ":value;
- // Format
- rowlist.add(thisColumn, value);
- break;
- case RKRecord.sid:
- RKRecord rkrec = (RKRecord) record;
- thisRow = rkrec.getRow();
- thisColumn = rkrec.getColumn();
- thisStr = '"' + "(TODO)" + '"';
- break;
- default:
- break;
- }
- // 遇到新行的操作
- if (thisRow != -1 && thisRow != lastRowNumber) {
- lastColumnNumber = -1;
- }
- // 空值的操作
- if (record instanceof MissingCellDummyRecord) {
- MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
- curRow = thisRow = mc.getRow();
- thisColumn = mc.getColumn();
- rowlist.add(thisColumn," ");
- }
- // 如果遇到能打印的东西,在这里打印
- if (thisStr != null) {
- if (thisColumn > 0) {
- output.print(',');
- }
- output.print(thisStr);
- }
- // 更新行和列的值
- if (thisRow > -1)
- lastRowNumber = thisRow;
- if (thisColumn > -1)
- lastColumnNumber = thisColumn;
- // 行结束时的操作
- if (record instanceof LastCellOfRowDummyRecord) {
- if (minColumns > 0) {
- // 列值重新置空
- if (lastColumnNumber == -1) {
- lastColumnNumber = 0;
- }
- }
- // 行结束时, 调用 optRows() 方法
- lastColumnNumber = -1;
- try {
- optRows(sheetIndex,curRow, rowlist);
- } catch (SQLException e) {
- e.printStackTrace();
- }
- rowlist.clear();
- }
- }
- }
继承类: HxlsBig,作用:将excel中数据转储到数据库临时表中,实现optRows方法
- package com.gaosheng.util.examples.xls;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.List;
- import java.util.Properties;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
- import com.gaosheng.util.xls.HxlsAbstract;
- public class HxlsBig extends HxlsAbstract{
- public static void main(String[] args) throws Exception {
- // XLS2CSVmra xls2csv = new XLS2CSVmra(args[0], minColumns);
- HxlsBig xls2csv = new HxlsBig("E:/up.xls","hxls_temp");
- xls2csv.process();
- xls2csv.close();
- }
- public HxlsBig(POIFSFileSystem fs, PrintStream output,String tableName)
- throws SQLException {
- super(fs);
- this.conn = getNew_Conn();
- this.statement = conn.createStatement();
- this.tableName = tableName;
- }
- public HxlsBig(String filename,String tableName) throws IOException,
- FileNotFoundException, SQLException {
- this(new POIFSFileSystem(new FileInputStream(filename)), System.out,tableName);
- }
- private Connection conn = null;
- private Statement statement = null;
- private PreparedStatement newStatement = null;
- private String tableName = "temp_table";
- private boolean create = true;
- // private int sheetIndex = 0;
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- if (curRow == 0 && sheetIndex == 0 ) {
- StringBuffer preSql = new StringBuffer("insert into " + tableName
- + " values(");
- StringBuffer table = new StringBuffer("create table " + tableName
- + "(");
- int c = rowlist.size();
- for (int i = 0; i < c; i++) {
- preSql.append("?,");
- table.append(rowlist.get(i));
- table.append(" varchar2(100) ,");
- }
- table.deleteCharAt(table.length() - 1);
- preSql.deleteCharAt(preSql.length() - 1);
- table.append(")");
- preSql.append(")");
- if (create) {
- statement = conn.createStatement();
- try{
- statement.execute("drop table "+tableName);
- }catch(Exception e){
- }finally{
- System.out.println("表 "+tableName+" 删除成功");
- }
- if (!statement.execute(table.toString())) {
- System.out.println("创建表 "+tableName+" 成功");
- // return;
- } else {
- System.out.println("创建表 "+tableName+" 失败");
- return;
- }
- }
- conn.setAutoCommit(false);
- newStatement = conn.prepareStatement(preSql.toString());
- }else if(curRow > 0) {
- // 一般行
- int col = rowlist.size();
- for (int i = 0; i < col; i++) {
- newStatement.setString(i + 1, rowlist.get(i).toString());
- }
- newStatement.addBatch();
- if (curRow % 1000 == 0) {
- newStatement.executeBatch();
- conn.commit();
- }
- }
- }
- private static Connection getNew_Conn() {
- Connection conn = null;
- Properties props = new Properties();
- FileInputStream fis = null;
- try {
- fis = new FileInputStream("D:/database.properties");
- props.load(fis);
- DriverManager.registerDriver(new oracle.jdbc.driver.OracleDriver());
- StringBuffer jdbcURLString = new StringBuffer();
- jdbcURLString.append("jdbc:oracle:thin:@");
- jdbcURLString.append(props.getProperty("host"));
- jdbcURLString.append(":");
- jdbcURLString.append(props.getProperty("port"));
- jdbcURLString.append(":");
- jdbcURLString.append(props.getProperty("database"));
- conn = DriverManager.getConnection(jdbcURLString.toString(), props
- .getProperty("user"), props.getProperty("password"));
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- try {
- fis.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return conn;
- }
- public int close() {
- try {
- newStatement.executeBatch();
- conn.commit();
- System.out.println("数据写入完毕");
- this.newStatement.close();
- this.statement.close();
- this.conn.close();
- return 1;
- } catch (SQLException e) {
- return 0;
- }
- }
- }
继承类 : HxlsPrint,作用:将excel中数据输出到控制台,实现optRows方法
- package com.gaosheng.util.examples.xls;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.sql.SQLException;
- import java.util.List;
- import com.gaosheng.util.xls.HxlsAbstract;
- public class HxlsPrint extends HxlsAbstract{
- public HxlsPrint(String filename) throws IOException,
- FileNotFoundException, SQLException {
- super(filename);
- }
- @Override
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- for (int i = 0 ;i< rowlist.size();i++){
- System.out.print("'"+rowlist.get(i)+"',");
- }
- System.out.println();
- }
- public static void main(String[] args){
- HxlsPrint xls2csv;
- try {
- xls2csv = new HxlsPrint("E:/new.xls");
- xls2csv.process();
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } catch (SQLException e) {
- e.printStackTrace();
- }
- }
- }
excel2007的大数据量读取见下篇,附件中包含了2003等较早版本的xls文件的示例,也包含了2007版的示例
目前公司接的项目都有很多功能的数据都要依靠excel导入,因此对excel中数据的验证必不可少。
先来看一下目前存在的问题:
一:在每处导入的程序中都会包括excel读取、数据验证、错误数据的导出或错误的输出,每次都是拷贝、粘帖、修改,本来不同的地方只有验证部分,但由于格式、验证的不同,在修改过程中还要对excel的读取、错误的导出进行修改、调试,造成工作效率的降低和时间的浪费。
二:由于人员更替频繁,每个人的风格都不一样,对于错误的显示和输出都不一样,客户看到的结果是每处导入的地方返回的错误结果也不一样,有的只是提醒一句成功、失败,有的则会把错误的记录导出excel供客户下载修改。客户对此也有很多抱怨。
解决思路:
在excel导入中我们关心的(也是唯一不同的)是数据的验证、保存,对于读取、错误记录导出并不关心,那就该把这两部分分离出来,这样的好处有:1.导入时不再关心excel的读取和错误信息的导出,编码及调试时不再为这部分付出时间和精力,加快开发效率。2.降低耦合度,目前对excel的操作使用的是jxl,如果以后改为poi那只需要需改excel操作的实现即可。3.统一,所有的导入使用相同的excel操作实现,如果excel读取操作有bug则只需修改一处(写此代码的起因就是一个同事出现的一个bug引起的),而且对错误记录的输出也有统一的输出。
解决办法:
限于本人的表达能力,要想讲清楚太费时间和篇幅了,在这里就直接上代码了
首先是抽象类 ImportDataMultiSheet,包括excel读取、错误记录导出的实现,支持多sheet及合并单元格的处理
- import java.io.File;
- import java.io.IOException;
- import java.lang.reflect.Method;
- import java.text.DateFormat;
- import java.text.SimpleDateFormat;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Locale;
- import java.util.Map;
- import java.util.TimeZone;
- import org.gaosheng.util.exception.EntityException;
- import jxl.Cell;
- import jxl.CellType;
- import jxl.DateCell;
- import jxl.NumberCell;
- import jxl.Range;
- import jxl.Sheet;
- import jxl.Workbook;
- import jxl.format.Colour;
- import jxl.write.Label;
- import jxl.write.WritableCellFormat;
- import jxl.write.WritableFont;
- import jxl.write.WritableSheet;
- import jxl.write.WritableWorkbook;
- /**
- * jxl导入excel类,继承类实现验证方法
- *
- * @author gaosheng
- *
- */
- public abstract class ImportDataMultiSheet {
- private int startRow = 1;
- private int startColumn = 0;
- private int minRows = 1;
- private int minColumns = 1;
- private int maxRows = -1;
- private int maxColumns = -1;
- private Map<Integer, Method> methodMap;
- private Map<Integer, List<String>> holdColumns;
- private List<String>[] titiles ;
- private List<MeregRange> meregRangeList ;
- private Cell curCell;
- private Cell[] curRowCells;
- private int successcount = 0;
- private String[] columnMethods = null;
- private int[] needHoldColumns = null;
- private File importExcel;
- private File errorExcel;
- private boolean hasError = false;
- private List<String> errors = new ArrayList<String>();
- /**
- * 启动导入
- *
- * @return boolean
- * @throws SecurityException
- * @throws NoSuchMethodException
- * @throws EntityException
- */
- public boolean execute() throws SecurityException, NoSuchMethodException,
- EntityException {
- setMethodMap();
- setHoldColumns();
- Workbook work = null;
- try {
- work = Workbook.getWorkbook(importExcel);
- } catch (Exception e) {
- throw new EntityException("Excel表格读取异常!批量导入失败!<br/>");
- }
- //数据总行数
- int totalRows = 0;
- Sheet sheet = null;
- WritableWorkbook writew = null;
- WritableSheet writes = null;
- int sheet_num = work.getNumberOfSheets();
- // 全局验证
- if (!this.validGlobal(work.getSheets())) {
- throw new EntityException("导入文件格式错误");
- }
- try {
- for (int sheet_index = 0; sheet_index < sheet_num ;sheet_index++) {
- sheet = work.getSheet(sheet_index);
- meregRangeList = new ArrayList<MeregRange>();
- int columns = sheet.getColumns();
- int rows = sheet.getRows();
- totalRows += rows;
- for (Range range : sheet.getMergedCells()) {
- Cell topleft = range.getTopLeft();
- Cell bottomRight = range.getBottomRight();
- meregRangeList.add(new MeregRange(topleft.getRow(),topleft.getColumn(),bottomRight.getRow(),bottomRight.getColumn(),getCellValue(topleft)));
- }
- writew = Workbook.createWorkbook(errorExcel);
- writes = writew.createSheet("ErrorReport", 0);
- Label label;
- WritableCellFormat wcf;
- titiles = new List[startRow];
- List<String> list = null;
- for (int i = 0; i < startRow; i++) {
- list = new ArrayList<String>();
- for (int j = 0; j < columns; j++) {
- label = new Label(j, i, getCellValue(sheet.getCell(j, i)));
- writes.addCell(label);
- list.add(getValue(sheet.getCell(j, i)));
- }
- titiles[i] = list;
- }
- label = new Label(columns, startRow - 1, "错误详细");
- WritableFont wf0 = new WritableFont(WritableFont.TIMES, 12);
- wcf = new WritableCellFormat(wf0);
- label.setCellFormat(wcf);
- writes.addCell(label);
- int wi = startRow;
- // -------------------------
- StringBuffer info_temp = null;
- String result = null;
- Method method = null;
- for (int i = startRow; i < rows; i++) {
- curRowCells = sheet.getRow(i);
- if (curRowCells == null || curRowCells.length < minColumns) {
- continue;
- }
- boolean[] wj = new boolean[columns];
- info_temp = new StringBuffer();
- for (int j = startColumn; j < columns; j++) {
- curCell = sheet.getCell(j, i);
- // System.out.print(String.format("%-30.30s", this.getValue(curCell))+" ");
- result = everyCell();
- if (result != null) {
- method = methodMap.get(j);
- if (method == null) {
- continue;
- }
- result = (String) method.invoke(this, null);
- }
- if (result != null) {
- info_temp.append(result);
- info_temp.append(" ");
- wj[j] = true;
- }
- if (holdColumns.get(j) != null) {
- holdColumns.get(j).add(this.getValue(curCell));
- }
- if (info_temp.length() > 0) {
- errors.add("sheet "+sheet.getName()+" 中第 " + (i + 1) + " 行 :"
- + info_temp.toString());
- }
- }
- // System.out.println();
- if (info_temp.length() > 1) {
- for (int ii = startColumn; ii < columns; ii++) {
- Cell c_temp = sheet.getCell(ii, i);
- label = new Label(ii, wi, c_temp.getContents().trim());
- wcf = new WritableCellFormat();
- if (wj[ii])
- wcf.setBackground(Colour.RED);
- label.setCellFormat(wcf);
- writes.addCell(label);
- }
- label = new Label(columns, wi, info_temp.toString());
- WritableFont wf = new WritableFont(WritableFont.TIMES,
- 12);
- wf.setColour(Colour.RED);
- wcf = new WritableCellFormat(wf);
- label.setCellFormat(wcf);
- writes.addCell(label);
- wi++;
- } else {
- this.save();
- successcount ++;
- }
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- this.hasError = true;
- errors.add("sheet "+sheet.getName()+" 第"+this.curCell.getRow() +" 行 第 "+ this.curCell.getColumn()+" 列 :"+this.getCurCell().getContents()+" 遇到错误");
- return false;
- } finally {
- try {
- writew.write();
- writew.close();
- work.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- if (successcount < totalRows - sheet_num*startRow) {
- this.hasError = true;
- }
- return true;
- }
- /**
- * 全局验证,验证对行数和列数的要求
- *
- * @return
- */
- public boolean validGlobal(Sheet[] sheets) {
- for (int i = 0; i < sheets.length; i++) {
- if (minRows != -1 && sheets[i].getRows() < minRows) {
- return false;
- } else if (minColumns != -1 && sheets[i].getColumns() < minColumns) {
- return false;
- } else if (maxRows != -1 && sheets[i].getRows() > maxRows) {
- return false;
- } else if (maxColumns != -1 && sheets[i].getColumns() > maxColumns) {
- return false;
- }
- }
- return true;
- }
- /**
- * 一行数据验证成功后保存
- * @return boolean
- */
- public abstract boolean save();
- /**
- * 对每一个单元格进行的操作
- * @return boolean
- */
- public abstract String everyCell();
- /**
- * 初始化存储验证列方法的Map
- *
- * @throws SecurityException
- * @throws NoSuchMethodException
- */
- @SuppressWarnings("unchecked")
- private void setMethodMap() throws SecurityException, NoSuchMethodException {
- methodMap = new HashMap<Integer, Method>();
- if (columnMethods == null) {
- Method[] methods = this.getClass().getMethods();
- for (int i = 0; i < methods.length; i++) {
- if (methods[i].getName().startsWith("validColumn_")) {
- String column = methods[i].getName().substring(
- methods[i].getName().indexOf("_") + 1);
- try {
- methodMap.put(Integer.parseInt(column), methods[i]);
- } catch (Exception e) {
- throw new NumberFormatException("默认列明必须为数字");
- }
- }
- }
- } else {
- Class<ImportDataMultiSheet> class1 = (Class<ImportDataMultiSheet>) this.getClass();
- for (int i = 0; i < columnMethods.length; i++) {
- methodMap.put(i, class1.getMethod(columnMethods[i], null));
- }
- }
- }
- /**
- * 初始化存储保留列的Map,保留列用于验证某些列值时需引用其他列的情况
- */
- private void setHoldColumns() {
- holdColumns = new HashMap<Integer, List<String>>();
- if (needHoldColumns == null) {
- return;
- }
- for (int i = 0; i < needHoldColumns.length; i++) {
- holdColumns.put(needHoldColumns[i], new ArrayList<String>());
- }
- }
- /**
- * 获得给定单元格的实际值,对于时间会返回 'yyyy-MM-dd HH:mm:ss' 格式的字符串
- *
- * @param cell
- * @return String
- */
- public static String getCellValue(Cell cell) {
- if (cell.getType().equals(CellType.NUMBER)) {
- return Double.toString(((NumberCell) cell).getValue());
- } else if (cell.getType().equals(CellType.DATE)) {
- TimeZone gmt = TimeZone.getTimeZone("GMT");
- DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss",
- Locale.getDefault());
- dateFormat.setTimeZone(gmt);
- return dateFormat.format(((DateCell) cell).getDate());
- } else if (cell.getType().equals(CellType.EMPTY)) {
- return null;
- } else {
- return cell.getContents().trim();
- }
- }
- public String getValue(Cell cell){
- String value = getCellValue(cell);
- if (value == null || getCellValue(cell).equals("")) {
- for(MeregRange meregRange:meregRangeList){
- if (meregRange.isInRange(cell.getRow(), cell.getColumn())) {
- return meregRange.getValue();
- }
- }
- return value;
- }else {
- return value;
- }
- }
- /**
- * 防止空指针
- *
- * @param object
- * @return String
- */
- public String fixNull(Object object) {
- return object == null ? "" : object.toString();
- }
- public int getMinRows() {
- return minRows;
- }
- public void setMinRows(int minRows) {
- this.minRows = minRows;
- }
- public int getMinColumns() {
- return minColumns;
- }
- public void setMinColumns(int minColumns) {
- this.minColumns = minColumns;
- }
- public int getMaxRows() {
- return maxRows;
- }
- public void setMaxRows(int maxRows) {
- this.maxRows = maxRows;
- }
- public int getMaxColumns() {
- return maxColumns;
- }
- public void setMaxColumns(int maxColumns) {
- this.maxColumns = maxColumns;
- }
- public String[] getColumnMethods() {
- return columnMethods;
- }
- public void setColumnMethods(String[] columnMethods) {
- this.columnMethods = columnMethods;
- }
- public File getImportExcel() {
- return importExcel;
- }
- public void setImportExcel(File importExcel) {
- this.importExcel = importExcel;
- }
- public File getErrorExcel() {
- return errorExcel;
- }
- public void setErrorExcel(File errorExcel) {
- this.errorExcel = errorExcel;
- }
- public boolean isHasError() {
- return hasError;
- }
- public int[] getNeedHoldColumns() {
- return needHoldColumns;
- }
- public void setNeedHoldColumns(int[] needHoldColumns) {
- this.needHoldColumns = needHoldColumns;
- }
- public Map<Integer, List<String>> getHoldColumns() {
- return holdColumns;
- }
- public int getStartRow() {
- return startRow;
- }
- public void setStartRow(int startRow) {
- this.startRow = startRow;
- }
- public int getStartColumn() {
- return startColumn;
- }
- public void setStartColumn(int startColumn) {
- this.startColumn = startColumn;
- }
- public Cell getCurCell() {
- return curCell;
- }
- public List<String> getErrors() {
- return errors;
- }
- public Cell[] getCurRowCells() {
- return curRowCells;
- }
- public List<String>[] getTitiles() {
- return titiles;
- }
- public int getSuccesscount() {
- return successcount;
- }
- }
下面是一个实现类的范例:
- import java.io.File;
- import java.util.List;
- import jxl.Cell;
- import org.gaosheng.util.exception.EntityException;
- import org.gaosheng.util.xls.ImportDataMultiSheet;
- public class ImportDatemultiImp extends ImportDataMultiSheet {
- public static void main(String[] args) throws SecurityException, NoSuchMethodException, EntityException {
- File importFile = new File("F:/test.xls");
- File errorFile = new File("F:/error.xls");
- ImportDatemultiImp importDateImp = new ImportDatemultiImp();
- importDateImp.setImportExcel(importFile);
- importDateImp.setErrorExcel(errorFile);
- importDateImp.setStartRow(1);
- importDateImp.execute();
- importDateImp.getErrorExcel();
- for (String error : importDateImp.getErrors()) {
- System.out.println(error);
- }
- }
- //对每一个单元格的执行的统一操作,返回值为错误信息,没有错误则返回null
- public String everyCell() {
- Cell cell = this.getCurCell();
- List<String> semList = this.getTitiles()[2];
- List<String> courseList = this.getTitiles()[3];
- if (cell.getRow() > 3 && cell.getColumn() > 3) {
- String cellvalue = this.getValue(cell);
- String course_name = courseList.get(cell.getColumn());
- String reg_no = this.getValue(this.getCurRowCells()[1]);
- String stuname = this.getValue(this.getCurRowCells()[2]);
- if (cellvalue != null && !cellvalue.equals("") && course_name !=null && !course_name.equals("") && reg_no != null && !reg_no.equals("")) {
- }else {
- return "无效成绩";
- }
- }
- return null;
- }
- //定义每一列的验证,默认方法名是validColumn_+列索引,也可以用setColumnMethods(String[] columnMethods)指定列的验证方法 返回值为错误信息,没有错误则返回null
- public String validColumn_1(){
- if (!this.getCurCell().getContents().equals("name")) {
- return "姓名错误";
- }
- return null;
- }
- public String validColumn_2(){
- if (!this.getCurCell().getContents().equals("passwd")) {
- return "密码错误";
- }
- return null;
- }
- public String validColumn_3(){
- return null;
- }
- //验证成功后保存记录
- public boolean save() {
- return false;
- }
- }