一、描述
前端时间写了注解方式Excel的读取和写入,它是根据注解完成Excel的操作,虽说支持大数据,但对于超大数据就无能为力了,因为它的读写期间都是将所有数据放入系统内存的,除非你有超大的内存。
因项目需要对超大数据的Excel读写操作,于是网上找了个超大数据的读写代码,这个不需要太大内存。并对此进行了简单的修改。
原理如下:
Excel超大数据读取:抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析 xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低内存的耗费,特别使用于大数据量的文件。
Excel超大数据写入:抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml, 使用这种方法 写入.xlsx文件,不需要太大的内存。
先看调用示例:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
String file =
"E:/导入测试数据.xlsx"
;
ExcelReader reader =
new
ExcelReader() {
public
void
getRows(
int
sheetIndex,
int
curRow, List<String> rowList) {
System.out.println(
"Sheet:"
+ sheetIndex +
", Row:"
+ curRow +
", Data:"
+rowList);
}
};
reader.process(file,
1
);
String file =
"E:/导出测试数据.xlsx"
;
ExcelWriter writer =
new
ExcelWriter() {
public
void
generate()
throws
Exception {
// 电子表格开始
this
.beginSheet();
for
(
int
rownum =
0
; rownum <
100
; rownum++) {
// 插入新行
this
.insertRow(rownum);
// 建立新单元格,索引值从0开始,表示第一列
this
.createCell(
0
,
"第 "
+ rownum +
" 行"
);
this
.createCell(
1
,
34343.123456789
);
this
.createCell(
2
,
"23.67%"
);
this
.createCell(
3
,
"12:12:23"
);
this
.createCell(
4
,
"2014-10-11 12:12:23"
);
this
.createCell(
5
,
"true"
);
this
.createCell(
6
,
"false"
);
// 结束行
this
.endRow();
}
// 电子表格结束
this
.endSheet();
}
};
writer.process(file);
}
|
这里只展示了对数据的读取和写入,如果正式保存到数据库时建议读取一部分(如100条)再写入一次数据库,尽量不要读取一条就写入一条,这样会非常耗费资源。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
|
import
java.io.InputStream;
import
java.math.BigDecimal;
import
java.text.SimpleDateFormat;
import
java.util.ArrayList;
import
java.util.Date;
import
java.util.Iterator;
import
java.util.List;
import
org.apache.poi.hssf.usermodel.HSSFDateUtil;
import
org.apache.poi.openxml4j.opc.OPCPackage;
import
org.apache.poi.xssf.eventusermodel.XSSFReader;
import
org.apache.poi.xssf.model.SharedStringsTable;
import
org.apache.poi.xssf.usermodel.XSSFRichTextString;
import
org.xml.sax.Attributes;
import
org.xml.sax.InputSource;
import
org.xml.sax.SAXException;
import
org.xml.sax.XMLReader;
import
org.xml.sax.helpers.DefaultHandler;
import
org.xml.sax.helpers.XMLReaderFactory;
/**
* Excel超大数据读取,抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析
* xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低 内存的耗费,特别使用于大数据量的文件。
* @version 2014-9-2
*/
public
abstract
class
ExcelReader
extends
DefaultHandler {
// 共享字符串表
private
SharedStringsTable sst;
// 上一次的内容
private
String lastContents;
private
boolean
nextIsString;
private
int
sheetIndex = -
1
;
private
List<String> rowList =
new
ArrayList<String>();
// 当前行
private
int
curRow =
0
;
// 当前列
private
int
curCol =
0
;
// 日期标志
private
boolean
dateFlag;
// 数字标志
private
boolean
numberFlag;
private
boolean
isTElement;
/**
* 遍历工作簿中所有的电子表格
* @param filename
* @throws Exception
*/
public
void
process(String filename)
throws
Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r =
new
XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while
(sheets.hasNext()) {
curRow =
0
;
sheetIndex++;
InputStream sheet = sheets.next();
InputSource sheetSource =
new
InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
}
/**
* 只遍历一个电子表格,其中sheetId为要遍历的sheet索引,从1开始,1-3
* @param filename
* @param sheetId
* @throws Exception
*/
public
void
process(String filename,
int
sheetId)
throws
Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r =
new
XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
// 根据 rId# 或 rSheet# 查找sheet
InputStream sheet2 = r.getSheet(
"rId"
+ sheetId);
sheetIndex++;
InputSource sheetSource =
new
InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
public
XMLReader fetchSheetParser(SharedStringsTable sst)
throws
SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader(
"org.apache.xerces.parsers.SAXParser"
);
this
.sst = sst;
parser.setContentHandler(
this
);
return
parser;
}
public
void
startElement(String uri, String localName, String name,
Attributes attributes)
throws
SAXException {
// System.out.println("startElement: " + localName + ", " + name + ", " + attributes);
// c => 单元格
if
(
"c"
.equals(name)) {
// 如果下一个元素是 SST 的索引,则将nextIsString标记为true
String cellType = attributes.getValue(
"t"
);
if
(
"s"
.equals(cellType)) {
nextIsString =
true
;
}
else
{
nextIsString =
false
;
}
// 日期格式
String cellDateType = attributes.getValue(
"s"
);
if
(
"1"
.equals(cellDateType)) {
dateFlag =
true
;
}
else
{
dateFlag =
false
;
}
String cellNumberType = attributes.getValue(
"s"
);
if
(
"2"
.equals(cellNumberType)) {
numberFlag =
true
;
}
else
{
numberFlag =
false
;
}
}
// 当元素为t时
if
(
"t"
.equals(name)) {
isTElement =
true
;
}
else
{
isTElement =
false
;
}
// 置空
lastContents =
""
;
}
public
void
endElement(String uri, String localName, String name)
throws
SAXException {
// System.out.println("endElement: " + localName + ", " + name);
// 根据SST的索引值的到单元格的真正要存储的字符串
// 这时characters()方法可能会被调用多次
if
(nextIsString) {
try
{
int
idx = Integer.parseInt(lastContents);
lastContents =
new
XSSFRichTextString(sst.getEntryAt(idx))
.toString();
}
catch
(Exception e) {
}
}
// t元素也包含字符串
if
(isTElement) {
String value = lastContents.trim();
rowList.add(curCol, value);
curCol++;
isTElement =
false
;
// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
}
else
if
(
"v"
.equals(name)) {
String value = lastContents.trim();
value = value.equals(
""
) ?
" "
: value;
try
{
// 日期格式处理
if
(dateFlag) {
Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value));
SimpleDateFormat dateFormat =
new
SimpleDateFormat(
"dd/MM/yyyy"
);
value = dateFormat.format(date);
}
// 数字类型处理
if
(numberFlag) {
BigDecimal bd =
new
BigDecimal(value);
value = bd.setScale(
3
, BigDecimal.ROUND_UP).toString();
}
}
catch
(Exception e) {
// 转换失败仍用读出来的值
}
rowList.add(curCol, value);
curCol++;
}
else
{
// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
if
(name.equals(
"row"
)) {
getRows(sheetIndex +
1
, curRow, rowList);
rowList.clear();
curRow++;
curCol =
0
;
}
}
}
public
void
characters(
char
[] ch,
int
start,
int
length)
throws
SAXException {
// 得到单元格内容的值
lastContents +=
new
String(ch, start, length);
}
/**
* 获取行数据回调
* @param sheetIndex
* @param curRow
* @param rowList
*/
public
abstract
void
getRows(
int
sheetIndex,
int
curRow, List<String> rowList);
/**
* 测试方法
*/
public
static
void
main(String[] args)
throws
Exception {
String file =
"E:/导入测试数据.xlsx"
;
ExcelReader reader =
new
ExcelReader() {
public
void
getRows(
int
sheetIndex,
int
curRow, List<String> rowList) {
System.out.println(
"Sheet:"
+ sheetIndex +
", Row:"
+ curRow +
", Data:"
+rowList);
}
};
reader.process(file,
1
);
}
}
|
以下为写入程序
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
|
import
java.io.File;
import
java.io.FileInputStream;
import
java.io.FileOutputStream;
import
java.io.FileWriter;
import
java.io.IOException;
import
java.io.InputStream;
import
java.io.OutputStream;
import
java.io.Writer;
import
java.util.Calendar;
import
java.util.Enumeration;
import
java.util.zip.ZipEntry;
import
java.util.zip.ZipFile;
import
java.util.zip.ZipOutputStream;
import
org.apache.poi.hssf.util.CellReference;
import
org.apache.poi.ss.usermodel.DateUtil;
import
org.apache.poi.xssf.usermodel.XSSFSheet;
import
org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
* Excel超大数据写入,抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml,
* 使用这种方法 写入.xlsx文件,不需要太大的内存
* @version 2014-9-2
*/
public
abstract
class
ExcelWriter {
private
SpreadsheetWriter sw;
/**
* 写入电子表格的主要流程
*
* @param fileName
* @throws Exception
*/
public
void
process(String fileName)
throws
Exception {
// 建立工作簿和电子表格对象
XSSFWorkbook wb =
new
XSSFWorkbook();
XSSFSheet sheet = wb.createSheet(
"sheet1"
);
// 持有电子表格数据的xml文件名 例如 /xl/worksheets/sheet1.xml
String sheetRef = sheet.getPackagePart().getPartName().getName();
// 保存模板
FileOutputStream os =
new
FileOutputStream(
"template.xlsx"
);
wb.write(os);
os.close();
// 生成xml文件
File tmp = File.createTempFile(
"sheet"
,
".xml"
);
Writer fw =
new
FileWriter(tmp);
sw =
new
SpreadsheetWriter(fw);
generate();
fw.close();
// 使用产生的数据替换模板
File templateFile =
new
File(
"template.xlsx"
);
FileOutputStream out =
new
FileOutputStream(fileName);
substitute(templateFile, tmp, sheetRef.substring(
1
), out);
out.close();
// 删除文件之前调用一下垃圾回收器,否则无法删除模板文件
System.gc();
// 删除临时模板文件
if
(templateFile.isFile() && templateFile.exists()) {
templateFile.delete();
}
}
/**
* 类使用者应该使用此方法进行写操作
*
* @throws Exception
*/
public
abstract
void
generate()
throws
Exception;
public
void
beginSheet()
throws
IOException {
sw.beginSheet();
}
public
void
insertRow(
int
rowNum)
throws
IOException {
sw.insertRow(rowNum);
}
public
void
createCell(
int
columnIndex, String value)
throws
IOException {
sw.createCell(columnIndex, value, -
1
);
}
public
void
createCell(
int
columnIndex,
double
value)
throws
IOException {
sw.createCell(columnIndex, value, -
1
);
}
public
void
endRow()
throws
IOException {
sw.endRow();
}
public
void
endSheet()
throws
IOException {
sw.endSheet();
}
/**
*
* @param zipfile
* the template file
* @param tmpfile
* the XML file with the sheet data
* @param entry
* the name of the sheet entry to substitute, e.g.
* xl/worksheets/sheet1.xml
* @param out
* the stream to write the result to
*/
private
static
void
substitute(File zipfile, File tmpfile, String entry,
OutputStream out)
throws
IOException {
ZipFile zip =
new
ZipFile(zipfile);
ZipOutputStream zos =
new
ZipOutputStream(out);
@SuppressWarnings
(
"unchecked"
)
Enumeration<ZipEntry> en = (Enumeration<ZipEntry>) zip.entries();
while
(en.hasMoreElements()) {
ZipEntry ze = en.nextElement();
if
(!ze.getName().equals(entry)) {
zos.putNextEntry(
new
ZipEntry(ze.getName()));
InputStream is = zip.getInputStream(ze);
copyStream(is, zos);
is.close();
}
}
zos.putNextEntry(
new
ZipEntry(entry));
InputStream is =
new
FileInputStream(tmpfile);
copyStream(is, zos);
is.close();
zos.close();
}
private
static
void
copyStream(InputStream in, OutputStream out)
throws
IOException {
byte
[] chunk =
new
byte
[
1024
];
int
count;
while
((count = in.read(chunk)) >=
0
) {
out.write(chunk,
0
, count);
}
}
/**
* 在写入器中写入电子表格
*
*/
public
static
class
SpreadsheetWriter {
private
final
Writer _out;
private
int
_rownum;
private
static
String LINE_SEPARATOR = System
.getProperty(
"line.separator"
);
public
SpreadsheetWriter(Writer out) {
_out = out;
}
public
void
beginSheet()
throws
IOException {
_out.write(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
_out.write(
"<sheetData>"
+ LINE_SEPARATOR);
}
public
void
endSheet()
throws
IOException {
_out.write(
"</sheetData>"
);
_out.write(
"</worksheet>"
);
}
/**
* 插入新行
*
* @param rownum
* 以0开始
*/
public
void
insertRow(
int
rownum)
throws
IOException {
_out.write(
"<row r=\""
+ (rownum +
1
) +
"\">"
+ LINE_SEPARATOR);
this
._rownum = rownum;
}
/**
* 插入行结束标志
*/
public
void
endRow()
throws
IOException {
_out.write(
"</row>"
+ LINE_SEPARATOR);
}
/**
* 插入新列
*
* @param columnIndex
* @param value
* @param styleIndex
* @throws IOException
*/
public
void
createCell(
int
columnIndex, String value,
int
styleIndex)
throws
IOException {
String ref =
new
CellReference(_rownum, columnIndex)
.formatAsString();
_out.write(
"<c r=\""
+ ref +
"\" t=\"inlineStr\""
);
if
(styleIndex != -
1
)
_out.write(
" s=\""
+ styleIndex +
"\""
);
_out.write(
">"
);
_out.write(
"<is><t>"
+ encoderXML(value) +
"</t></is>"
);
_out.write(
"</c>"
);
}
public
void
createCell(
int
columnIndex, String value)
throws
IOException {
createCell(columnIndex, value, -
1
);
}
public
void
createCell(
int
columnIndex,
double
value,
int
styleIndex)
throws
IOException {
String ref =
new
CellReference(_rownum, columnIndex)
.formatAsString();
_out.write(
"<c r=\""
+ ref +
"\" t=\"n\""
);
if
(styleIndex != -
1
)
_out.write(
" s=\""
+ styleIndex +
"\""
);
_out.write(
">"
);
_out.write(
"<v>"
+ value +
"</v>"
);
_out.write(
"</c>"
);
}
public
void
createCell(
int
columnIndex,
double
value)
throws
IOException {
createCell(columnIndex, value, -
1
);
}
public
void
createCell(
int
columnIndex, Calendar value,
int
styleIndex)
throws
IOException {
createCell(columnIndex, DateUtil.getExcelDate(value,
false
),
styleIndex);
}
}
// XML Encode
private
static
final
String[] xmlCode =
new
String[
256
];
static
{
// Special characters
xmlCode[
'\''
] =
"'"
;
xmlCode[
'\"'
] = "\
""
;
// double quote
xmlCode[
'&'
] =
"&"
;
// ampersand
xmlCode[
'<'
] =
"<"
;
// lower than
xmlCode[
'>'
] =
">"
;
// greater than
}
/**
* <p>
* Encode the given text into xml.
* </p>
*
* @param string
* the text to encode
* @return the encoded string
*/
public
static
String encoderXML(String string) {
if
(string ==
null
)
return
""
;
int
n = string.length();
char
character;
String xmlchar;
StringBuffer buffer =
new
StringBuffer();
// loop over all the characters of the String.
for
(
int
i =
0
; i < n; i++) {
character = string.charAt(i);
// the xmlcode of these characters are added to a StringBuffer
// one by one
try
{
xmlchar = xmlCode[character];
if
(xmlchar ==
null
) {
buffer.append(character);
}
else
{
buffer.append(xmlCode[character]);
}
}
catch
(ArrayIndexOutOfBoundsException aioobe) {
buffer.append(character);
}
}
return
buffer.toString();
}
/**
* 测试方法
*/
public
static
void
main(String[] args)
throws
Exception {
String file =
"E:/导出测试数据.xlsx"
;
ExcelWriter writer =
new
ExcelWriter() {
public
void
generate()
throws
Exception {
// 电子表格开始
this
.beginSheet();
for
(
int
rownum =
0
; rownum <
100
; rownum++) {
// 插入新行
this
.insertRow(rownum);
// 建立新单元格,索引值从0开始,表示第一列
this
.createCell(
0
,
"第 "
+ rownum +
" 行"
);
this
.createCell(
1
,
34343.123456789
);
this
.createCell(
2
,
"23.67%"
);
this
.createCell(
3
,
"12:12:23"
);
this
.createCell(
4
,
"2014-10-11 12:12:23"
);
this
.createCell(
5
,
"true"
);
this
.createCell(
6
,
"false"
);
// 结束行
this
.endRow();
}
// 电子表格结束
this
.endSheet();
}
};
writer.process(file);
}
}
|