导出
程序实现有以下技术难点
1. 一次读取这么大的数据量,肯定会导致服务器内存溢出;
2. 调用接口保存一次传输数据量太大,网络传输压力会很大;
3. 最终通过SQL一次批量插入,对数据库压力也比较大,如果业务同时操作这个表数据,很容易造成死锁。
解决思路
根据列举的技术难点我的解决思路是:
1. 既然一次读取整个导入文件,那就先将文件流上传到服务器磁盘,然后分批从磁盘读取(支持多线程读取),这样就防止内存溢出;
2. 调用插入数据库接口也是根据分批读取的内容进行调用;
3. 分批插入数据到数据库。
(根据学过大数据的相关知识---可以将解决的思路:上传导入文件Excel到Hadoop进行存储,利用spark进行读取并写入数据库)
导入
使用POI或JXLS导出大数据量(百万级)Excel报表常常面临两个问题:
1. 服务器内存溢出;
2. 一次从数据库查询出这么大数据,查询缓慢。
当然也可以分页查询出数据,分别生成多个Excel打包下载,但这种生成还是很缓慢
我利用StringTemplate模板解析技术对XML模板进行填充。当然也可以使用FreeMarker, Velocity等Java模板技术实现。
首先引入StringTemplate所需Jar包:
使用技术为 stringTemplate
pom.xml:
<dependency>
<groupId>antlr</groupId>
<artifactId>antlr</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>stringtemplate</artifactId>
<version>3.2.1</version>
</dependency>
首先准备导出Excel模板,然后打开-》另存为-》选择格式为XML,然后用文本打开XML,提取XML头模板(head.st可通用),数据体模板(boday.st):
head.st可通用:
<?xml version="1.0"?>
<?mso-application progid="Excel.Sheet"?>
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:html="http://www.w3.org/TR/REC-html40">
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
<Created>1996-12-17T01:32:42Z</Created>
<LastSaved>2013-08-02T09:21:24Z</LastSaved>
<Version>11.9999</Version>
</DocumentProperties>
<OfficeDocumentSettings xmlns="urn:schemas-microsoft-com:office:office">
<RemovePersonalInformation/>
</OfficeDocumentSettings>
<ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
<WindowHeight>4530</WindowHeight>
<WindowWidth>8505</WindowWidth>
<WindowTopX>480</WindowTopX>
<WindowTopY>120</WindowTopY>
<AcceptLabelsInFormulas/>
<ProtectStructure>False</ProtectStructure>
<ProtectWindows>False</ProtectWindows>
</ExcelWorkbook>
<Styles>
<Style ss:ID="Default" ss:Name="Normal">
<Alignment ss:Vertical="Bottom"/>
<Borders/>
<Font ss:FontName="宋体" x:CharSet="134" ss:Size="12"/>
<Interior/>
<NumberFormat/>
<Protection/>
</Style>
</Styles>
body.st
$worksheet:{
<Worksheet ss:Name="$it.sheet$">
<Table ss:ExpandedColumnCount="$it.columnNum$" ss:ExpandedRowCount="$it.rowNum$" x:FullColumns="1"
x:FullRows="1" ss:DefaultColumnWidth="54" ss:DefaultRowHeight="14.25">
$it.rows:{
<Row>
<Cell><Data ss:Type="String">$it.name1$</Data></Cell>
<Cell><Data ss:Type="String">$it.name2$</Data></Cell>
<Cell><Data ss:Type="String">$it.name3$</Data></Cell>
</Row>
}$
</Table>
</Worksheet>
}$
生成大数据量Excel类:
ExcelGenerator:
package test.exportexcel;
import org.antlr.stringtemplate.StringTemplate;
import org.antlr.stringtemplate.StringTemplateGroup;
import test.exportexcel.bean.Row;
import test.exportexcel.bean.Worksheet;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
* 类功能描述:generator big data Excel
*
* @author WangXueXing create at 19-4-13 下午10:23
* @version 1.0.0
*/
public class ExcelGenerator {
public static void main(String[] args) throws FileNotFoundException{
ExcelGenerator template = new ExcelGenerator();
template.output2();
}
/**
* 生成数据量大的时候,该方法会出现内存溢出
* @throws FileNotFoundException
*/
public void output1() throws FileNotFoundException{
StringTemplateGroup stGroup = new StringTemplateGroup("stringTemplate");
StringTemplate st4 = stGroup.getInstanceOf("test/exportexcel/template/test");
List<Worksheet> worksheets = new ArrayList<>();
File file = new File("/home/barry/data/output.xls");
PrintWriter writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(file)));
for(int i=0;i<30;i++){
Worksheet worksheet = new Worksheet();
worksheet.setSheet("第"+(i+1)+"页");
List<Row> rows = new ArrayList<>();
for(int j=0;j<6000;j++){
Row row = new Row();
row.setName1("zhangzehao");
row.setName2(""+j);
row.setName3(i+" "+j);
rows.add(row);
}
worksheet.setRows(rows);
worksheets.add(worksheet);
}
st4.setAttribute("worksheets", worksheets);
writer.write(st4.toString());
writer.flush();
writer.close();
System.out.println("生成excel完成");
}
/**
* 该方法不管生成多大的数据量,都不会出现内存溢出,只是时间的长短
* 经测试,生成2400万数据,2分钟内,4.5G大的文件,打开大文件就看内存是否足够大了
* 数据量小的时候,推荐用JXLS的模板技术生成excel文件,谁用谁知道,大数据量可以结合该方法使用
* @throws FileNotFoundException
*/
public void output2() throws FileNotFoundException{
long startTimne = System.currentTimeMillis();
StringTemplateGroup stGroup = new StringTemplateGroup("stringTemplate");
//写入excel文件头部信息
StringTemplate head = stGroup.getInstanceOf("test/exportexcel/template/head");
File file = new File("/home/barry/data/output.xls");
PrintWriter writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(file)));
writer.print(head.toString());
writer.flush();
int sheets = 400;
//excel单表最大行数是65535
int maxRowNum = 60000;
//写入excel文件数据信息
for(int i=0;i<sheets;i++){
StringTemplate body = stGroup.getInstanceOf("test/exportexcel/template/body");
Worksheet worksheet = new Worksheet();
worksheet.setSheet(" "+(i+1)+" ");
worksheet.setColumnNum(3);
worksheet.setRowNum(maxRowNum);
List<Row> rows = new ArrayList<>();
for(int j=0;j<maxRowNum;j++){
Row row = new Row();
row.setName1(""+new Random().nextInt(100000));
row.setName2(""+j);
row.setName3(i+""+j);
rows.add(row);
}
worksheet.setRows(rows);
body.setAttribute("worksheet", worksheet);
writer.print(body.toString());
writer.flush();
rows.clear();
rows = null;
worksheet = null;
body = null;
Runtime.getRuntime().gc();
System.out.println("正在生成excel文件的 sheet"+(i+1));
}
//写入excel文件尾部
writer.print("</Workbook>");
writer.flush();
writer.close();
System.out.println("生成excel文件完成");
long endTime = System.currentTimeMillis();
System.out.println("用时="+((endTime-startTimne)/1000)+"秒");
}
}
定义JavaBean:
WorkSheet.java:
package test.exportexcel.bean;
import java.util.List;
/**
* 类功能描述:Excel sheet Bean
*
* @author WangXueXing create at 19-4-13 下午10:21
* @version 1.0.0
*/
public class Worksheet {
private String sheet;
private int columnNum;
private int rowNum;
private List<Row> rows;
public String getSheet() {
return sheet;
}
public void setSheet(String sheet) {
this.sheet = sheet;
}
public List<Row> getRows() {
return rows;
}
public void setRows(List<Row> rows) {
this.rows = rows;
}
public int getColumnNum() {
return columnNum;
}
public void setColumnNum(int columnNum) {
this.columnNum = columnNum;
}
public int getRowNum() {
return rowNum;
}
public void setRowNum(int rowNum) {
this.rowNum = rowNum;
}
}
Row.java:
package test.exportexcel.bean;
/**
* 类功能描述:Excel row bean
*
* @author WangXueXing create at 19-4-13 下午10:22
* @version 1.0.0
*/
public class Row {
private String name1;
private String name2;
private String name3;
public String getName1() {
return name1;
}
public void setName1(String name1) {
this.name1 = name1;
}
public String getName2() {
return name2;
}
public void setName2(String name2) {
this.name2 = name2;
}
public String getName3() {
return name3;
}
public void setName3(String name3) {
this.name3 = name3;
}
}