最近要处理从页面导出交易数据的excel报表过大问题,首先想到的是生成excel文件时压缩成zip格式文件,然后导出压缩文件。于是本地实验,将一行重复写了60多万行,大小为118M,压缩输出后,只有5M多,差不多只有当初的1/20了,以为还不错。结果放到生产环境上后,同样大小的文件压缩效果不理想,一个118M的excel文件压缩后还有115M,几乎没有作用。原来本地做的测试方式有误。excel文件的压缩不只跟本身大小有关,还和里面的数据内容差异化有关。数据内容差异越大,压缩比越小。
最后只好还是将交易数据写入csv格式文件中,这种纯文本文件可以被压缩得很小。
修正后的实验对比(同样写入600000行,每一个单元格为6位数字。文件一:都为666666,文件二:都为6位随机数),测试对比效果很明显:
附测试代码:
import de.schlichtherle.util.zip.ZipEntry;
import de.schlichtherle.util.zip.ZipOutputStream;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import com.csvreader.CsvWriter;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
public class Demo {
private static int times = 600000;
public static void main(String[] args) {
String title = "测试1";
String[] headers = new String[] { "列A", "列B", "列C", "列D", "列E", "列F" };
// 文件1--数据重复
List<String[]> dataList = new ArrayList<>();
for (int k = 0; k < times; k++) {
String[] dataArr = new String[6];
for (int i = 0; i < dataArr.length; i++) {
dataArr[i] = "666666";
}
dataList.add(dataArr);
}
// 文件2--全为随机数
List<String[]> dataList2 = new ArrayList<>();
for (int k = 0; k < times; k++) {
String[] dataArr = new String[6];
for (int i = 0; i < dataArr.length; i++) {
dataArr[i] = String.valueOf((int) ((Math.random() * 9 + 1) * 100000));
}
dataList2.add(dataArr);
}
String outputPath1 = "D:\\E_disk\\testReport\\test1.xlsx";
String outputPath2 = "D:\\E_disk\\testReport\\test1.zip";
writeExcel(title, headers, dataList, outputPath1);
File file1 = new File(outputPath1);
File file2 = new File(outputPath2);
createZipFile(file1, file2);
String outputPath3 = "D:\\E_disk\\testReport\\test2.xlsx";
String outputPath4 = "D:\\E_disk\\testReport\\test2.zip";
writeExcel(title, headers, dataList2, outputPath3);
File file3 = new File(outputPath3);
File file4 = new File(outputPath4);
createZipFile(file3, file4);
// 测试csv文件
String outputPath5 = "D:\\E_disk\\testReport\\test3.csv";
String outputPath6 = "D:\\E_disk\\testReport\\test3.zip";
writeCsv(headers, dataList, outputPath5);
File file5 = new File(outputPath5);
File file6 = new File(outputPath6);
createZipFile(file5, file6);
String outputPath7 = "D:\\E_disk\\testReport\\test4.csv";
String outputPath8 = "D:\\E_disk\\testReport\\test4.zip";
writeCsv(headers, dataList2, outputPath7);
File file7 = new File(outputPath7);
File file8 = new File(outputPath8);
createZipFile(file7, file8);
}
public static void writeExcel(String title, String[] headers, List<String[]> dataList, String outputPath) {
FileOutputStream os = null;
BufferedOutputStream bos = null;
Workbook workbook = null;
try {
// 声明一个工作薄
workbook = new SXSSFWorkbook(1000);
// 生成一个表格
Sheet sheet = workbook.createSheet(title);
// 设置表格默认列宽度为15个字节
sheet.setDefaultColumnWidth(15);
// 产生表格标题行
Row row = sheet.createRow(0);
for (int i = 0; i < headers.length; i++) {
Cell cell = row.createCell(i);
XSSFRichTextString text = new XSSFRichTextString(headers[i]);
cell.setCellValue(text);
}
// 遍历集合数据,产生数据行
for (int i = 0; i < dataList.size(); i++) {
String[] dataArr = dataList.get(i);
row = sheet.createRow(i + 1);
for (int k = 0; k < dataArr.length; k++) {
Cell cell0 = row.createCell(k);
cell0.setCellValue(dataArr[k]);
}
}
File outFile = new File(outputPath);
os = new FileOutputStream(outFile);
bos = new BufferedOutputStream(os);
workbook.write(bos);
bos.flush();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
bos.close();
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static void writeCsv(String[] headers, List<String[]> dataList, String outputPath) {
FileOutputStream fos = null;
BufferedOutputStream bos = null;
CsvWriter cwriter = null;
try {
File outputfile = new File(outputPath);
fos = new FileOutputStream(outputfile);
bos = new BufferedOutputStream(fos);
cwriter = new CsvWriter(bos, ',', Charset.forName("GBK"));
cwriter.writeRecord(headers);
for (String[] strarr : dataList) {
for (int i = 0; i < strarr.length; i++) {
strarr[i] = "\t" + strarr[i];
}
cwriter.writeRecord(strarr, true);
}
cwriter.flush();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (cwriter != null)
cwriter.close();
if (bos != null)
bos.close();
if (fos != null)
fos.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static void createZipFile(File inputFile, File zipFile) {
FileInputStream fis = null;
BufferedInputStream bis = null;
FileOutputStream fos = null;
ZipOutputStream zipOut = null;
BufferedOutputStream bos = null;
try {
fis = new FileInputStream(inputFile);
bis = new BufferedInputStream(fis, 2048);
fos = new FileOutputStream(zipFile);
bos = new BufferedOutputStream(fos);
zipOut = new ZipOutputStream(bos);
zipOut.putNextEntry(new ZipEntry(inputFile.getName()));
byte[] buf = new byte[2048];
int len;
while ((len = bis.read(buf, 0, 2048)) != -1) {
zipOut.write(buf, 0, len);
}
zipOut.flush();
} catch (Throwable e) {
e.printStackTrace();
} finally {
try {
if (bis != null)
bis.close();
if (fis != null)
fis.close();
if (zipOut != null)
zipOut.close();
if (bos != null)
bos.close();
if (fos != null)
fos.close();
// 压缩完成后删除原始文件
// if(inputFile.exists())inputFile.delete();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}