问题描述
在开发excel导入数据的过程中,客户想直接将导出的excel直接导入到另一个系统,但是在导入的过程中发现报上面的错误,百度一波解决方案都是另存为excel再导入,但是客户只接受直接导出然后导入,最后经过一系列探索发现,虽然导出的文件是xls后缀的,但是当你另存为的时候发现其实本质是html文件,最后就需要研究怎么从html转excel了
解决方法
先读取html内容为字符串,主要方法如下
private String MultipartFileToString(MultipartFile multipartFile) {
InputStreamReader isr;
BufferedReader br;
StringBuilder txtResult = new StringBuilder();
try {
isr = new InputStreamReader(multipartFile.getInputStream(), StandardCharsets.UTF_8);
br = new BufferedReader(isr);
String lineTxt;
while ((lineTxt = br.readLine()) != null) {
txtResult.append(lineTxt);
}
isr.close();
br.close();
return txtResult.toString();
} catch (Exception e) {
logger.error("文件读取失败");
return "";
}
}
再讲字符串转为HSSFWorkbook,方法如下(从网络copy修改的)
package com.qingyuan.business.utils;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFPalette;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.CellRangeAddress;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.*;
public class HtmlToExcel {
/**
* @param html 字符串的html
* @param sheetName sheet页的名字
* @throws IOException
*/
public static HSSFWorkbook toExcel(String html, String sheetName) throws IOException {
HSSFWorkbook wb = new HSSFWorkbook();
//表头单元格风格
HSSFCellStyle thStyle = wb.createCellStyle();
thStyle.setFillForegroundColor(IndexedColors.AQUA.getIndex());
thStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
//上下左右居中
thStyle.setAlignment(HorizontalAlignment.CENTER);
thStyle.setVerticalAlignment(VerticalAlignment.CENTER);
//边框
thStyle.setBorderTop(BorderStyle.THIN);
thStyle.setBorderRight(BorderStyle.THIN);
thStyle.setBorderBottom(BorderStyle.THIN);
thStyle.setBorderLeft(BorderStyle.THIN);
thStyle.setTopBorderColor(IndexedColors.WHITE.getIndex());
thStyle.setRightBorderColor(IndexedColors.WHITE.getIndex());
thStyle.setBottomBorderColor(IndexedColors.WHITE.getIndex());
thStyle.setLeftBorderColor(IndexedColors.WHITE.getIndex());
//字体
Font thFont = wb.createFont();
thFont.setColor(IndexedColors.BLACK.getIndex());
thFont.setBold(true);
thStyle.setFont(thFont);
Sheet sheet = wb.createSheet(sheetName);
//获取html的数据
List<List<Map<String, String>>> excelData = getExcelData(html);
//处理数据
for (int rowNum = 0; rowNum < excelData.size(); rowNum++) {
//外层是循环行,每循环一次,创建一个行的对象
Row row = sheet.createRow(rowNum);
//设置行的高度
row.setHeightInPoints(25);
for (int cellNum = 0; cellNum < excelData.get(rowNum).size(); cellNum++) {
//处理跨行跨列
if ((excelData.get(rowNum).get(cellNum).get("colspanValue") != null) && (excelData.get(rowNum).get(cellNum).get("rowspanValue") != null)) {
int colspanValue = Integer.parseInt(excelData.get(rowNum).get(cellNum).get("colspanValue"));
int rowspanValue = Integer.parseInt(excelData.get(rowNum).get(cellNum).get("rowspanValue"));
sheet.addMergedRegion(new CellRangeAddress(rowNum, rowspanValue + rowNum - 1, cellNum, cellNum + colspanValue - 1));
} else {
if (excelData.get(rowNum).get(cellNum).get("colspanValue") != null) {
int colspanValue = Integer.parseInt(excelData.get(rowNum).get(cellNum).get("colspanValue"));
sheet.addMergedRegion(new CellRangeAddress(rowNum, rowNum, cellNum, cellNum + colspanValue - 1));
}
if (excelData.get(rowNum).get(cellNum).get("rowspanValue") != null) {
int rowspanValue = Integer.parseInt(excelData.get(rowNum).get(cellNum).get("rowspanValue"));
sheet.addMergedRegion(new CellRangeAddress(rowNum, rowspanValue + rowNum - 1, cellNum, cellNum));
}
}
//内层循环每行的单元格,每循环一次,创建一个单元格的对象
Cell cell = row.createCell(cellNum);
//赋值
cell.setCellValue(excelData.get(rowNum).get(cellNum).get("value"));
//设置样式
if (excelData.get(rowNum).get(cellNum).get("style") != null) {
if ("th".equals(excelData.get(rowNum).get(cellNum).get("style"))) {
cell.setCellStyle(thStyle);
}
if ("td".equals(excelData.get(rowNum).get(cellNum).get("style"))) {
//表体
HSSFCellStyle tdStyle = wb.createCellStyle();
tdStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
//上下左右居中
tdStyle.setAlignment(HorizontalAlignment.CENTER);
tdStyle.setVerticalAlignment(VerticalAlignment.CENTER);
//边框
tdStyle.setBorderTop(BorderStyle.THIN);
tdStyle.setBorderRight(BorderStyle.THIN);
tdStyle.setBorderBottom(BorderStyle.THIN);
tdStyle.setBorderLeft(BorderStyle.THIN);
tdStyle.setTopBorderColor(IndexedColors.WHITE.getIndex());
tdStyle.setRightBorderColor(IndexedColors.WHITE.getIndex());
tdStyle.setBottomBorderColor(IndexedColors.WHITE.getIndex());
tdStyle.setLeftBorderColor(IndexedColors.WHITE.getIndex());
if (rowNum % 2 == 0) {
tdStyle.setFillForegroundColor(IndexedColors.LIGHT_TURQUOISE.getIndex());
} else {
tdStyle.setFillForegroundColor(IndexedColors.WHITE.getIndex());
}
cell.setCellStyle(tdStyle);
}
}
//设置宽度
sheet.setColumnWidth(cellNum, (excelData.get(rowNum).get(cellNum).get("value").length() + 20) * 256);
}
}
HSSFPalette palette = wb.getCustomPalette();
//获取配置的颜色
palette.setColorAtIndex(IndexedColors.ORANGE.getIndex(),
(byte) 97, //RGB red (0-255)
(byte) 191, //RGB green
(byte) 130 //RGB blue
);
palette.setColorAtIndex(IndexedColors.BLUE.getIndex(),
(byte) 233, //RGB red (0-255)
(byte) 244, //RGB green
(byte) 232 //RGB blue
);
return wb;
}
public static List<List<Map<String, String>>> getExcelData(String tableHtml) {
Document document = Jsoup.parse(tableHtml);
//取得表体的html
Elements tbody = document.select("tbody").select("tr");
//获取table的最大的列数
List<Integer> tdSize = new ArrayList<>();
for (Element element : tbody) {
tdSize.add(element.select("td").size());
}
Collections.sort(tdSize);
//最后一行就是整个表格最大的列数
int rowCellNum = tdSize.get(tdSize.size() - 1);
//整个table的<tr>
Elements trAll = document.select("tr");
//tr就是整个表格的行数
int excelDataSize = trAll.size();
//存放表格数据
List<List<Map<String, String>>> excelData = new ArrayList<>();
//先创建空的excel数据
for (int i = 0; i < excelDataSize; i++) {
excelData.add(new ArrayList<>());
for (int j = 0; j < rowCellNum; j++) {
excelData.get(i).add(new HashMap<>());
}
}
//按行数进行循环
for (int rowNum = 0; rowNum < excelData.size(); rowNum++) {
//取得每一行的html
Element element = trAll.get(rowNum);
//这一行的表头
Elements th = element.select("th");
Elements td = element.select("td");
//列数
int index = 0;
//循环一行的数据
for (int cellNum = 0; cellNum < excelData.get(rowNum).size(); cellNum++) {
//只有value==null的才是没有被赋值的,因为跨行跨列的已经赋值过了
if (excelData.get(rowNum).get(cellNum).get("value") == null) {
//取一个单元格对象
Element cell = null;
//样式的标识
String style = "";
//表头th处理
if (th.size() != 0) {
//这是表头的样式
style = "th";
excelData.get(rowNum).get(cellNum).put("style", style);
if (th.size() > index) {
cell = th.get(index);
}
}
//表体td处理
if (td.size() != 0) {
//这是表体的样式
style = "td";
excelData.get(rowNum).get(cellNum).put("style", style);
if (td.size() > index) {
cell = td.get(index);
}
}
//单元格的值
String value = cell == null ? "" : cell.text();
//处理跨行跨列
String cellHtml = cell == null ? "" : cell.outerHtml();
//rowspan和colspan都有值的情况下
if ((cellHtml.indexOf("rowspan") != -1 && cellHtml.indexOf("\"\"") == -1) || (cellHtml.indexOf("colspan") != -1 && cellHtml.indexOf("\"\"") == -1)) {
String rowspanValue = "0";
String colspanValue = "0";
//rowspan有值 取值并给到单元格对象
if (cellHtml.indexOf("rowspan") != -1 && cellHtml.indexOf("\"\"") == -1) {
String rowspanValueStr = cellHtml.substring(cellHtml.indexOf("rowspan") + 7);
rowspanValueStr = rowspanValueStr.substring(rowspanValueStr.indexOf("\"") + 1);
rowspanValueStr = rowspanValueStr.substring(0, rowspanValueStr.indexOf("\""));
rowspanValue = rowspanValueStr;
excelData.get(rowNum).get(cellNum).put("rowspanValue", rowspanValue);
}
//colspan有值 取值并给到单元格对象
if (cellHtml.indexOf("colspan") != -1 && cellHtml.indexOf("\"\"") == -1) {
String colspanValueStr = cellHtml.substring(cellHtml.indexOf("colspan") + 7);
colspanValueStr = colspanValueStr.substring(colspanValueStr.indexOf("\"") + 1);
colspanValueStr = colspanValueStr.substring(0, colspanValueStr.indexOf("\""));
colspanValue = colspanValueStr;
excelData.get(rowNum).get(cellNum).put("colspanValue", colspanValue);
}
//这个单元格又跨行又跨列
if (Integer.parseInt(rowspanValue) > 0 && Integer.parseInt(colspanValue) > 0) {
//把他跨列的单元格给赋上值 , 没有赋值就是null , 和559行的逻辑呼应
for (int i = 1; i < Integer.parseInt(rowspanValue); i++) {
excelData.get(rowNum + i).get(cellNum).put("value", value);
excelData.get(rowNum + i).get(cellNum).put("style", style);
//把他跨行的单元格给赋上值 , 没有赋值就是null , 和559行的逻辑呼应
for (int j = 1; j < Integer.parseInt(colspanValue); j++) {
excelData.get(rowNum).get(cellNum + j).put("value", value);
excelData.get(rowNum).get(cellNum + j).put("style", style);
excelData.get(rowNum + i).get(cellNum + j).put("value", value);
excelData.get(rowNum + i).get(cellNum + j).put("style", style);
}
}
}
//跨行或者跨列
else {
//跨列
if (Integer.parseInt(rowspanValue) > 0) {
//把他跨列的单元格给赋上值 , 没有赋值就是null , 和559行的逻辑呼应
for (int i = 1; i < Integer.parseInt(rowspanValue); i++) {
excelData.get(rowNum + i).get(cellNum).put("value", value);
excelData.get(rowNum + i).get(cellNum).put("style", style);
}
}
if (Integer.parseInt(colspanValue) > 0) {
//把他跨行的单元格给赋上值 , 没有赋值就是null , 和559行的逻辑呼应
for (int j = 1; j < Integer.parseInt(colspanValue); j++) {
excelData.get(rowNum).get(cellNum + j).put("value", value);
excelData.get(rowNum).get(cellNum + j).put("style", style);
}
}
}
}
excelData.get(rowNum).get(cellNum).put("value", value);
index = index + 1;
}
}
}
return excelData;
}
}
然后就是内容读取了
注:在这当中遇到的坑
html转excel之后,数据所在行和原来的文件对不上,需要自己去看转换的内容,表头,数据在哪一行。像下面这种表格,解析之后会自动去掉第一行,测试合并的行算一行

转换之后,原来表格的数字会全部转变为字符串,并且像下图这种数据带有,的,需要自己去替换掉,并且转换成数字,不然会报错
