/**
 * Converts every HTML table found in the given markup into a Markdown table.
 *
 * <p>Input that is blank, or that does not contain the substring {@code "<table"},
 * is returned unchanged. Otherwise the markup is parsed with Jsoup, each element
 * matched by {@code Label.table} is replaced by the Markdown produced by
 * {@code convertTable}, and the inner HTML of the resulting body is returned.
 *
 * <p>The Markdown is inserted as a text node, not as an HTML fragment. Cell text
 * that merely looks like markup (for example an escaped {@code &lt;script&gt;} in
 * the source table) therefore stays text instead of being re-parsed into live
 * elements, and entities are not decoded a second time.
 *
 * @param html HTML markup; may be blank
 * @return the body HTML with tables replaced by Markdown, or {@code html} itself
 *         when there is nothing to convert
 */
public static String convert(String html){
    if(StringUtils.isBlank(html) || !html.contains("<table")){
        return html;
    }
    Document document = Jsoup.parse(html);
    // Keep Jsoup from reformatting whitespace on output; the Markdown rows rely on their line breaks.
    document.outputSettings().prettyPrint(false);
    Elements tables = document.select(Label.table);
    if(CollectionUtils.isEmpty(tables)){
        return html;
    }
    for(Element table : tables){
        String subTable = convertTable(table);
        // Fully qualified so this block does not depend on an extra import.
        table.replaceWith(new org.jsoup.nodes.TextNode(subTable));
    }
    return document.body().html();
}
/**
 * Renders a single HTML table element as Markdown.
 *
 * <p>The table is first laid out as a cell matrix by {@code convert2Matrix} and the
 * matrix is then printed by {@code printOutMatrix2MarkDown}.
 *
 * @param table the table element to convert
 * @return the Markdown text for the table
 */
public static String convertTable(Element table) {
    return printOutMatrix2MarkDown(convert2Matrix(table));
}
/**
 * Lays the cells of a table out in a two-dimensional array, expanding merged cells.
 *
 * <p>The array has one row per element matched by {@code Label.tr} and as many
 * columns as {@code getColsLength} reports. A cell with a rowspan/colspan is copied
 * into every slot it covers. Slots that no cell reaches stay {@code null}.
 *
 * <p>Malformed tables are handled defensively: spans smaller than 1 are treated as 1,
 * spans reaching past the table edge are clipped, and cells that do not fit into the
 * remaining columns of a row are dropped instead of raising
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * <p>NOTE(review): rows come from {@code table.select(Label.tr)}; if that selector
 * matches descendants, rows of nested tables are included as well — confirm.
 *
 * @param table the table element to read
 * @return the cell matrix, indexed as {@code [row][column]}
 */
public static MarkdownCell[][] convert2Matrix(Element table){
    int rowsLength = getRowLength(table);
    int colsLength = getColsLength(table);
    MarkdownCell[][] matrix = new MarkdownCell[rowsLength][colsLength];
    Elements trs = table.select(Label.tr);
    for(int i = 0; i < trs.size() && i < rowsLength; ++i){
        Elements children = trs.get(i).children();
        if(CollectionUtils.isEmpty(children)){
            continue;
        }
        int j = 0;
        for(Element node : children){
            // Slots that are already filled belong to a merged cell from a previous row.
            while(j < colsLength && matrix[i][j] != null){
                ++j;
            }
            if(j >= colsLength){
                // The row is wider than the matrix; the remaining cells have nowhere to go.
                break;
            }
            // Clamp spans to [1, space left] so the fill never leaves the matrix.
            int rowSpan = Math.min(Math.max(getRowspan(node), 1), rowsLength - i);
            int colSpan = Math.min(Math.max(getColspan(node), 1), colsLength - j);
            fillCells(matrix, i, j, rowSpan, colSpan, node.text());
            j += colSpan;
        }
    }
    return matrix;
}
/**
 * Prints a cell matrix as a Markdown table.
 *
 * <p>Every matrix row becomes one {@code | a | b |} line. A {@code | --- |}
 * separator line is emitted right after the first row, which makes the first row
 * the Markdown header. {@code null} cells are printed as empty cells. A literal
 * {@code |} inside cell text is escaped as {@code \|} so it is not read as a
 * column delimiter.
 *
 * @param matrix the cell matrix, indexed as {@code [row][column]}; the column count
 *               is taken from the first row
 * @return the Markdown text, or an empty string when the matrix has no rows
 */
private static String printOutMatrix2MarkDown(MarkdownCell[][] matrix) {
    if(matrix == null || matrix.length == 0){
        return "";
    }
    int rows = matrix.length;
    int cols = matrix[0].length;
    StringBuilder sb = new StringBuilder();
    for(int i = 0; i < rows; ++i){
        sb.append("|");
        for(int j = 0; j < cols; ++j){
            MarkdownCell cell = matrix[i][j];
            String content = (cell == null || cell.getContent() == null) ? "" : cell.getContent();
            sb.append(" ").append(content.replace("|", "\\|"))
              .append(" ").append("|");
        }
        sb.append("\n");
        // Markdown requires the delimiter row directly below the header row.
        if(i == 0){
            sb.append("|");
            for(int j = 0; j < cols; ++j){
                sb.append(" ").append("---")
                  .append(" ").append("|");
            }
            sb.append("\n");
        }
    }
    return sb.toString();
}
/**
 * Writes the same content into every slot of a rectangular region of the matrix.
 *
 * <p>The region starts at {@code [startRows][startClos]} and extends
 * {@code rowSpan} rows down and {@code colSpan} columns to the right. Any part of
 * the region that lies outside the matrix is ignored, so an oversized span taken
 * from the HTML cannot raise {@link ArrayIndexOutOfBoundsException}.
 *
 * @param matrix    the matrix to fill, indexed as {@code [row][column]}
 * @param startRows index of the first row of the region
 * @param startClos index of the first column of the region
 * @param rowSpan   number of rows the region covers
 * @param colSpan   number of columns the region covers
 * @param content   text stored in each covered slot
 */
private static void fillCells(MarkdownCell[][] matrix, int startRows, int startClos,int rowSpan, int colSpan, String content) {
    // Sums are computed in long so that a huge span value cannot overflow int.
    int endRow = (int) Math.min((long) startRows + rowSpan, (long) matrix.length);
    for(int i = Math.max(startRows, 0); i < endRow; ++i){
        int endCol = (int) Math.min((long) startClos + colSpan, (long) matrix[i].length);
        for(int j = Math.max(startClos, 0); j < endCol; ++j){
            matrix[i][j] = MarkdownCell.builder().content(content).build();
        }
    }
}
/**
 * Returns the number of columns of a table.
 *
 * <p>The width is taken from the first row only: it is the sum of the colspans of
 * that row's child elements. Later rows are not inspected.
 *
 * @param table the table element to measure
 * @return the column count derived from the first row
 * @throws RuntimeException if the table contains no row, matching {@code getRowLength}
 */
public static int getColsLength(Element table){
    Element tr = table.select(Label.tr).first();
    if(tr == null){
        // first() yields null for an empty selection; fail with the same error as getRowLength.
        throw new RuntimeException("resolving table error : tr not found");
    }
    int total = 0;
    for(Element element : tr.children()){
        total += getColspan(element);
    }
    return total;
}
/**
 * Returns the number of rows of a table.
 *
 * <p>Rows are the elements matched by {@code Label.tr} inside the table.
 *
 * @param table the table element to measure
 * @return the number of matched rows
 * @throws RuntimeException if no row is found
 */
public static int getRowLength(Element table){
    Elements rows = table.select(Label.tr);
    if(!CollectionUtils.isEmpty(rows)){
        return rows.size();
    }
    throw new RuntimeException("resolving table error : tr not found");
}
/**
 * Returns how many columns a cell spans.
 *
 * <p>Reads the attribute named by {@code attr.colspan}. A missing attribute, a value
 * that is not an integer, or a value smaller than 1 all yield 1, so callers can
 * always advance by the returned amount. Surrounding whitespace in the attribute
 * value is ignored.
 *
 * @param cell the cell element to inspect
 * @return the column span, never less than 1
 */
private static int getColspan(Element cell) {
    if(!cell.hasAttr(attr.colspan)) {
        return 1;
    }
    try {
        int colspan = Integer.parseInt(cell.attr(attr.colspan).trim());
        // A span of zero or less would drop the cell or move the column cursor backwards.
        return Math.max(colspan, 1);
    } catch(NumberFormatException ignored) {
        // Invalid numbers are deliberately treated as "no span".
        return 1;
    }
}
/**
 * Returns how many rows a cell spans.
 *
 * <p>Reads the attribute named by {@code attr.rowspan}. A missing attribute, a value
 * that is not an integer, or a value smaller than 1 all yield 1. Surrounding
 * whitespace in the attribute value is ignored.
 *
 * <p>Note: HTML gives {@code rowspan="0"} the meaning "extend to the end of the row
 * group"; this method simplifies that to a span of 1 so the cell is at least kept.
 *
 * @param cell the cell element to inspect
 * @return the row span, never less than 1
 */
private static int getRowspan(Element cell) {
    if(!cell.hasAttr(attr.rowspan)) {
        return 1;
    }
    try {
        int rowspan = Integer.parseInt(cell.attr(attr.rowspan).trim());
        // A span of zero or less would leave the cell out of the matrix entirely.
        return Math.max(rowspan, 1);
    } catch(NumberFormatException ignored) {
        // Invalid numbers are deliberately treated as "no span".
        return 1;
    }
}
// HTML table to standard Markdown table conversion.
// Source page note: latest recommended article published on 2024-07-01 14:35:18.