package com.medipath.helper.web;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import com.medipath.helper.web.vo.Node;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Flattens an HTML {@code <table>} into a pipe-delimited text grid in which
 * every cell covered by a {@code colspan}/{@code rowspan} repeats the text of
 * the spanning cell.
 */
public class TableToMatrix {

    /**
     * Upper bound applied to {@code colspan} so that a hostile or corrupt
     * attribute value cannot make the converter allocate an unbounded number
     * of cells. The HTML table model uses the same limit.
     */
    private static final int MAX_COLSPAN = 1000;

    /**
     * Demo entry point: converts a sample table that mixes {@code colspan}
     * and {@code rowspan} cells and prints the result to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String html = "<table>\n" +
                " <tr>\n" +
                " <td>\n" +
                " <p>公司</p></td>\n" +
                " <td>\n" +
                " <p>克隆号</p></td>\n" +
                " <td colspan=\"4\" >\n" +
                " <p>即用型(ml)</p></td>\n" +
                " <td colspan=\"2\" >\n" +
                " <p>原液(ml)</p></td>\n" +
                " </tr>\n" +
                " <tr>\n" +
                " <td>\n" +
                " <p>中杉金桥</p></td>\n" +
                " <td>\n" +
                " <p>OTI1A4</p></td>\n" +
                " <td>\n" +
                " <p>1.5</p></td>\n" +
                " <td>\n" +
                " <p>3</p></td>\n" +
                " <td>\n" +
                " <p>6</p></td>\n" +
                " <td>\n" +
                " <p>/</p></td>\n" +
                " <td>\n" +
                " <p>0.1</p></td>\n" +
                " <td>\n" +
                " <p>0.2</p></td>\n" +
                " </tr>\n" +
                " <tr>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>安必平</p></td>\n" +
                " <td>\n" +
                " <p>ALK-1</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>1.5</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>3</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>6</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>/</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>0.1</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>0.2</p></td>\n" +
                " </tr>\n" +
                " <tr>\n" +
                " <td>\n" +
                " <p>5A4</p></td>\n" +
                " </tr>\n" +
                " <tr>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>基因科技</p></td>\n" +
                " <td>\n" +
                " <p>5A4</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>/</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>2</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>4</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>7</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>/</p></td>\n" +
                " <td rowspan=\"2\" >\n" +
                " <p>0.2</p></td>\n" +
                " </tr>\n" +
                " <tr>\n" +
                " <td>\n" +
                " <p>SP8</p></td>\n" +
                " </tr>\n" +
                " <tr>\n" +
                " <td>\n" +
                " <p>百凌生物</p></td>\n" +
                " <td>\n" +
                " <p>BP6165</p></td>\n" +
                " <td>\n" +
                " <p>1.5</p></td>\n" +
                " <td>\n" +
                " <p>/</p></td>\n" +
                " <td>\n" +
                " <p>/</p></td>\n" +
                " <td>\n" +
                " <p>7</p></td>\n" +
                " <td>\n" +
                " <p>0.1</p></td>\n" +
                " <td>\n" +
                " <p>1.0</p></td>\n" +
                " </tr>\n" +
                "</table>";
        System.out.println(tableToJson(html));
    }

    /**
     * Converts the {@code <td>} cells of every {@code table tr} in the given
     * HTML into a pipe-delimited text grid.
     *
     * <p>Despite the method name, the result is plain text, not JSON. Each
     * cell is written as {@code " | " + text}; consecutive rows are separated
     * by {@code " |\n "}. The last row is not terminated with a closing pipe.
     *
     * <p>Cells are placed on a grid: a cell starts at the first column of its
     * row that is not already covered by a {@code rowspan} from an earlier
     * row, and its text is repeated in every slot it spans. Only {@code <td>}
     * elements are read; {@code <th>} cells are not included.
     *
     * @param html HTML markup containing the table; may be {@code null} or blank
     * @return the rendered grid, or an empty string when the input is blank
     *         or contains no {@code <td>} cells
     */
    public static String tableToJson(String html) {
        if (StrUtil.isBlank(html)) {
            return "";
        }
        Document doc = Jsoup.parse(html);
        Elements rows = doc.select("table tr");
        return render(collectNodes(rows));
    }

    /** Expands every cell of the given rows into one {@link Node} per grid slot it covers. */
    private static List<Node> collectNodes(Elements rows) {
        List<Node> nodes = new ArrayList<>();
        // Slots already claimed, including those reserved by spans of earlier cells.
        Set<Long> occupied = new HashSet<>();
        int rowCount = rows.size();
        int rowIndex = 0;
        for (Element row : rows) {
            int cellIndex = 0;
            for (Element cell : row.select("td")) {
                // Skip columns that a rowspan from a previous row already fills;
                // without this the cell would be stacked on top of the spanning one.
                while (occupied.contains(slotKey(rowIndex, cellIndex))) {
                    cellIndex++;
                }
                int colspan = Math.min(parseSpan(cell.attr("colspan")), MAX_COLSPAN);
                // Clip the rowspan to the rows that actually exist so it cannot
                // create rows beyond the end of the table.
                int rowspan = Math.min(parseSpan(cell.attr("rowspan")), rowCount - rowIndex);
                String value = cell.text();
                for (int r = 0; r < rowspan; r++) {
                    for (int c = 0; c < colspan; c++) {
                        int targetRow = rowIndex + r;
                        int targetCol = cellIndex + c;
                        // First claim wins: overlapping spans never yield two
                        // nodes for the same coordinate.
                        if (occupied.add(slotKey(targetRow, targetCol))) {
                            nodes.add(Node.builder().row(targetRow).col(targetCol).value(value).build());
                        }
                    }
                }
                cellIndex += colspan;
            }
            rowIndex++;
        }
        return nodes;
    }

    /** Renders the nodes in row-major order using the {@code " | "} / {@code " |\n "} separators. */
    private static String render(List<Node> nodes) {
        nodes.sort(Comparator.comparingInt(Node::getRow).thenComparingInt(Node::getCol));
        StringBuilder out = new StringBuilder();
        int currentRow = 0;
        for (Node node : nodes) {
            int nodeRow = node.getRow();
            if (nodeRow != currentRow) {
                out.append(" |\n ");
                // Jump straight to the node's row; stepping by one would emit a
                // break before every following cell whenever a row has no cells.
                currentRow = nodeRow;
            }
            out.append(" | ").append(node.getValue());
        }
        return out.toString();
    }

    /**
     * Parses a {@code colspan}/{@code rowspan} attribute value.
     * Blank, non-numeric and non-positive values all yield {@code 1}.
     */
    private static int parseSpan(String raw) {
        if (StrUtil.isBlank(raw)) {
            return 1;
        }
        try {
            return Math.max(Integer.parseInt(raw.trim()), 1);
        } catch (NumberFormatException ignored) {
            // A malformed attribute is treated like a missing one rather than
            // aborting the conversion of the whole table.
            return 1;
        }
    }

    /** Packs a (row, column) coordinate into a single {@code long} usable as a set key. */
    private static long slotKey(int row, int col) {
        return ((long) row << 32) | (col & 0xFFFFFFFFL);
    }
}
package com.medipath.helper.web.vo;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One cell position of a table grid together with the text shown there.
 *
 * <p>The Lombok annotations generate the accessors, {@code equals},
 * {@code hashCode}, {@code toString}, a no-argument constructor, an
 * all-arguments constructor and a builder; no members are written by hand.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class Node {
// Row index of the cell; boxed, so it is null until assigned.
private Integer row;
// Column index of the cell; boxed, so it is null until assigned.
private Integer col;
// Text content associated with this grid position.
private String value;
}