解析html文件生成excel

实体类

import lombok.Data;

/**
 * One endpoint extracted from the HTML document, i.e. one data row of the exported sheet.
 * Getters and setters are generated by Lombok's {@code @Data}.
 */
@Data
public class HtmlToExcel {

    // Request method of the endpoint (exported under the "请求方式" column).
    private String method;

    // Request URL of the endpoint (exported under the "请求地址" column).
    private String url;

    // Description of the endpoint (exported under the "接口描述" column).
    // The "decs" spelling is kept because callers use the generated getDecs/setDecs.
    private String decs;

}

实现类

import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Command-line tool that extracts the endpoints listed in an HTML document and writes them to
 * an Excel (.xls) sheet with three columns: request method, request URL and description.
 *
 * <p>Steps: read the HTML line by line, collect section headings and {@code <pre>} request
 * lines, pair them up by position, then export the result. Headings that are themselves a
 * request line (they contain "POST" or "GET") have no description; they are reported on stdout
 * and appended to the sheet with an empty description column to be filled in by hand.
 */
public class demo1 {

    // Path of the HTML file to convert; fill in before running.
    private static final String DOC_PATH = "";

    // Path of the .xls file to write; fill in before running.
    private static final String OUTPUT_PATH = "";

    // Name of the sheet to create; fill in before running.
    private static final String SHEET_NAME = "";

    /**
     * Runs the conversion from {@code DOC_PATH} to {@code OUTPUT_PATH}.
     *
     * @param args unused
     * @throws IOException if the HTML file cannot be read; a failure while writing the output
     *                     file is reported on stdout instead of being rethrown
     */
    public static void main(String[] args) throws IOException {
        List<String> des = new ArrayList<>();
        List<String> methods = new ArrayList<>();
        List<String> urls = new ArrayList<>();

        // 1-2. Read the document line by line and collect the raw fields.
        // Decoded explicitly as UTF-8 rather than with the platform default charset.
        // NOTE(review): change the charset here if the HTML is saved in another encoding.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(DOC_PATH), StandardCharsets.UTF_8))) {
            String contentLine;
            while ((contentLine = br.readLine()) != null) {
                collectHeading(contentLine, des);
                collectRequestLine(contentLine, methods, urls);
            }
        }

        // Headings that are a bare request line carry no description of their own.
        List<String> unknowns = des.stream()
                .filter(demo1::isRequestLine)
                .collect(Collectors.toList());
        System.out.println("没有描述的接口,需要手动加入: " + unknowns);
        List<HtmlToExcel> abnormalList = toAbnormalEntries(unknowns);

        // 3. Pair the real descriptions with the collected methods and URLs.
        List<String> describe = des.stream()
                .filter(x -> !isRequestLine(x))
                .collect(Collectors.toList());
        List<HtmlToExcel> list = pairEntries(describe, methods, urls);

        // 4-5. Build the sheet and export it.
        try (HSSFWorkbook workbook = new HSSFWorkbook()) {
            fillSheet(workbook.createSheet(SHEET_NAME), list, abnormalList);
            export(workbook);
        }
    }

    // Returns true when a heading text is itself a request line rather than a description.
    private static boolean isRequestLine(String text) {
        return text.contains("POST") || text.contains("GET");
    }

    // Extracts the heading text between ". " and "</h3>" from a section-heading line, if present.
    private static void collectHeading(String line, List<String> des) {
        if (!line.contains("h3") || !line.contains("a>2")) {
            return;
        }
        int start = line.indexOf(". ");
        int end = line.indexOf("</h3>");
        // Skip lines where a marker is missing instead of failing in substring().
        if (start >= 0 && end >= start + 2) {
            des.add(line.substring(start + 2, end));
        }
    }

    // Extracts method and URL from a "<pre>METHOD /url</pre>" line, if present.
    private static void collectRequestLine(String line, List<String> methods, List<String> urls) {
        int open = line.indexOf("<pre>");
        if (open < 0) {
            return;
        }
        int methodStart = open + 5;
        // The method is taken up to the first "T " (as in "POST " / "GET "), as before.
        int split = line.indexOf("T ");
        int close = line.indexOf("</pre>");
        // Both lists are only appended to together so that they stay index-aligned.
        if (split >= 0 && split + 2 >= methodStart && close >= split + 2) {
            // trim() drops the separating space that used to end up in the cell value.
            methods.add(line.substring(methodStart, split + 2).trim());
            urls.add(line.substring(split + 2, close));
        }
    }

    // Splits each "METHOD /url" heading into an entry that has no description.
    private static List<HtmlToExcel> toAbnormalEntries(List<String> unknowns) {
        List<HtmlToExcel> abnormalList = new ArrayList<>();
        for (String unknown : unknowns) {
            int separator = unknown.indexOf(" /");
            if (separator < 0) {
                // Not of the form "METHOD /url"; it was already reported on stdout above.
                continue;
            }
            HtmlToExcel abnormal = new HtmlToExcel();
            abnormal.setMethod(unknown.substring(0, separator));
            // Keep the leading '/' so the URL has the same form as those read from <pre>.
            abnormal.setUrl(unknown.substring(separator + 1));
            abnormalList.add(abnormal);
        }
        return abnormalList;
    }

    // Builds entries by pairing descriptions, methods and URLs that share the same index.
    private static List<HtmlToExcel> pairEntries(List<String> describe, List<String> methods,
                                                 List<String> urls) {
        // NOTE(review): pairing is purely positional, so it assumes the i-th description
        // belongs to the i-th <pre> request line of the document - confirm against the HTML.
        int count = Math.min(describe.size(), methods.size());
        if (describe.size() != methods.size()) {
            System.err.println("警告: 描述数量(" + describe.size() + ")与接口数量("
                    + methods.size() + ")不一致,仅导出前 " + count + " 条");
        }
        List<HtmlToExcel> list = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            HtmlToExcel htmlToExcel = new HtmlToExcel();
            htmlToExcel.setDecs(describe.get(i));
            htmlToExcel.setMethod(methods.get(i));
            htmlToExcel.setUrl(urls.get(i));
            list.add(htmlToExcel);
        }
        return list;
    }

    // Writes the header row, the complete entries, then the entries lacking a description.
    private static void fillSheet(HSSFSheet sheet, List<HtmlToExcel> list,
                                  List<HtmlToExcel> abnormalList) {
        HSSFRow rowHead = sheet.createRow(0);
        rowHead.createCell(0).setCellValue("请求方式");
        rowHead.createCell(1).setCellValue("请求地址");
        rowHead.createCell(2).setCellValue("接口描述");

        // A single running int index: no short overflow, and no two rows share an index.
        int rowIndex = 1;
        for (HtmlToExcel bill : list) {
            HSSFRow row = sheet.createRow(rowIndex++);
            row.createCell(0).setCellValue(bill.getMethod());
            row.createCell(1).setCellValue(bill.getUrl());
            row.createCell(2).setCellValue(bill.getDecs());
        }
        // Appended after the complete rows; the description cell is left for manual entry.
        for (HtmlToExcel bill : abnormalList) {
            HSSFRow row = sheet.createRow(rowIndex++);
            row.createCell(0).setCellValue(bill.getMethod());
            row.createCell(1).setCellValue(bill.getUrl());
        }

        // Auto-size every column that has a header cell.
        for (int colNum = 0; colNum < rowHead.getLastCellNum(); colNum++) {
            sheet.autoSizeColumn(colNum);
        }
    }

    // Writes the workbook to OUTPUT_PATH and reports success or failure on stdout.
    private static void export(HSSFWorkbook workbook) {
        try {
            // FileOutputStream creates the file itself, so no separate createNewFile() call.
            try (FileOutputStream fout = new FileOutputStream(OUTPUT_PATH)) {
                workbook.write(fout);
            }
            // Reported only once the stream has been flushed and closed without error.
            System.out.println("导出成功!");
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("导出失败!");
        }
    }
}

实现逻辑:

        1.将html文件转换为流,逐行读取

        2.根据关键字获取需要提取的字段,封装给实体类

        3.将没有描述的接口(标题本身就是请求行)单独封装到另一个实体类列表中,描述留待手动填写

        4.将数据转换为excel对象

        5.生成文件

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值