JAVA万能模板导入,提取关键列

最近接到需求:要求支持各种不确定格式的模板,且识别后将数据返回给前端处理

引入以下maven依赖


		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
			<version>3.10-FINAL</version>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
			<version>3.10-FINAL</version>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml-schemas</artifactId>
			<version>3.10-FINAL</version>
		</dependency>

基本思路为:

先确定传入的excel的标题是哪些行,有些是多级表头,所以存在多行表头

再就是从表头中提取我们实际需要的列是哪些 

将每个表头和字典中的关键字匹配来得出对应的哪个属性

然后创建一个map用下标做key来将需要的列标注起来,后面就知道每个列所对应的属性了

代码实现 

比如这种表头,我们需要物料名称,规格,数量,单价

 再比如这种我们需要名称,规格/型号,CAS号,数量,单价

 这种就通过字典来完成与字段的配对

//这里选择继承是因为要支持多个字典,每个字典一个子类
public class SmartSmartExcelQuoteDict extends SmartExcelDict {


    //获取字典map
    public Map<String,String[]> getDictMap(){
        String[] NAME_DICT={"NAME","产品名称","名称","商品名称","产品名","商品名","物料名称","中文名称"};
        String[] SPEC_DICT={"SPEC","规格","技术规格","产品规格","规格型号","型号","规格或指标","规格/型号"};
        String[] QUANTITY_DICT={"QUANTITY ","QTY","数量","销售数量"};
        String[] PACKAGE_DICT={"PACKAGE","销售包装"};
        String[] PRICE_DICT={"PRICE","UNIT PRICE","单价","价格"};
        //String[] AMOUNT_DICT={"AMOUNT","TOTAL PRICE","金额合计","金额","总价","总金额","总计","合计","小计"};
        String[] CAS_NO_DICT={"CAS.NO","CAS号","CAS"};

        Map<String,String[]> dict=new HashMap<String,String[]>();
        //从上到下依次匹配,设置优先级
        dict.put("NAME",NAME_DICT);
        dict.put("SPEC",SPEC_DICT);
        dict.put("QUANTITY",QUANTITY_DICT);
        dict.put("PACKAGE",PACKAGE_DICT);
        dict.put("PRICE",PRICE_DICT);
        //dict.put("AMOUNT",AMOUNT_DICT);
        dict.put("CAS_NO",CAS_NO_DICT);
        return dict;
    }

    //查找字典,确认对应的属性
    public String find(String title){
        if(StringUtil.isEmpty(title)){
            return "";
        }
        Map<String,String[]> dict=getDictMap();
        for (Map.Entry<String, String[]> entry : dict.entrySet()) {
            String mapKey = entry.getKey();
            String[] mapValue = entry.getValue();
            for (String val:mapValue){
                if(val.equals(title.toUpperCase().trim())){
                    return mapKey;
                }
            }
        }
        return "";
    }

    //将返回的List转换为实体list
    public List<SmartExcelQuoteProduct> toModel(List<Map<String,Object>> list){
        List<SmartExcelQuoteProduct> modelList=new ArrayList<>();
        for (Map<String,Object> map:list) {
            String name= MapUtils.getString(map,"NAME");
            String spec= MapUtils.getString(map,"SPEC");
            String quantity= MapUtils.getString(map,"QUANTITY");
            String price= MapUtils.getString(map,"PRICE");
            String salepackage= MapUtils.getString(map,"PACKAGE");
            String casNo= MapUtils.getString(map,"CAS_NO");
            SmartExcelQuoteProduct model=new SmartExcelQuoteProduct();
            model.setName(name);
            model.setSpec(spec);
            model.setQuantity(quantity);
            model.setPrice(price);
            model.setSalePackage(salepackage);
            model.setCasNo(casNo);
            modelList.add(model);
        }
        return modelList;
    }

    public static void main(String[]args){
        SmartSmartExcelQuoteDict excelDict=new SmartSmartExcelQuoteDict();
        String key=excelDict.find("价格");
        System.out.println("标题映射:"+key);
    }

}

继承自的父类

public class SmartExcelDict {

    //这个方法可以写成空方法,但不能删除
    public Map<String,String[]> getDictMap(){
        String[] NAME_DICT={"NAME"};

        Map<String,String[]> dict=new HashMap<String,String[]>();
        //从上到下依次匹配,设置优先级
        dict.put("NAME",NAME_DICT);
        return dict;
    }

    //这个方法可以写成空方法,但不能删除
    public String find(String title){
        if(StringUtil.isEmpty(title)){
            return "";
        }
        Map<String,String[]> dict=getDictMap();
        for (Map.Entry<String, String[]> entry : dict.entrySet()) {
            String mapKey = entry.getKey();
            String[] mapValue = entry.getValue();
            for (String val:mapValue){
                if(val.equals(title.toUpperCase().trim())){
                    return mapKey;
                }
            }
        }
        return "";
    }

}

主要的工具类

public class SmartExcelUtils {

    private SmartExcelDict smartExcelDict;

    public SmartExcelUtils(){

    }

    public SmartExcelUtils(SmartExcelDict smartExcelDict2){
        //后续支持多种模板,创建时带入模板类型
        smartExcelDict = smartExcelDict2;
    }

    @RequestMapping("/importExcel")
    public Result importExcel(@RequestParam(value="file", required = false) MultipartFile multfile) throws Exception  {
        SmartSmartExcelQuoteDict excelDict=new SmartSmartExcelQuoteDict();
        SmartExcelUtils smartExcelUtils =new SmartExcelUtils(excelDict);
        if (multfile == null) return  Result.failed("请上传文件");
        //log.info("{}-导入PCS数据,参数:{}", DateUtil.format(new Date(),"yyyy-MM-dd HH:mm:ss"));
        // 根据参数选择解析
        String originalFilename = multfile.getOriginalFilename();
        assert originalFilename != null;
        // 解析的数据
        List<Map<String, Object>>  mapList= new ArrayList<>();
        try {
            String expandName = originalFilename.substring(originalFilename.lastIndexOf(".") + 1, originalFilename.length());
            if (expandName.equals("xls") || expandName.equals("xlsx") ){
                mapList = smartExcelUtils.importExcel(multfile,1,2,0);
            }else {
                return Result.failed("上传的文件格式不正确,只支持xls,xlsx或者参数选择错误!");
            }
            if (CollectionUtils.isEmpty(mapList)) return Result.failed("传入的文件数据为空,请检查后再上传!!");
            log.info("解析数据数量:{}",mapList.size());
            List<SmartExcelQuoteProduct> modelList= excelDict.toModel(mapList);
            return Result.succeed(modelList,"导入数据成功!!文件名:"+originalFilename+"导入PCS解析数据数量:"+mapList.size());
        }catch (Exception e){
            log.error("数据解析失败!!!原因:{}",e.getMessage(),e);
            System.out.println("数据解析失败!!!原因:{}"+e.getMessage());
            return Result.failed("解析异常");
        }
    }

    // 判断excel版本
    public List<Map<String,Object>> importExcel(MultipartFile file,Integer titleNo,Integer dataNo,Integer sheetNo) throws IOException {
        String fileName = file.getOriginalFilename();  //获得上传的excel文件名
        assert fileName != null;
        String fileSuffix = fileName.substring(fileName.lastIndexOf(".") + 1);  //获取上传的excel文件名后缀
        List<Map<String,Object>> mapList = null;
        if ("xlsx".equals(fileSuffix)) {
            SmartExcelTitle smartExcelTitle =xlsxTitleIndex(file);//获取标题,数据列等
            mapList = xlsxImportExcel(file, smartExcelTitle.getTitleIndexMap(), smartExcelTitle.getDataRowStart(),sheetNo);
        } else if ("xls".equals(fileSuffix)) {
            SmartExcelTitle smartExcelTitle =xlsTitleIndex(file);//获取标题,数据列等
            mapList = xlsImportExcel(file, smartExcelTitle.getTitleIndexMap(), smartExcelTitle.getDataRowStart(),sheetNo);
        }
        return mapList;
    }



    /**
     * 文件解析excel2007及以上版本
     *
     * @param file
     * @return
     * @throws IOException
     */
    public List<Map<String,Object>> xlsxImportExcel(MultipartFile file,Map<Integer ,String> titleIndexMap,Integer dataNo,Integer sheetNo)throws IOException {
        log.info("excel2007及以上版本");
        XSSFWorkbook xwb = new XSSFWorkbook(file.getInputStream()); //获取excel工作簿
        XSSFSheet xssfSheet = xwb.getSheetAt(sheetNo); //获取excel的sheet
        if (xssfSheet == null) {
            return null;
        }
        List<Map<String,Object>> mapList = new ArrayList<>();
        //循环获取excel每一行
        for (int rowNum = dataNo; rowNum < xssfSheet.getLastRowNum() + 1; rowNum++) {
            XSSFRow xssfRow = xssfSheet.getRow(rowNum);
            if (xssfRow == null) {
                continue;
            }
            Map<String,Object> map = new HashMap<>();
            //循环获取excel每一行的每一列
            int i=0;
            for (int cellNum = 0; cellNum < xssfSheet.getLastRowNum(); cellNum++) {
                XSSFCell xssCell = xssfRow.getCell(cellNum);
                if (xssCell == null) {
                    continue;
                }
                try {
                    //根据列的下标获取列对应的字段
                    String k=titleIndexMap.get(cellNum);
                    if(StringUtil.isEmpty(k)){
                        continue;
                    }
                    if(getValue(xssCell)==null){
                        continue;
                    }
                    map.put(k,getValue(xssCell));
                    i++;
                }catch (Exception e){
                    log.error("");
                }
            }
            if(i>0){
                mapList.add(map);  //将excel每一行的数据封装到map对象,并将map对象添加到list
            }
        }
        return mapList;
    }


    /**
     * @param file
     * @return 文件解析 excel2003版本
     * @throws IOException
     */
    public List<Map<String,Object>> xlsImportExcel(MultipartFile file,Map<Integer ,String> titleIndexMap,Integer dataNo,Integer sheetNo) throws IOException {
        log.info("excel2003版本");
        Workbook wb = new HSSFWorkbook(file.getInputStream()); //获取excel工作簿
        Sheet sheet = wb.getSheetAt(sheetNo);  //获取excel的sheet
        if (sheet == null) {
            return null;
        }
        List<Map<String,Object>> list = new ArrayList<>();
        //循环获取excel每一行
        for (int rowNum = dataNo; rowNum < sheet.getLastRowNum() + 1; rowNum++) {
            Row row = sheet.getRow(rowNum);
            if (row == null) {
                continue;
            }
            Map<String,Object> map = new HashMap<>();
            //循环获取excel每一行的每一列
            int i=0;
            for (int cellNum = 0; cellNum < row.getLastCellNum(); cellNum++) {
                Cell cell = row.getCell(cellNum);
                if (cell == null) {
                    continue;
                }
                //根据列的下标获取列对应的字段
                String k=titleIndexMap.get(cellNum);
                if(StringUtil.isEmpty(k)){
                    continue;
                }
                if(getValue(cell)==null){
                    continue;
                }
                map.put(k,getValue(cell));
                i++;
            }
            if(i>0){
                list.add(map);  //将excel每一行的数据封装到map对象,并将map对象添加到list
            }
        }
        return list;
    }


    /**
     * @param file
     * @return 获取标题位置 excel2007及以上版本
     * @throws IOException
     */
    public SmartExcelTitle xlsxTitleIndex(MultipartFile file) throws IOException {
        Integer titleNo=0;
        Integer sheetNo=0;
        log.info("excel2007及以上版本");
        XSSFWorkbook xwb = new XSSFWorkbook(file.getInputStream()); //获取excel工作簿
        XSSFSheet xssfSheet = xwb.getSheetAt(sheetNo); //获取excel的sheet
        if (xssfSheet == null) {
            return null;
        }
        Row rowTitle = xssfSheet.getRow(titleNo);

        Map<String ,String> cellMap=new HashMap<>();
        Map<Integer ,String> titleIndexMap=new HashMap<>();
        int startRow=-1;    //标题开始行
        int endRow=-1;      //标题结束行
        int dataRow=-1;     //数据开始行
        boolean isFor=true;
        for (int rowNum = 0; rowNum < xssfSheet.getLastRowNum() + 1 && isFor; rowNum++) {
            XSSFRow xssfRow = xssfSheet.getRow(rowNum);
            if (xssfRow == null) {
                continue;
            }
            //循环获取excel每一行的每一列
            int hit=0;
            for (int cellNum = 0; cellNum < rowTitle.getLastCellNum(); cellNum++) {
                XSSFCell xssCell = xssfRow.getCell(cellNum);
                if (xssCell == null) {
                    continue;
                }
                //用于转换单元格类型为STRING,如果直接拿值会报错
                xssCell.setCellType(Cell.CELL_TYPE_STRING);
                String cellValue=xssCell.getStringCellValue();
                //判断里面是否有名称,规格,数量等标题性质强的关键词
                String titleVal= smartExcelDict.find(cellValue);
                //判断此字段是否有过定义,列定义以第一次定义为准
                String k=titleIndexMap.get(cellNum);
                if(StringUtil.isEmpty(k)){
                    //对列进行定义
                    titleIndexMap.put(cellNum,titleVal);
                }

                if(StringUtil.isNotEmpty(titleVal)){
                    hit++;//有效列计数,如果循环结束大于0则表示此行中有标题列
                    cellMap.put(cellValue,titleVal);
                    //记录标题开始的行
                    if(startRow==-1){
                        startRow=rowNum;//设置标题开始行
                    }
                    endRow=rowNum;//设置标题结束行
                }
            }
            if(endRow!=-1 && hit==0){
                //如果上一个行有能匹配到的标题关键词,且此次没有匹配到标题,则表示标题部分结束
                isFor=false;
                dataRow=rowNum;
                continue;
            }

        }
        SmartExcelTitle smartExcelTitle =new SmartExcelTitle();
        smartExcelTitle.setTitleMap(cellMap);
        smartExcelTitle.setTitleIndexMap(titleIndexMap);
        smartExcelTitle.setTitleRowStart(startRow);
        smartExcelTitle.setTitleRowEnd(endRow);
        smartExcelTitle.setDataRowStart(dataRow);
        return smartExcelTitle;
    }

    /**
     * @param file
     * @return 获取标题位置  excel2003版本
     * @throws IOException
     */
    public SmartExcelTitle xlsTitleIndex(MultipartFile file) throws IOException {
        Integer sheetNo=0;
        log.info("excel2003版本");
        Workbook wb = new HSSFWorkbook(file.getInputStream()); //获取excel工作簿
        Sheet sheet = wb.getSheetAt(sheetNo);  //获取excel的sheet
        if (sheet == null) {
            return null;
        }

        Map<String ,String> cellMap=new HashMap<>();
        Map<Integer ,String> titleIndexMap=new HashMap<>();
        int startRow=-1;
        int endRow=-1;
        int dataRow=-1;
        boolean isFor=true;
        for (int rowNum = 0; rowNum < sheet.getLastRowNum() + 1 && isFor; rowNum++) {
            Row row = sheet.getRow(rowNum);
            if (row == null) {
                continue;
            }
            //循环获取excel每一行的每一列
            int hit=0;
            for (int cellNum = 0; cellNum < row.getLastCellNum(); cellNum++) {
                Cell cell = row.getCell(cellNum);
                if (cell == null) {
                    continue;
                }
                cell.setCellType(Cell.CELL_TYPE_STRING);
                String cellValue=cell.getStringCellValue();
                String titleVal= smartExcelDict.find(cellValue);
                String k=titleIndexMap.get(cellNum);
                if(StringUtil.isEmpty(k)){
                    titleIndexMap.put(cellNum,titleVal);
                }
                if(StringUtil.isNotEmpty(titleVal)){
                    hit++;
                    cellMap.put(cellValue,titleVal);
                    //记录标题开始的行
                    if(startRow==-1){
                        startRow=rowNum;
                    }
                    endRow=rowNum;
                }
            }
            if(endRow!=-1 && hit==0){
                //如果上一个行有能匹配到的标题关键字,且此次不能匹配到标题关键字,则表示标题部分结束
                isFor=false;
                dataRow=rowNum;
                continue;
            }
        }
        SmartExcelTitle smartExcelTitle =new SmartExcelTitle();
        smartExcelTitle.setTitleMap(cellMap);
        smartExcelTitle.setTitleIndexMap(titleIndexMap);
        smartExcelTitle.setTitleRowStart(startRow);
        smartExcelTitle.setTitleRowEnd(endRow);
        smartExcelTitle.setDataRowStart(dataRow);
        return smartExcelTitle;
    }


    /**
     * excel值处理
     *
     * @param cell
     * @return
     */
    public Object getValue(Cell cell){
        //判断是否为null或空串
        if  (cell== null  || cell.toString().trim().equals( "" )) {
            return null ;
        }
        String cellValue;
        int  cellType=cell.getCellType();
        switch  (cellType) {
            case  Cell.CELL_TYPE_STRING:
                cellValue= cell.getStringCellValue().trim();
                cellValue= StringUtil.isEmpty(cellValue) ?  ""  : cellValue;
                break ;
            case  Cell.CELL_TYPE_BOOLEAN:
                cellValue = String.valueOf(cell.getBooleanCellValue());
                break ;
            case  Cell.CELL_TYPE_NUMERIC:
                if  (HSSFDateUtil.isCellDateFormatted(cell)) {
                    cellValue =null;// DateUtil.formatDateByFormat(cell.getDateCellValue(), "yyyy-MM-dd HH:mm:ss");
                }  else  {
                    cellValue = new DecimalFormat( "#.######" ).format(cell.getNumericCellValue());
                }
                break ;
            default :
                cellValue = null ;
                break ;
        }
        return cellValue == null ? null : cellValue.replaceAll("\\xa0", "");
    }






}

实体类

@Data
public class SmartExcelTitle {

    private int titleRowStart;//标题开始行
    private int titleRowEnd;//标题结束行
    private int dataRowStart;//数据开始行
    private Map<String ,String> titleMap; //标题列数< 标题名称,字段名称》
    private Map<Integer ,String> titleIndexMap; //标题列数< 列下标,字段名称》

}
@Data
public class SmartExcelQuoteProduct {

    private String name;        //产品名称
    private String spec;        //规格
    private String quantity;    //数量
    private String salePackage; //包装
    private String price;       //价格
    private String casNo;       //casno

}

测试

导入的excel

 返回结果

{
    "data": [
        {
            "name": "1.5mL进样瓶瓶盖(带垫片)",
            "spec": "1.5mL  100个/包",
            "quantity": "200",
            "salePackage": null,
            "price": null,
            "casNo": null
        },
        {
            "name": "小口无内盖试剂瓶",
            "spec": "125ml",
            "quantity": "200",
            "salePackage": null,
            "price": null,
            "casNo": null
        },
        {
            "name": "N-丙基乙二胺(PSA)净化填料",
            "spec": "SLB-PSA-100,100g",
            "quantity": "2",
            "salePackage": null,
            "price": null,
            "casNo": null
        },
        {
            "name": "移液枪头(配艾本德eppendorf)",
            "spec": "1mL,长度101mm",
            "quantity": "10",
            "salePackage": null,
            "price": null,
            "casNo": null
        },
        {
            "name": "苏丹红混标",
            "spec": "100ug/mL",
            "quantity": "1",
            "salePackage": null,
            "price": null,
            "casNo": null
        },
        {
            "name": "罗丹明B",
            "spec": "CAS:81-88-9;100ug/mL",
            "quantity": "1",
            "salePackage": null,
            "price": null,
            "casNo": null
        }
],
    "code": 0,
    "msg": "智能数据解析完成41"
}

导入的excel

返回的结果 

{
    "data": [
        {
            "name": "苯丙醇胺",
            "spec": null,
            "quantity": null,
            "salePackage": null,
            "price": null,
            "casNo": "37577-28-9"
        },
        {
            "name": "去甲伪麻黄碱",
            "spec": null,
            "quantity": null,
            "salePackage": null,
            "price": null,
            "casNo": "37577-07-4"
        },
        {
            "name": "麻黄碱",
            "spec": null,
            "quantity": null,
            "salePackage": null,
            "price": null,
            "casNo": "299-42-3"
        },
        {
            "name": "伪麻黄碱",
            "spec": null,
            "quantity": null,
            "salePackage": null,
            "price": null,
            "casNo": "321-97-1"
        }
],
    "code": 0,
    "msg": "智能数据解析完成33"
}

结束

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值