读取 Word 文档并将数据存入 Excel

package com.example.word2excel;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;


import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Pattern;

public class WordReader {
    /**
     * Entry point: parses a Word document into records and writes them to an Excel workbook.
     *
     * @param args optional overrides; {@code args[0]} is the input .docx path and
     *             {@code args[1]} is the output .xlsx path. Any argument that is not
     *             supplied falls back to the built-in default path.
     */
    public static void main(String[] args) {
        int argCount = (args == null) ? 0 : args.length;
        String filePath = argCount > 0 ? args[0] : "E:\\*药膳学-药膳.docx";
        String filePath2 = argCount > 1 ? args[1] : "E:\\*药膳配方10.xlsx";

        List<Map<String, String>> list = paragraphs(filePath);
        saveExcel(filePath2, list);
    }
    /**
     * Reads a .docx file and groups its paragraphs into one map per record.
     *
     * <p>Each non-empty, trimmed paragraph is classified as follows:
     * <ul>
     *   <li>Starts with {@code 【} or a digit (or otherwise fails the first pattern): the text is
     *       appended to the pending content buffer.</li>
     *   <li>Contains both "第" and "节 ": the text after "节 " becomes the current category.</li>
     *   <li>Contains none of the punctuation listed in the second pattern and no "第": treated as a
     *       record title. The previous record (if any) is finalised via
     *       {@code extractKeyValuePairs} and added to the result; a new record is started with the
     *       keys "productName" and "类目", and the buffer is cleared.</li>
     *   <li>Anything else: appended to the buffer, but only once a record has been started.</li>
     * </ul>
     * After the last paragraph the record in progress, if any, is finalised and added.
     *
     * @param filePath path of the .docx file to read
     * @return the extracted records in document order; empty if no title paragraph was found
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be opened or read
     */
    public static List<Map<String, String>> paragraphs(String filePath) {
        // Compiled once instead of on every paragraph. Matches text that does NOT start with '【' or a digit.
        Pattern nonFieldLine = Pattern.compile("^[^\\【\\d].*");
        // Matches text containing none of the listed punctuation marks (used to recognise titles).
        Pattern punctuationFree = Pattern.compile("^[^,。、)(]*$");

        List<Map<String, String>> list = new ArrayList<>();

        // try-with-resources closes both the stream and the document even when parsing fails.
        try (InputStream in = Files.newInputStream(Paths.get(filePath));
             XWPFDocument doc = new XWPFDocument(in)) {

            Map<String, String> foodMap = new HashMap<>();
            StringBuilder tempContent = new StringBuilder();
            String type = "";

            for (XWPFParagraph paragraph : doc.getParagraphs()) {
                String text = paragraph.getText().trim();
                if (text.isEmpty()) {
                    continue;
                }

                if (!nonFieldLine.matcher(text).matches()) {
                    // Field line: collect everything and split on '【' later.
                    tempContent.append(text);
                    continue;
                }

                if (text.contains("第") && text.contains("节 ")) {
                    // Section heading: keep the part after "节 " as the category.
                    type = text.substring(text.indexOf("节 ") + 2);
                } else if (punctuationFree.matcher(text).matches() && !text.contains("第")) {
                    // Title line: flush the record in progress, then start a new one.
                    if (!foodMap.isEmpty()) {
                        extractKeyValuePairs(foodMap, tempContent.toString());
                        list.add(foodMap);
                    }
                    foodMap = new HashMap<>();
                    foodMap.put("productName", text);
                    foodMap.put("类目", type);
                    tempContent.setLength(0);
                } else if (!foodMap.isEmpty()) {
                    // Free text is only kept once a record has been started
                    // (a started record always holds "productName", so it is never empty).
                    tempContent.append(text);
                }
            }

            if (!foodMap.isEmpty()) {
                extractKeyValuePairs(foodMap, tempContent.toString());
                list.add(foodMap);
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to read Word document: " + filePath, e);
        }
        return list;
    }
    /**
     * Splits {@code content} into {@code 【key】value} segments and stores them in {@code foodMap}.
     *
     * <p>Text before the first {@code 【} is ignored. For each segment, the text before the first
     * {@code 】} is the key and the text up to the next {@code 】} (if any) is the value; a segment
     * without a value is stored with an empty string. When the key is "应用" and its value contains
     * "证。", the text before the first "证。" plus "证" is additionally stored under "证型".
     *
     * @param foodMap map that receives the extracted entries; modified in place
     * @param content concatenated field text; {@code null} is treated as having no fields
     */
    public static void extractKeyValuePairs(Map<String, String> foodMap, String content) {
        if (content == null) {
            return;
        }
        String[] segments = content.split("【");

        // Index 0 is whatever preceded the first '【', so it carries no key.
        for (int i = 1; i < segments.length; i++) {
            // split() drops trailing empty strings, so the array may have 0 or 1 elements
            // (e.g. "】" or "应用】"); never index it without checking the length.
            String[] parts = segments[i].split("】");
            String key = parts.length > 0 ? parts[0] : "";
            String value = parts.length > 1 ? parts[1] : "";
            foodMap.put(key, value);

            if ("应用".equals(key) && value.contains("证。")) {
                String[] beforeMarker = value.split("证。");
                if (beforeMarker.length >= 1) {
                    foodMap.put("证型", beforeMarker[0] + "证");
                } else {
                    foodMap.put("证型", "");
                }
            }
        }
    }
    /**
     * Writes the records to a new .xlsx workbook with a single sheet named "Sheet1".
     *
     * <p>Row 0 is a header holding a fixed list of column keys; every record becomes one row
     * below it, with each cell filled from the record's value for that column key (an empty
     * string when the key is absent). Keys in a record that are not in the fixed column list
     * are not written.
     *
     * @param filePath path of the .xlsx file to create or overwrite
     * @param data     records to write; {@code null} or empty produces a header-only sheet
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be written
     */
    public static void saveExcel(String filePath, List<Map<String, String>> data) {
        String[] columns = {"类目", "productName", "组成", "功效", "方解", "来源", "使用注意", "制法用法", "应用", "证型", "附方"};
        List<Map<String, String>> records = (data == null) ? Collections.emptyList() : data;

        try (Workbook workbook = new XSSFWorkbook();
             FileOutputStream outputStream = new FileOutputStream(filePath)) {

            Sheet sheet = workbook.createSheet("Sheet1");

            // Header row.
            Row headerRow = sheet.createRow(0);
            for (int col = 0; col < columns.length; col++) {
                headerRow.createCell(col).setCellValue(columns[col]);
            }

            // One data row per record, starting right below the header.
            int rowIndex = 1;
            for (Map<String, String> record : records) {
                Row dataRow = sheet.createRow(rowIndex++);
                for (int col = 0; col < columns.length; col++) {
                    String value = (record == null) ? null : record.get(columns[col]);
                    dataRow.createCell(col).setCellValue(value != null ? value : "");
                }
            }

            workbook.write(outputStream);
        } catch (IOException e) {
            throw new RuntimeException("Failed to write Excel file: " + filePath, e);
        }
    }

}
在 pom.xml 中添加以下依赖:
<dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>5.2.2</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>5.2.2</version>
        </dependency>

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值