package com.example.word2excel;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Reads the paragraphs of a Word (.docx) document, groups them into entries
 * that each start at a title line, splits every entry's body into
 * {@code 【key】value} fields and writes the result to an Excel (.xlsx) sheet.
 */
public class WordReader {

    /** Input document used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INPUT_PATH = "E:\\*药膳学-药膳.docx";

    /** Output workbook used by {@link #main(String[])} when no second argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "E:\\*药膳配方10.xlsx";

    /**
     * Matches text that does NOT start with '【' or a digit.
     * Because '.' does not match line terminators, text that contains a line
     * break never matches either and is therefore handled as body text.
     */
    private static final Pattern HEADING_CANDIDATE = Pattern.compile("^[^\\【\\d].*");

    /** Matches text that contains none of the listed punctuation characters. */
    private static final Pattern NO_PUNCTUATION = Pattern.compile("^[^,。、)(]*$");

    /** Column headers of the generated sheet; also the map keys read for each row. */
    private static final String[] COLUMNS =
            {"类目", "productName", "组成", "功效", "方解", "来源", "使用注意", "制法用法", "应用", "证型", "附方"};

    /**
     * Converts a Word document into an Excel workbook.
     *
     * @param args optional: {@code args[0]} is the input .docx path and
     *             {@code args[1]} the output .xlsx path; the built-in default
     *             paths are used for whichever argument is missing
     */
    public static void main(String[] args) {
        String inputPath = args != null && args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args != null && args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;
        List<Map<String, String>> entries = paragraphs(inputPath);
        saveExcel(outputPath, entries);
    }

    /**
     * Parses the document at {@code filePath} into a list of entries.
     *
     * <p>Every non-blank paragraph (after trimming) is classified as follows:
     * <ol>
     *   <li>Text that starts with '【' or a digit is appended to the body of the
     *       current entry.</li>
     *   <li>Text containing both "第" and "节 " is a section heading: whatever
     *       follows "节 " becomes the category ("类目") of the entries that
     *       come after it.</li>
     *   <li>Text without punctuation and without "第" is a title: the entry
     *       collected so far (if any) is finished and a new one is started with
     *       the title stored under "productName".</li>
     *   <li>Any other text is appended to the body, but only once the first
     *       entry has been started.</li>
     * </ol>
     * When an entry is finished its body is split into fields by
     * {@link #extractKeyValuePairs(Map, String)}.
     *
     * @param filePath path of the .docx file to read
     * @return the parsed entries in document order; empty if no title was found
     * @throws UncheckedIOException if the document cannot be opened or read
     */
    public static List<Map<String, String>> paragraphs(String filePath) {
        List<Map<String, String>> entries = new ArrayList<>();
        // try-with-resources closes both the stream and the document even when parsing fails.
        try (InputStream in = Files.newInputStream(Paths.get(filePath));
             XWPFDocument doc = new XWPFDocument(in)) {
            Map<String, String> current = new HashMap<>();
            StringBuilder body = new StringBuilder();
            String category = "";
            boolean started = false;

            for (XWPFParagraph paragraph : doc.getParagraphs()) {
                String text = paragraph.getText().trim();
                if (text.isEmpty()) {
                    continue;
                }
                if (!HEADING_CANDIDATE.matcher(text).matches()) {
                    // Field text ("【...】..." or numbered lines): collected and split later on '【'.
                    body.append(text);
                    continue;
                }
                if (text.contains("第") && text.contains("节 ")) {
                    // Section heading: remember its name as the category of the following entries.
                    category = text.substring(text.indexOf("节 ") + 2);
                } else if (NO_PUNCTUATION.matcher(text).matches() && !text.contains("第")) {
                    // Title line: finish the previous entry, then start a new one.
                    if (!current.isEmpty()) {
                        extractKeyValuePairs(current, body.toString());
                        entries.add(current);
                    }
                    current = new HashMap<>();
                    current.put("productName", text);
                    current.put("类目", category);
                    // Anything gathered before this title belongs to the previous entry (or to none).
                    body.setLength(0);
                    started = true;
                } else if (started) {
                    // Continuation text that belongs to the entry being collected.
                    body.append(text);
                }
            }

            // The last entry has no following title to trigger its completion.
            if (!current.isEmpty()) {
                extractKeyValuePairs(current, body.toString());
                entries.add(current);
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read Word document: " + filePath, e);
        }
        return entries;
    }

    /**
     * Splits {@code content} into {@code 【key】value} fields and stores them in
     * {@code foodMap}.
     *
     * <p>Text before the first '【' is ignored. A field without a value is
     * stored with an empty string. For the "应用" field whose value contains
     * "证。", the text before the first "证。" plus "证" is additionally stored
     * under "证型".
     *
     * @param foodMap map that receives the extracted fields (modified in place)
     * @param content concatenated body text of one entry; {@code null} or empty
     *                content leaves {@code foodMap} untouched
     */
    public static void extractKeyValuePairs(Map<String, String> foodMap, String content) {
        if (content == null || content.isEmpty()) {
            return;
        }
        String[] segments = content.split("【");
        // Index 0 is whatever precedes the first '【' and carries no key.
        for (int i = 1; i < segments.length; i++) {
            // String.split drops trailing empty strings, so the array may hold
            // 0, 1 or more elements; never index it without a length check.
            String[] keyValue = segments[i].split("】");
            String key = keyValue.length > 0 ? keyValue[0] : "";
            String value = keyValue.length > 1 ? keyValue[1] : "";
            foodMap.put(key, value);

            if ("应用".equals(key) && value.contains("证。")) {
                String[] beforeMarker = value.split("证。");
                foodMap.put("证型", beforeMarker.length >= 1 ? beforeMarker[0] + "证" : "");
            }
        }
    }

    /**
     * Writes {@code data} to a new .xlsx workbook with a single sheet "Sheet1".
     *
     * <p>The first row holds the fixed column headers; every following row
     * holds one entry, with an empty cell for each column the entry lacks.
     * A {@code null} or empty {@code data} list produces a header-only sheet.
     *
     * @param filePath path of the .xlsx file to create or overwrite
     * @param data     entries to write, one row per map
     * @throws UncheckedIOException if the workbook cannot be written
     */
    public static void saveExcel(String filePath, List<Map<String, String>> data) {
        List<Map<String, String>> rows =
                data != null ? data : Collections.<Map<String, String>>emptyList();
        try (Workbook workbook = new XSSFWorkbook()) {
            Sheet sheet = workbook.createSheet("Sheet1");

            // Header row.
            Row headerRow = sheet.createRow(0);
            for (int col = 0; col < COLUMNS.length; col++) {
                headerRow.createCell(col).setCellValue(COLUMNS[col]);
            }

            // Data rows.
            int rowIndex = 1;
            for (Map<String, String> record : rows) {
                Row dataRow = sheet.createRow(rowIndex++);
                for (int col = 0; col < COLUMNS.length; col++) {
                    String value = record.get(COLUMNS[col]);
                    Cell cell = dataRow.createCell(col);
                    cell.setCellValue(value != null ? value : "");
                }
            }

            // Open the target only now so a failure while building the sheet
            // does not leave a truncated file behind.
            try (OutputStream outputStream = new FileOutputStream(filePath)) {
                workbook.write(outputStream);
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to write Excel workbook: " + filePath, e);
        }
    }
}
/*
 * Add the following dependencies to pom.xml (inside the <dependencies> element).
 * Note: hutool-all is not imported by the WordReader class in this file.
 *
 * <dependency>
 * <groupId>org.apache.poi</groupId>
 * <artifactId>poi</artifactId>
 * <version>5.2.2</version>
 * </dependency>
 * <dependency>
 * <groupId>cn.hutool</groupId>
 * <artifactId>hutool-all</artifactId>
 * <version>5.0.0</version>
 * </dependency>
 * <dependency>
 * <groupId>org.apache.poi</groupId>
 * <artifactId>poi-ooxml</artifactId>
 * <version>5.2.2</version>
 * </dependency>
 */