代码生成docx数据表转换为xml文件

最新推荐文章于 2024-06-27 00:07:49 发布

zerone-f

最新推荐文章于 2024-06-27 00:07:49 发布

阅读量1.6k

点赞数

分类专栏： java 文章标签： Docx表转换为xml文档

本文链接：https://blog.csdn.net/someby/article/details/103400699

版权

java 专栏收录该内容

64 篇文章 1 订阅

订阅专栏

背景

在参与软件设计文档的修改时，需要将docx文档中的数据表录入到xml文档中。第一次录入时，一个字段一个字段地录入实在是太过麻烦，遇到有几十个字段的表，眼睛都看花了还是录不完，于是想着使用代码去读取相应的表格，将其中的数据表生成xml文件。
当然，在写脚本的时候，大家可能首先想到的是使用Python进行编写。由于我的电脑中没有安装Python环境，就使用已有的环境编写了Java代码进行转换。在读取表的时候，还有很多问题可以改进，而且由于文档的规范不同，处理还不是很全面，现在将自己的实现记录下来。

DOCX的数据表

表格式一

表格式二

针对遇到的这两种格式的表编写代码，生成相应的xml文件。

代码实现

Docx2XMLUtil.java

package docx2xml;

import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

/**
 * Converts the tables of a .docx document into XML {@code <table>} fragments.
 *
 * <p>Each table in the document is rendered as one {@code <table>} element that contains a
 * block of {@code <rem>} remark lines followed by one {@code <column>} element per data row.
 * Every rendered table is printed to standard output and appended to the configured output
 * file, so running the conversion twice against the same output file duplicates its content.
 *
 * <p>The {@code id}, {@code javaId} and {@code name} attributes of the {@code <table>} element
 * are always emitted empty and have to be filled in by hand.
 *
 * @author StriveFarrell
 */
public class Docx2XMLUtil {
    /** Path of the .docx document to read. */
    private String docxFilePath;
    /** Path of the file the generated XML is appended to. */
    private String xmlFileSavePath;
    /** Name written into the remark block of every generated table. */
    private String author;

    public String getDocxFilePath() {
        return docxFilePath;
    }

    public void setDocxFilePath(String docxFilePath) {
        this.docxFilePath = docxFilePath;
    }

    public String getXmlFileSavePath() {
        return xmlFileSavePath;
    }

    public void setXmlFileSavePath(String xmlFileSavePath) {
        this.xmlFileSavePath = xmlFileSavePath;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    /**
     * Runs the conversion for the configured input document.
     *
     * <p>The converter is chosen from the file extension (case-insensitive). Any failure,
     * including a missing path, an unsupported extension or an I/O error, is reported by
     * printing the stack trace; no exception is propagated to the caller.
     */
    public void getTableData() {
        try {
            String filePath = getDocxFilePath();
            if (filePath == null) {
                throw new IllegalStateException("docxFilePath has not been set");
            }
            // Locale.ROOT keeps the extension check independent of the default locale.
            String lowerCasePath = filePath.toLowerCase(Locale.ROOT);
            if (lowerCasePath.endsWith(".docx")) {
                docx2xml();
            } else if (lowerCasePath.endsWith(".doc")) {
                doc2xml();
            } else {
                throw new IllegalArgumentException("Unsupported file type: " + filePath);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders every table of the .docx document, prints it and appends it to the output file.
     *
     * @throws IOException if the document cannot be opened, read or closed
     */
    private void docx2xml() throws IOException {
        String tableHeaderInfo = getTableHeader();
        String remInfo = getRemInfo();

        try (XWPFDocument document = getXWPFDocument()) {
            Iterator<XWPFTable> tabItr = document.getTablesIterator();
            int tableIndex = 1;
            while (tabItr.hasNext()) {
                XWPFTable table = tabItr.next();
                String xmlString = tableHeaderInfo
                        + remInfo
                        + getTableColumn(table)
                        + getEndTableTag()
                        + "\n\n\n\n";
                testPrint(String.valueOf(tableIndex), xmlString);
                saveXml(xmlString);
                tableIndex++;
            }
        }
    }

    /**
     * Placeholder for the legacy .doc format, which is not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    private void doc2xml() {
        throw new UnsupportedOperationException("Converting .doc files is not supported yet");
    }

    /**
     * Prints a labelled block of text to standard output.
     *
     * @param message label printed before the text
     * @param out     text to print
     */
    private void testPrint(String message, String out) {
        System.out.println(message + ":\n" + out);
    }

    /**
     * Returns the current date formatted as {@code yyyy.MM.dd}.
     *
     * @return the formatted current date
     */
    private String getDate() {
        SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd");
        return df.format(new Date());
    }

    /**
     * Appends the given text to the output file, encoded as UTF-8.
     *
     * <p>An I/O failure is reported by printing the stack trace and does not abort the
     * conversion of the remaining tables.
     *
     * @param data text to append
     * @throws IllegalStateException if no output path has been configured
     */
    private void saveXml(String data) {
        String saveXmlPath = getXmlFileSavePath();
        if (saveXmlPath == null) {
            throw new IllegalStateException("xmlFileSavePath has not been set");
        }
        // UTF-8 is written explicitly: the fragments carry no XML declaration, and the
        // platform default charset would otherwise decide how non-ASCII text is stored.
        try (Writer writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(saveXmlPath, true), StandardCharsets.UTF_8))) {
            writer.write(data);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether a column of the given data type gets a {@code size} attribute.
     *
     * @param cell data type name, compared case-insensitively
     * @return {@code false} for DATETIME, TEXT, TIMESTAMP and LONGTEXT, {@code true} otherwise
     */
    private boolean hasSize(String cell) {
        return !("DATETIME".equalsIgnoreCase(cell)
                || "TEXT".equalsIgnoreCase(cell)
                || "TIMESTAMP".equalsIgnoreCase(cell)
                || "LONGTEXT".equalsIgnoreCase(cell));
    }

    /**
     * Detects the table layout from its header row.
     *
     * @param header cells of the first table row
     * @return {@code true} if some header cell contains "长度", in which case the size is read
     *         from its own cell; {@code false} if the size has to be parsed out of the type
     *         cell, e.g. {@code VARCHAR(20)}
     */
    private boolean getTableStyle(List<XWPFTableCell> header) {
        for (XWPFTableCell headerCell : header) {
            if (headerCell.getText().contains("长度")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Opens the configured .docx document.
     *
     * <p>The file stream is closed before returning; the caller is responsible for closing
     * the returned document.
     *
     * @return the opened document
     * @throws IOException if the file does not exist or cannot be read as a .docx document
     */
    private XWPFDocument getXWPFDocument() throws IOException {
        try (InputStream in = new FileInputStream(getDocxFilePath())) {
            return new XWPFDocument(in);
        }
    }

    /**
     * Returns the opening {@code <table>} tag with empty id, javaId and name attributes.
     *
     * @return the opening tag followed by a line break
     */
    private String getTableHeader() {
        return "<table id=\"\" javaId=\"\" name=\"\" >\n";
    }

    /**
     * Returns the closing tag of the {@code <table>} element.
     *
     * @return the closing tag
     */
    private String getEndTableTag() {
        return "</table>";
    }

    /**
     * Builds the remark block holding the author name and the current date.
     *
     * @return four {@code <rem>} lines; the author is XML-escaped and empty when not set
     */
    private String getRemInfo() {
        String separator =
                "\t<rem>====================================================================</rem>\n";
        String safeAuthor = author == null ? "" : escapeXml(author);
        return separator
                + "\t<rem> 输入人:" + safeAuthor + "\t输入时间：" + getDate() + "</rem>\n"
                + "\t<rem>table description</rem>\n"
                + separator;
    }

    /**
     * Renders every data row of the table as a {@code <column>} element.
     *
     * <p>The first row is treated as the header and only used to detect the layout.
     *
     * @param table table to render
     * @return the concatenated {@code <column>} lines, or an empty string for a table
     *         without rows
     */
    private String getTableColumn(XWPFTable table) {
        List<XWPFTableRow> rowList = table.getRows();
        if (rowList == null || rowList.isEmpty()) {
            return "";
        }
        boolean sizeInOwnCell = getTableStyle(rowList.get(0).getTableCells());

        StringBuilder columns = new StringBuilder();
        for (int i = 1; i < rowList.size(); i++) {
            columns.append(buildColumnTag(rowList.get(i).getTableCells(), sizeInOwnCell));
        }
        return columns.toString();
    }

    /**
     * Renders one table row as a self-closing {@code <column>} element.
     *
     * <p>Cells are mapped by position: 0 is the id, 1 the type, 2 the size (only used when the
     * size has its own cell and the type takes a size), 3 the constraint text and 4 the name.
     * Cell text is trimmed and upper-cased. Further cells are ignored.
     *
     * @param cellList      cells of the row
     * @param sizeInOwnCell layout flag as returned by {@link #getTableStyle(List)}
     * @return the element followed by a line break, or an empty string for a row without cells
     */
    private String buildColumnTag(List<XWPFTableCell> cellList, boolean sizeInOwnCell) {
        if (cellList == null || cellList.isEmpty()) {
            return "";
        }
        StringBuilder row = new StringBuilder("\t<column ");
        boolean typeTakesSize = false;
        for (int j = 0; j < cellList.size(); j++) {
            String cell = cellList.get(j).getText().trim().toUpperCase(Locale.ROOT);
            switch (j) {
                case 0:
                    appendAttribute(row, "id", cell);
                    break;
                case 1:
                    if (sizeInOwnCell) {
                        typeTakesSize = hasSize(cell);
                        appendAttribute(row, "type", cell);
                    } else {
                        appendTypeAndSize(row, cell);
                    }
                    break;
                case 2:
                    if (typeTakesSize) {
                        appendAttribute(row, "size", cell);
                    }
                    break;
                case 3:
                    appendAttribute(row, "primaryKey", String.valueOf(cell.contains("主键")));
                    appendAttribute(row, "required", String.valueOf(cell.contains("非空")));
                    break;
                case 4:
                    appendAttribute(row, "name", cell);
                    break;
                default:
                    break;
            }
        }
        // Closing the tag after the loop keeps the element well-formed for short rows too.
        row.append("/>\n");
        return row.toString();
    }

    /**
     * Splits a type cell such as {@code VARCHAR(20)} into type and size attributes.
     *
     * <p>A cell without parentheses yields only a type attribute. Full-width parentheses are
     * accepted as well, and a missing closing parenthesis makes the size run to the end of
     * the cell.
     *
     * @param row  buffer of the element being built
     * @param cell text of the type cell
     */
    private void appendTypeAndSize(StringBuilder row, String cell) {
        String normalized = cell.replace('（', '(').replace('）', ')');
        int startIndex = normalized.indexOf('(');
        if (startIndex < 0) {
            appendAttribute(row, "type", normalized);
            return;
        }
        int endIndex = normalized.indexOf(')', startIndex + 1);
        if (endIndex < 0) {
            endIndex = normalized.length();
        }
        appendAttribute(row, "type", normalized.substring(0, startIndex).trim());
        appendAttribute(row, "size", normalized.substring(startIndex + 1, endIndex).trim());
    }

    /**
     * Appends {@code name="value" } (with a trailing space) to the buffer.
     *
     * @param row   buffer of the element being built
     * @param name  attribute name
     * @param value raw attribute value; it is XML-escaped before being written
     */
    private void appendAttribute(StringBuilder row, String name, String value) {
        row.append(name).append("=\"").append(escapeXml(value)).append("\" ");
    }

    /**
     * Escapes the characters that are not allowed verbatim inside an XML attribute value.
     *
     * @param value raw text
     * @return the text with {@code &}, {@code <}, {@code >} and {@code "} replaced by entities
     */
    private static String escapeXml(String value) {
        // '&' must be replaced first so the entities added afterwards are not escaped again.
        return value.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace("\"", "&quot;");
    }
}

Docx2XMLUtilTest.java

package docx2xml;


/**
 * Command-line driver that converts the tables of a .docx document to XML with
 * {@link Docx2XMLUtil}.
 *
 * <p>Usage: {@code Docx2XMLUtilTest [docxFilePath [xmlFileSavePath [author]]]}. Every argument
 * that is left out falls back to the built-in default below.
 *
 * @author StriveFarrell
 */
public class Docx2XMLUtilTest {
    /** Default path of the .docx document to read. */
    private static final String DEFAULT_DOCX_FILE_PATH = "D:\\MyFile\\workLearning\\spark\\src\\files\\docx2xml3.docx";
    /** Default path of the generated XML file. */
    private static final String DEFAULT_XML_FILE_SAVE_PATH = "D:\\MyFile\\workLearning\\spark\\src\\files\\docx2xml3.xml";
    /** Default name recorded as the person who entered the tables. */
    private static final String DEFAULT_AUTHOR = "Hello Table";

    /**
     * Configures a {@link Docx2XMLUtil} and runs the conversion.
     *
     * @param args optional overrides: input path, output path, author (in that order)
     */
    public static void main(String[] args) {
        Docx2XMLUtil util = new Docx2XMLUtil();
        util.setDocxFilePath(argumentOrDefault(args, 0, DEFAULT_DOCX_FILE_PATH));
        util.setAuthor(argumentOrDefault(args, 2, DEFAULT_AUTHOR));
        util.setXmlFileSavePath(argumentOrDefault(args, 1, DEFAULT_XML_FILE_SAVE_PATH));
        util.getTableData();
    }

    /**
     * Returns the argument at the given position, or the fallback when it was not supplied.
     *
     * @param args     command-line arguments, may be {@code null}
     * @param index    position of the wanted argument
     * @param fallback value used when the argument is absent
     * @return the argument or the fallback
     */
    private static String argumentOrDefault(String[] args, int index, String fallback) {
        return args != null && args.length > index ? args[index] : fallback;
    }
}

生成XML格式

表格式一XML

<table id="" javaId="" name="" >
   <rem>====================================================================</rem>
   <rem> 输入人:Hello Table    输入时间：2019.12.05</rem>
   <rem>table description</rem>
   <rem>====================================================================</rem>
   <column id="COLUMN_ID" type="VARCHAR" size="20" primaryKey="true" required="true" name="信息项定义主键" />
   <column id="IS_PRIMARY_KEY" type="INT" size="2" primaryKey="false" required="true" name="表明是否为主键(0:否;1:是)" />
   <column id="NOT_NULL" type="INT" size="2" primaryKey="false" required="true" name="0:可以为空；1：不可为空。" />
   <column id="LENGTH" type="VARCHAR" size="11" primaryKey="false" required="true" name="长度" />
   <column id="TYPE" type="VARCHAR" size="22" primaryKey="false" required="true" name="类型" />
   <column id="NAME_EN" type="VARCHAR" size="2048" primaryKey="false" required="false" name="英文名称" />
   <column id="NAME_CN" type="VARCHAR" size="2048" primaryKey="false" required="false" name="中文名称" />
   <column id="CATA_ID" type="VARCHAR" size="20" primaryKey="false" required="true" name="目录ID" />
<\table>

表格式二XML

<table id="" javaId="" name="" >
   <rem>====================================================================</rem>
   <rem> 输入人:zhangqx02    输入时间：2019.12.05</rem>
   <rem>table description</rem>
   <rem>====================================================================</rem>
   <column id="COLUMN_ID" type="VARCHAR" size="20" primaryKey="true" required="true" name="信息项定义主键" />
   <column id="IS_PRIMARY_KEY" type="INT" size="2" primaryKey="false" required="true" name="表明是否为主键(0:否;1:是)" />
   <column id="NOT_NULL" type="INT" size="2" primaryKey="false" required="true" name="0:可以为空；1：不可为空。" />
   <column id="LENGTH" type="VARCHAR" size="11" primaryKey="false" required="true" name="长度" />
   <column id="TYPE" type="VARCHAR" size="22" primaryKey="false" required="true" name="类型" />
   <column id="NAME_EN" type="VARCHAR" size="2048" primaryKey="false" required="false" name="英文名称" />
   <column id="NAME_CN" type="VARCHAR" size="2048" primaryKey="false" required="false" name="中文名称" />
   <column id="CATA_ID" type="VARCHAR" size="20" primaryKey="false" required="true" name="目录ID" />
<\table>

这个工具还有很多不完善的地方，比如没有生成table标签的id、javaId和name等属性的值，以后有时间再去处理。