首先在 pom.xml 中添加以下 Maven 依赖:
<!-- XDocReport: document core and the XWPF (docx) to XHTML converter. -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<!-- Apache POI: every POI artifact must use the same version; mixing versions is unsupported. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.13</version>
</dependency>
完整代码如下:
package com.safeneeds.commana.web;
import com.safeneeds.commana.ser.IUploadService;
import com.safeneeds.filemana.entity.TFileInfo;
import com.safeneeds.util.base.BaseAction;
import com.safeneeds.util.base.RetType;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.io.FileUtils;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.commons.CommonsMultipartFile;
import javax.annotation.Resource;
import java.io.*;
import java.util.UUID;
/**
 * REST endpoint that converts an uploaded {@code .docx} document into XHTML.
 *
 * <p>Images embedded in the document are extracted to a per-request scratch directory,
 * uploaded through {@link IUploadService}, and the image references in the generated
 * HTML are rewritten to point at the uploaded files.
 *
 * @author czw
 * @create 2021-08-23 15:24
 */
@RestController
@RequestMapping("/wordToHtml")
public class WordToHtmlAction extends BaseAction {
    private static final Logger logger = LoggerFactory.getLogger(WordToHtmlAction.class);

    /** Sub-directory of the extraction root in which the extracted images are looked up. */
    private static final String IMAGE_SUB_DIR = "/word/media";

    @Resource(name="uploadService")
    private IUploadService uploadService;

    /** Prefix prepended to an uploaded file's path when rewriting image references. */
    @Value("${FILEBASEPATH}")
    private String FILEBASEPATH;

    /** Base directory under which the per-request image scratch directories are created. */
    @Value("${FileSavePath}")
    private String FILE_SAVE_PATH;

    /**
     * Converts the uploaded docx document to HTML and returns it as the result data.
     *
     * @param file the uploaded docx document
     * @return a {@link RetType} carrying the HTML on success, or the error text on failure
     * @throws IOException if closing the in-memory HTML buffer fails
     */
    @RequestMapping("/getWordHtmlInfo.idop")
    public RetType docxToHtmlText(MultipartFile file) throws IOException {
        RetType rt = new RetType();
        if (file == null) {
            rt.doError("file must not be null");
            return rt;
        }

        // Every request gets its own scratch directory: extracting straight into the shared
        // FILE_SAVE_PATH (and deleting it afterwards) would let concurrent requests overwrite
        // or delete each other's images, and would wipe the configured directory itself.
        String workDirStr = FILE_SAVE_PATH + "/" + UUID.randomUUID().toString().replace("-", "");
        File workDir = new File(workDirStr);

        try (InputStream docxStream = file.getInputStream();
             XWPFDocument docxDocument = new XWPFDocument(docxStream);
             ByteArrayOutputStream htmlStream = new ByteArrayOutputStream()) {
            XHTMLOptions options = XHTMLOptions.create();
            // Images are written below workDir and referenced in the HTML with the same prefix,
            // so the references can be located again by plain string replacement.
            options.setExtractor(new FileImageExtractor(workDir));
            options.URIResolver(new BasicURIResolver(workDirStr));

            docxDocument.createNumbering();
            XHTMLConverter.getInstance().convert(docxDocument, htmlStream, options);
            String htmlStr = htmlStream.toString();

            try {
                htmlStr = uploadImagesAndRewrite(htmlStr, workDirStr + IMAGE_SUB_DIR);
            } catch (Exception e) {
                logger.error("upload docxToHtmlText exception", e);
                rt.doError(e.toString());
                return rt;
            }

            rt.doSuccess(htmlStr);
            return rt;
        } catch (Exception e) {
            logger.error("docxToHtmlText 解析异常", e);
            rt.doError(e.toString());
            return rt;
        } finally {
            // Always remove the scratch directory, including on early returns and failures.
            // A cleanup failure must not turn a successful conversion into an error response.
            if (workDir.exists() && !FileUtils.deleteQuietly(workDir)) {
                logger.warn("could not delete temporary image directory {}", workDirStr);
            }
        }
    }

    /**
     * Uploads every image found in {@code imageDirStr} and replaces its local path in the
     * HTML with the uploaded location. Images whose upload is reported as unsuccessful keep
     * their original reference.
     *
     * @param html        the converted HTML
     * @param imageDirStr directory containing the extracted images
     * @return the HTML with the references of successfully uploaded images rewritten
     * @throws Exception if reading an image or the upload call fails
     */
    private String uploadImagesAndRewrite(String html, String imageDirStr) throws Exception {
        String[] imageNames = new File(imageDirStr).list();
        if (imageNames == null) {
            // Directory missing: the document contained no images.
            return html;
        }
        String result = html;
        for (String imageName : imageNames) {
            String localPath = imageDirStr + "/" + imageName;
            File imageFile = new File(localPath);
            if (!imageFile.isFile()) {
                continue;
            }
            RetType uploaded;
            try (InputStream imageStream = new FileInputStream(imageFile)) {
                MultipartFile multipartFile = new MockMultipartFile(imageName, imageStream);
                uploaded = uploadService.uplaodFile(multipartFile, imageName,
                        UUID.randomUUID().toString().replaceAll("-", ""));
            }
            if (!uploaded.isSuccess()) {
                logger.warn("upload of extracted image {} was not successful", imageName);
                continue;
            }
            TFileInfo fileInfo = (TFileInfo) uploaded.getData();
            // Point the image reference at the uploaded file instead of the local scratch path.
            result = result.replace(localPath, FILEBASEPATH + fileInfo.getFilePath());
        }
        return result;
    }

    /**
     * Wraps the file at the given path in a {@link MultipartFile}.
     *
     * @param picPath path of the file to wrap
     * @return a multipart file holding the file's content
     */
    private MultipartFile getMulFileByPath(String picPath) {
        return new CommonsMultipartFile(createFileItem(picPath));
    }

    /**
     * Creates a {@link FileItem} and copies the content of the given file into it.
     *
     * @param filePath path of the file to read
     * @return the populated file item
     * @throws IllegalStateException if the file cannot be read
     */
    private FileItem createFileItem(String filePath) {
        FileItemFactory factory = new DiskFileItemFactory(16, null);
        File source = new File(filePath);

        // Take the extension from the file name only; a name without '.' has none.
        String fileName = source.getName();
        int dot = fileName.lastIndexOf('.');
        String extFile = dot >= 0 ? fileName.substring(dot) : "";

        FileItem item = factory.createItem("textField", "text/plain", true,
                "MyFileName" + extFile);
        try (InputStream in = new FileInputStream(source);
             OutputStream out = item.getOutputStream()) {
            byte[] buffer = new byte[8192];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
        } catch (IOException e) {
            // An item without content is unusable downstream, so fail with the cause attached.
            throw new IllegalStateException("Failed to read file: " + filePath, e);
        }
        return item;
    }

    /**
     * Manual smoke test. The instance is created directly rather than by the container,
     * so the {@code @Resource}/{@code @Value} fields are not injected here.
     */
    public static void main(String[] args) {
        WordToHtmlAction action = new WordToHtmlAction();
        try {
            RetType result = action.docxToHtmlText(action.getMulFileByPath("F:\\test.docx"));
            System.out.println(String.valueOf(result.getData()));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}