引
需要给官网做一个文档管理的后台服务,需要一个上传Word文档转换成HTML格式返回给前端的功能,下面和大家一起来看看,后端部分的逻辑实现。
实现逻辑
需要用到的引用:
compile('org.apache.poi:poi:4.1.0')
compile('org.apache.poi:poi-ooxml:4.1.0')
compile('org.apache.poi:poi-ooxml-schemas:4.1.0')
compile('org.apache.poi:poi-scratchpad:4.1.0')
compile('fr.opensagres.xdocreport:xdocreport:2.0.2')
compile('org.apache.poi:ooxml-schemas:1.4')
doc转html实现
/**
* 上传Word文档,返回解析后的Html
*/
public String uploadDocFile(MultipartFile file) throws Exception{
String filePath = this.getClass().getClassLoader().getResource("file").getPath();
String targetFileName = filePath +"/"+ "temp.html";
File target = new File(targetFileName);
target.getParentFile().mkdirs();
//将上传的文件传入Document转换
HWPFDocument wordDocument = new HWPFDocument(file.getInputStream());
Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
// word文档转Html文档
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(new File(targetFileName));
//将读取到的图片上传并添加链接地址
wordToHtmlConverter.setPicturesManager((imageStream, pictureType, name, width, height) -> {
try {
String imageUrl = uploadImages(imageStream);
} catch (Exception e) {
e.printStackTrace();
}
return imageUrl;
});
//生成html文件
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
// 读取并过滤文件格式
String htmlContent = splitContext(targetFileName);
// 删除生成的html文件
File files = new File(targetFileName);
files.delete();
return htmlContent;
}
/**
* 过滤html文件内容
* @param filePath
* @return
*/
public static String splitContext(String filePath) {
File file = new File(filePath);
BufferedReader reader = null;
try {
String tempString;
InputStreamReader isr = new InputStreamReader(new FileInputStream(file), "UTF-8");
reader = new BufferedReader(isr);
StringBuilder sb = new StringBuilder();
while ((tempString = reader.readLine()) != null) {
sb.append(tempString);
}
reader.close();
// 将文件中的双引号替换为单引号
String content = sb.toString().replaceAll("\"","\'");
return content;
} catch (IOException e) {
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e1) {
}
}
}
return "";
}
docx转html实现
/**
* 上传docx文档,返回解析后的Html
*/
public Stirng uploadDocXFile(MultipartFile file) throws Exception{
// 将上传的文件传入Document转换
XWPFDocument docxDocument = new XWPFDocument(file.getInputStream());
XHTMLOptions options = XHTMLOptions.create();
// 设置图片存储路径
String path = System.getProperty("java.io.tmpdir");
String firstImagePathStr = path + "/" + System.currentTimeMillis();
options.setExtractor(new FileImageExtractor(new File(firstImagePathStr)));
options.URIResolver(new BasicURIResolver(firstImagePathStr));
// 转换html
ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();
XHTMLConverter.getInstance().convert(docxDocument, htmlStream, options);
String htmlStr = htmlStream.toString();
// 将image文件转换为base64并替换到html字符串里
String middleImageDirStr = "/word/media";
String imageDirStr = firstImagePathStr + middleImageDirStr;
File imageDir = new File(imageDirStr);
String[] imageList = imageDir.list();
String downloadPath;
if (imageList != null) {
for (int i = 0; i < imageList.length; i++) {
String oneImagePathStr = imageDirStr + "/" + imageList[i];
MultipartFile multipartFile = getMulFileByPath(oneImagePathStr);
String imageUrl = uploadImages(multipartFile);
// 也可以直接转成Base64格式处理,如下:
// String imageBase64Str = new String(Base64.encodeBase64(FileUtils.readFileToByteArray(oneImageFile)), "UTF-8");
//修改文档中的图片信息
htmlStr = htmlStr.replace(oneImagePathStr, imageUrl);
}
}
//删除图片路径
File firstImagePath = new File(firstImagePathStr);
FileUtils.deleteDirectory(firstImagePath);
return htmlStr;
}
/**
* 获取MultipartFile文件
* @param picPath
* @return
*/
private static MultipartFile getMulFileByPath(String picPath) {
FileItem fileItem = createFileItem(picPath);
MultipartFile mfile = new CommonsMultipartFile(fileItem);
return mfile;
}
private static FileItem createFileItem(String filePath)
{
FileItemFactory factory = new DiskFileItemFactory(16, null);
String textFieldName = "textField";
int num = filePath.lastIndexOf(".");
String extFile = filePath.substring(num);
FileItem item = factory.createItem(textFieldName, "text/plain", true,
"MyFileName" + extFile);
File newfile = new File(filePath);
int bytesRead = 0;
byte[] buffer = new byte[8192];
try
{
FileInputStream fis = new FileInputStream(newfile);
OutputStream os = item.getOutputStream();
while ((bytesRead = fis.read(buffer, 0, 8192))
!= -1)
{
os.write(buffer, 0, bytesRead);
}
os.close();
fis.close();
}
catch (IOException e)
{
e.printStackTrace();
}
return item;
}
小结
实现主要依赖了POI包,将文件流传入document转换html,读取html文件中的图片进行上传处理,并将上传后的链接补充到文档中。