Java 将 Word 文档转换成 HTML

最新推荐文章于 2023-11-13 17:16:15 发布

TAGEER

最新推荐文章于 2023-11-13 17:16:15 发布

阅读量135

点赞数

文章标签： java word 转换成 html

package com.cong.quartz.util;

import org.apache.commons.io.FileUtils;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.apache.poi.xwpf.converter.core.BasicURIResolver;

import org.apache.poi.xwpf.converter.core.FileImageExtractor;

import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;

import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import java.io.*;

import java.util.List;

/**

* word 转换成html

public class WordToHtml {

/**

* docx转换成html

public static void Word2007ToHtml(String datestr,String id,String docpath,String htmlpath, String docname, String htmlname) throws IOException {

///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/doc/202010/0e5b5829881647248a58c4fd94cb41e3.docx

final String file = docpath + "/" + datestr + "/" + docname;

File f = new File(file);

if (!f.exists()) {

System.out.println("Sorry File does not Exists!");

} else {

if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {

// 1) 加载word文档生成 XWPFDocument对象

InputStream in = new FileInputStream(f);

XWPFDocument document = new XWPFDocument(in);

// 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)

///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/80c0ce15d6e940408b157f00f10d523d/image5.png

File htmlFolderFile = new File(htmlpath + "/" + datestr);

if(!htmlFolderFile.exists()){

htmlFolderFile.mkdirs();

}

File imageFolderFile = new File(htmlpath + "/" + datestr+ "/" + id);

if(!imageFolderFile.exists()){

imageFolderFile.mkdirs();

}

//XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));

XHTMLOptions options = XHTMLOptions.create();;

options.setExtractor(new FileImageExtractor(imageFolderFile));

//图片位置---这里需要改变

options.URIResolver(new BasicURIResolver(id));

options.setIgnoreStylesIfUnused(false);

options.setFragment(true);

// 2) Prepare XHTML options (here we set the IURIResolver to load images from a "word/media" folder)

// 3) 将 XWPFDocument转换成XHTML

///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/00b4fe3d59ac486187f2f5173e359075.html

String targetFileName = htmlpath + "/" + datestr + "/" + htmlname;

OutputStreamWriter outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");

XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();

xhtmlConverter.convert(document, outputStreamWriter, options);

//OutputStream out = new FileOutputStream(new File(htmlpath + "/" + datestr + "/" + htmlname));

//XHTMLConverter.getInstance().convert(document, out, options);

} else {

System.out.println("Enter only MS Office 2007+ files");

}

/**

* doc转换成html

public static void convert2Html(String datestr,String id,String docpath,String htmlpath,String docname, String htmlname)

throws TransformerException, IOException,

ParserConfigurationException {

///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/doc/202010/0e5b5829881647248a58c4fd94cb41e3.docx

File htmlFolderFile = new File(htmlpath + "/" + datestr);

if(!htmlFolderFile.exists()){

htmlFolderFile.mkdirs();

}

File imageFolderFile = new File(htmlpath + "/" + datestr+ "/" + id);

if(!imageFolderFile.exists()){

imageFolderFile.mkdirs();

}

HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(docpath+"/"+datestr+"/"+docname));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(

DocumentBuilderFactory.newInstance().newDocumentBuilder()

.newDocument());

wordToHtmlConverter.setPicturesManager( new PicturesManager()

{

public String savePicture(byte[] content,

PictureType pictureType, String suggestedName,

float widthInches, float heightInches )

{

return htmlpath + "/" + datestr + "/" + id + "/" +suggestedName;

}

} );

wordToHtmlConverter.processDocument(wordDocument);

//save pictures

List pics=wordDocument.getPicturesTable().getAllPictures();

if(pics!=null){

for(int i=0;i

Picture pic = (Picture)pics.get(i);

System.out.println();

try {

///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/80c0ce15d6e940408b157f00f10d523d/image5.png

pic.writeImageContent(new FileOutputStream(htmlpath + "/" + datestr + "/" + id + "/" + pic.suggestFullFileName()));

} catch (FileNotFoundException e) {

e.printStackTrace();

}

Document htmlDocument = wordToHtmlConverter.getDocument();

ByteArrayOutputStream out = new ByteArrayOutputStream();

DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(out);

TransformerFactory tf = TransformerFactory.newInstance();

Transformer serializer = tf.newTransformer();

//serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");

serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "html");

serializer.transform(domSource, streamResult);

out.close();

String content = new String(out.toByteArray());

String imgWebPath = htmlpath + "/" + datestr + "/" + id;

content = content.replace(imgWebPath, id);

String targetFilePath = htmlpath + "/" + datestr + "/" +htmlname;

FileUtils.writeStringToFile(new File(targetFilePath), content, "utf-8");

//writeFile(new String(out.toByteArray()), htmlpath + "/" + datestr + "/" +htmlname);

}

public static void writeFile(String content, String path) {

FileOutputStream fos = null;

BufferedWriter bw = null;

try {

File file = new File(path);

fos = new FileOutputStream(file);

// bw = new BufferedWriter(new OutputStreamWriter(fos,"GB2312"));

bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));

bw.write(content);

} catch (FileNotFoundException fnfe) {

fnfe.printStackTrace();

} catch (IOException ioe) {

ioe.printStackTrace();

} finally {

try {

if (bw != null)

bw.close();

if (fos != null)

fos.close();

} catch (IOException ie) {

}

TAGEER

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
java word 转换成 html,java word 转换成html

package com.cong.quartz.util;import org.apache.commons.io.FileUtils;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.PicturesManager;import org.apache.poi.hwpf.converter.Wo...
复制链接

扫一扫