WORD03、WORD07 和 PPT 转 HTML

项目中需要把 OFFICE 系列文档转成 HTML 展示出来,所以写了个工具类,支持 WORD03、WORD07 与 PPT 的转换;由于 EXCEL 基本不需要,所以没有实现。由于 POI 是将整个文件读入到内存中,所以无法转换过大的 OFFICE 文档。


需要引入的JAR包(MAVEN):

		<dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.9</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.9</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.9</version>
        </dependency>  
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
            <version>1.0.4</version>
        </dependency> 

xdocreport所需要的包如果找不到可以在下面地址下载:

下载地址


代码:

package com.resource.util;

import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.RenderingHints;
import java.awt.image.BufferedImage;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.RichTextRun;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.HtmlDocumentFacade;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.DefaultContentHandlerFactory;
import org.apache.poi.xwpf.converter.xhtml.IContentHandlerFactory;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.ContentHandler;

public class OfficeTool {
	private String path;		// path of the source Office file
	private String imageSavePath;	// directory where extracted/rendered images are saved
	private String imageUrl;	// prefix put in front of image names to build the img src attribute
	private String output;		// path of the generated HTML file (null when not supplied)
	private HtmlDocumentFacade htmlDocumentFacade;	// HTML DOM builder used for the PPT page, created in init()
	private Element window;		// outermost block of the PPT page (id="window")
	private Element topbar;		// never assigned in this class
	private Element info;		// header block that receives the PPT file name (id="info")
	private Element outline;	// block holding the slide outline list (id="outline")
	private Element page;		// block holding the rendered slide images (id="page")
	private Element ul;		// unordered list inside the outline, one item per slide

	/**
	 * Creates a converter for the given Office file and derives every other
	 * location from it: images go to {@code <parent>/<name>_img/} and the HTML
	 * output to {@code <parent>/<name>.html}. The image directory is created
	 * immediately.
	 *
	 * @param file the Office document to convert
	 */
	public OfficeTool(File file) {
		// Resolve to an absolute file first: getParentFile() returns null for a
		// relative File without a parent (e.g. new File("a.doc")), which used
		// to end in a NullPointerException.
		File source = file.getAbsoluteFile();
		String parentDir = source.getParentFile().getAbsolutePath();
		this.path = source.getAbsolutePath();
		String filename = this.getFileName(path, false);
		this.imageSavePath = parentDir + "/" + filename + "_img/";
		// Strip a leading "D:" drive prefix and collapse runs of separators
		// into a single "/" so the value can be used as an image URL prefix.
		this.imageUrl = this.imageSavePath.replaceAll("([D][:])*[/|\\\\]+", "/");
		this.output = parentDir + "/" + filename + ".html";
		new File(this.imageSavePath).mkdirs();
	}

	/**
	 * Creates a converter with explicit locations and no HTML output path.
	 * The image directory is created immediately.
	 *
	 * @param path          path of the Office document to convert
	 * @param imageSavePath directory in which images are stored
	 * @param imageUrl      prefix used when building image src attributes
	 */
	public OfficeTool(String path, String imageSavePath, String imageUrl) {
		// Same effect as the four-argument constructor with a null output:
		// the "output" field simply stays null.
		this(path, imageSavePath, imageUrl, null);
	}

	/**
	 * Creates a converter with explicit locations. The image directory is
	 * created immediately.
	 *
	 * @param path          path of the Office document to convert
	 * @param imageSavePath directory in which images are stored
	 * @param imageUrl      prefix used when building image src attributes
	 * @param output        path of the HTML file to write
	 */
	public OfficeTool(String path, String imageSavePath, String imageUrl,
			String output) {
		this.path = path;
		this.output = output;
		this.imageUrl = imageUrl;
		this.imageSavePath = imageSavePath;
		// Make sure the image directory (and any missing parents) exists.
		new File(imageSavePath).mkdirs();
	}

	/**
	 * Converts the configured document with {@code convert2Html}.
	 *
	 * @return the generated HTML, or an empty string when the conversion
	 *         fails with one of the caught exceptions (the stack trace is
	 *         printed)
	 */
	public String get03Html() {
		try {
			return convert2Html(path);
		} catch (TransformerException | IOException
				| ParserConfigurationException e) {
			e.printStackTrace();
			return "";
		}
	}

	/**
	 * Converts the configured document with {@code doGenerateHTMLFile}.
	 *
	 * @return the generated HTML, or an empty string when an
	 *         {@link IOException} occurs (the stack trace is printed)
	 */
	public String get07Html() {
		try {
			return this.doGenerateHTMLFile();
		} catch (IOException e) {
			e.printStackTrace();
			return "";
		}
	}
	
	/**
	 * Converts the configured PPT file into the HTML output file and reads
	 * that file back as a GB2312 string.
	 *
	 * @return the content returned by {@code MyFileUtils.fileToString}, or an
	 *         empty string when an {@link IOException} or
	 *         {@link TransformerException} occurs (the stack trace is printed)
	 */
	public String getPPTHtml(){
		try {
			this.convert(this.path, this.output);
			// Read back with the same charset the HTML was serialized in.
			return MyFileUtils.fileToString(output,"GB2312");
		} catch (IOException | TransformerException e) {
			e.printStackTrace();
			return "";
		}
	}

	/**
	 * Writes the given HTML to the configured output file.
	 *
	 * @param content HTML text to store
	 * @return the path of the output file
	 */
	public String save03AsHtml(String content) {
		final String target = this.output;
		writeFile(content, target);
		return target;
	}

	/**
	 * Writes the given HTML to the configured output file.
	 *
	 * @param content HTML text to store
	 * @return the path of the output file
	 */
	public String save07AsHtml(String content) {
		final String htmlFile = this.output;
		writeFile(content, htmlFile);
		return htmlFile;
	}

	/**
	 * Writes {@code content} to the file at {@code path}, encoded as GB2312.
	 * An {@link IOException} is not propagated; its stack trace is printed.
	 *
	 * @param content text to write
	 * @param path    destination file
	 */
	private void writeFile(String content, String path) {
		try (FileOutputStream stream = new FileOutputStream(path);
				OutputStreamWriter encoder = new OutputStreamWriter(stream, "GB2312");
				BufferedWriter writer = new BufferedWriter(encoder)) {
			writer.write(content);
		} catch (IOException ioe) {
			ioe.printStackTrace();
		}
	}

	/**
	 * Converts a Word 97-2003 (.doc) file to HTML and stores its pictures in
	 * {@code imageSavePath}; picture references in the HTML are built from
	 * {@code imageUrl} plus the picture's suggested name.
	 *
	 * @param fileName path of the .doc file
	 * @return the HTML markup, serialized as GB2312 and decoded with the same
	 *         charset
	 * @throws TransformerException         if serializing the HTML DOM fails
	 * @throws IOException                  if the document cannot be read or a
	 *                                      picture cannot be written
	 * @throws ParserConfigurationException if no DOM builder can be created
	 */
	private String convert2Html(String fileName) throws TransformerException,
			IOException, ParserConfigurationException {
		// Must match between the serializer and the final byte-to-String step.
		final String encoding = "GB2312";

		HWPFDocument wordDocument;
		// The document is loaded by the constructor, so the stream can be
		// closed right away (and is closed even when loading fails).
		try (FileInputStream in = new FileInputStream(fileName)) {
			wordDocument = new HWPFDocument(new POIFSFileSystem(in));
		}

		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
				DocumentBuilderFactory.newInstance().newDocumentBuilder()
						.newDocument());
		wordToHtmlConverter.setPicturesManager(new PicturesManager() {
			@Override
			public String savePicture(byte[] content, PictureType pictureType,
					String suggestedName, float widthInches, float heightInches) {
				// Only the URL is produced here; the files are written below.
				return imageUrl + suggestedName;
			}
		});
		wordToHtmlConverter.processDocument(wordDocument);

		// Save every picture of the document next to the HTML.
		List<?> pics = wordDocument.getPicturesTable().getAllPictures();
		if (pics != null) {
			for (Object item : pics) {
				Picture pic = (Picture) item;
				// try-with-resources: the stream used to be left open.
				try (FileOutputStream imageOut = new FileOutputStream(
						imageSavePath + pic.suggestFullFileName())) {
					pic.writeImageContent(imageOut);
				} catch (FileNotFoundException e) {
					// Best effort: skip a picture whose file cannot be created.
					e.printStackTrace();
				}
			}
		}

		Document htmlDocument = wordToHtmlConverter.getDocument();
		ByteArrayOutputStream out = new ByteArrayOutputStream();

		Transformer serializer = TransformerFactory.newInstance().newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");
		serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

		// Decode with the charset the bytes were written in; the no-arg
		// toString() used the platform default and garbled non-ASCII text on
		// systems whose default is not GB2312-compatible.
		return out.toString(encoding);
	}

	/**
	 * Converts the configured Word 2007 (.docx) file to XHTML. Images are
	 * extracted into {@code imageSavePath} and referenced in the markup as
	 * {@code imageUrl} followed by the image URI.
	 * 
	 * @return the generated XHTML markup
	 * @throws IOException if the document cannot be read or converted
	 */
	private String doGenerateHTMLFile() throws IOException {
		XWPFDocument document;
		// Close the input stream once the document object has been built;
		// previously it was never closed.
		try (FileInputStream in = new FileInputStream(new File(path))) {
			document = new XWPFDocument(in);
		}

		XHTMLOptions options = XHTMLOptions.create();
		// Extract images to the image directory.
		options.setExtractor(new FileImageExtractor(new File(imageSavePath)));
		// Rewrite image URIs so they point at the published image location.
		options.URIResolver(new IURIResolver() {
			@Override
			public String resolve(String uri) {
				return imageUrl + uri;
			}
		});

		ByteArrayOutputStream out = new ByteArrayOutputStream();
		XHTMLConverter.getInstance().convert(document, out, options);
		// NOTE(review): the bytes are decoded with the platform default
		// charset, as before; confirm which encoding the converter writes.
		return out.toString();
	}
	
	
	/**
	 * Converts a PPT file to an HTML page.
	 * <p>
	 * The page skeleton is always initialised first. If the file is not
	 * accepted by {@code isPPt}, nothing else happens. Otherwise the slides
	 * are processed (any exception from that step is printed, not rethrown)
	 * and the resulting document is saved.
	 *
	 * @param filePath PPT file path
	 * @param output   HTML file path
	 * @throws IOException          if writing the HTML file fails
	 * @throws TransformerException if serializing the HTML document fails
	 */
	public void convert(String filePath, String output) throws IOException,
			TransformerException {
		this.init();
		File source = new File(filePath);
		if (isPPt(source)) {
			try {
				process(source, output);
			} catch (Exception e) {
				e.printStackTrace();
			}
			// Saved even after a failed process(): whatever was built so far
			// is written out.
			saveAsHtml(output, htmlDocumentFacade.getDocument());
		}
	}

	/**
	 * Renders every slide of a PPT file to a PNG image and adds the matching
	 * outline entry and slide block to the HTML document.
	 * <p>
	 * Images are written to {@code imageSavePath} as
	 * {@code <pptName>_<n>.png} (n starts at 1) and referenced through
	 * {@code imageUrl}.
	 * 
	 * @param pptFile the PPT file to render
	 * @param output  HTML save path (not used by this method)
	 * @throws Exception if the file cannot be read or an image cannot be
	 *                   rendered or written
	 */
	private void process(File pptFile, String output) throws Exception {
		SlideShow ppt;
		// try-with-resources closes the stream even when loading fails.
		try (FileInputStream is = new FileInputStream(pptFile)) {
			ppt = new SlideShow(is);
		}
		Dimension pgsize = ppt.getPageSize();
		org.apache.poi.hslf.model.Slide[] slide = ppt.getSlides();
		this.info.appendChild(this.htmlDocumentFacade.createText(this
				.getFileName(pptFile.getPath(), true)));

		// Loop-invariant values, computed once.
		String baseName = getFileName(pptFile.getPath(), false);
		String imgPath = this.imageSavePath;

		for (int i = 0; i < slide.length; i++) {

			addSlideTitle(slide[i]);

			for (TextRun textRun : slide[i].getTextRuns()) {
				for (RichTextRun richText : textRun.getRichTextRuns()) {
					String fontName = richText.getFontName();
					// NOTE(review): the original author marked this as a
					// possible POI bug workaround. The listed Latin fonts are
					// replaced by "宋体"; other runs get their own font name
					// re-applied.
					if (isTrueType(fontName)) {
						richText.setFontName("宋体");
					} else {
						richText.setFontName(fontName);
					}
				}
			}

			BufferedImage img = new BufferedImage(pgsize.width, pgsize.height,
					BufferedImage.TYPE_INT_RGB);
			Graphics2D graphics = img.createGraphics();
			try {
				graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING,
						RenderingHints.VALUE_ANTIALIAS_ON);
				graphics.setRenderingHint(RenderingHints.KEY_RENDERING,
						RenderingHints.VALUE_RENDER_QUALITY);
				graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
						RenderingHints.VALUE_INTERPOLATION_BICUBIC);
				graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS,
						RenderingHints.VALUE_FRACTIONALMETRICS_ON);

				// fillRect paints with the current colour. clearRect, used
				// before, fills with the surface background colour and
				// ignores setColor, so the white was never applied.
				graphics.setColor(Color.white);
				graphics.fillRect(0, 0, pgsize.width, pgsize.height);

				slide[i].draw(graphics);
			} finally {
				// Release the native resources held by the graphics context.
				graphics.dispose();
			}

			String imgName = baseName + "_" + (i + 1) + ".png";
			if (i == 0) {
				new File(imgPath).mkdirs();
			}
			// try-with-resources closes the file even if encoding fails.
			try (FileOutputStream out = new FileOutputStream(imgPath + imgName)) {
				javax.imageio.ImageIO.write(img, "png", out);
			}

			addSlide(imageUrl + imgName, slide[i].getSlideNumber());
		}
	}

	/**
	 * Appends one outline entry for a slide: a list item containing a link to
	 * {@code #link<slideNumber>} whose text is
	 * {@code "<slideNumber>: <title>"}, with {@code "Untitled"} standing in
	 * for a blank title.
	 * 
	 * @param slide the slide to describe
	 */
	private void addSlideTitle(org.apache.poi.hslf.model.Slide slide) {
		String slideTitle = slide.getTitle();
		int number = slide.getSlideNumber();

		String label;
		if (StringUtils.isBlank(slideTitle)) {
			label = number + ": Untitled";
		} else {
			label = number + ": " + slideTitle;
		}

		Element item = htmlDocumentFacade.createListItem();
		Element anchor = htmlDocumentFacade.createHyperlink("#link" + number);
		anchor.appendChild(htmlDocumentFacade.createText(label));
		item.appendChild(anchor);
		this.ul.appendChild(item);
	}

	/**
	 * Appends one slide block to the page: a block with class {@code slide}
	 * holding a bookmark named {@code link<index>} followed by the slide
	 * image.
	 * 
	 * @param imagePath value for the image's source
	 * @param index     slide number used in the bookmark name
	 */
	private void addSlide(String imagePath, int index) {
		Element container = htmlDocumentFacade.createBlock();
		container.setAttribute("class", "slide");

		// Bookmark first so the outline links jump to the top of the slide.
		Element bookmark = htmlDocumentFacade.createBookmark("link" + index);
		Element picture = htmlDocumentFacade.createImage(imagePath);
		container.appendChild(bookmark);
		container.appendChild(picture);

		this.page.appendChild(container);
	}

	/**
	 * Builds the empty HTML skeleton for a PPT page: a "window" block that
	 * contains the "info", "outline" (with its unordered list) and "page"
	 * blocks, attached to the document body, plus the common style rules.
	 * A {@link ParserConfigurationException} is printed, not rethrown.
	 */
	private void init() {
		try {
			Document document = DocumentBuilderFactory.newInstance()
					.newDocumentBuilder().newDocument();
			htmlDocumentFacade = new HtmlDocumentFacade(document);

			// Global layout blocks.
			window = newBlockWithId("window");
			info = newBlockWithId("info");
			outline = newBlockWithId("outline");
			page = newBlockWithId("page");

			// The outline holds the list that receives one item per slide.
			ul = htmlDocumentFacade.createUnorderedList();
			outline.appendChild(ul);

			window.appendChild(info);
			window.appendChild(outline);
			window.appendChild(page);
			htmlDocumentFacade.getBody().appendChild(window);

			setCommonStyle(htmlDocumentFacade.getDocument());
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
		}
	}

	/** Creates a block element through the facade and sets its id attribute. */
	private Element newBlockWithId(String id) {
		Element block = htmlDocumentFacade.createBlock();
		block.setAttribute("id", id);
		return block;
	}

	/**
	 * Serializes the DOM document as an HTML 4.01 Transitional page, encoded
	 * as GB2312, into the given file.
	 *
	 * @param output   path of the HTML file to write
	 * @param document DOM document to serialize
	 * @throws IOException          if the file cannot be opened or written
	 * @throws TransformerException if serialization fails
	 */
	private void saveAsHtml(String output, org.w3c.dom.Document document)
			throws IOException, TransformerException {
		TransformerFactory tf = TransformerFactory.newInstance();
		Transformer serializer = tf.newTransformer();
		// TODO set encoding from a command argument
		serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");
		serializer.setOutputProperty(OutputKeys.STANDALONE, "yes");
		serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC,
				"-//W3C//DTD HTML 4.01 Transitional//EN");
		// loose.dtd is the system identifier that belongs to the Transitional
		// public identifier; strict.dtd was a mismatch.
		serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
				"http://www.w3.org/TR/html4/loose.dtd");

		// Hand the serializer a byte stream so it encodes the text itself with
		// the declared charset. The FileWriter used before encoded with the
		// platform default, so the bytes could disagree with the declared
		// GB2312. try-with-resources also closes the file on failure.
		try (FileOutputStream out = new FileOutputStream(output)) {
			serializer.transform(new DOMSource(document), new StreamResult(out));
		}
	}

	/**
	 * Tells whether the given file is a readable regular file whose name ends
	 * with the {@code ppt} extension (case-insensitive).
	 *
	 * @param file the file to check; {@code null} yields {@code false}
	 * @return {@code true} if the file can be read and has a ppt extension
	 */
	private static boolean isPPt(File file) {
		if (file == null || !file.isFile() || !file.canRead()) {
			return false;
		}
		// Look at the file name only, so a dot in a directory name cannot be
		// mistaken for the extension separator.
		String name = file.getName();
		int sep = name.lastIndexOf('.');
		if (sep < 0) {
			// No extension at all (previously a file literally named "ppt"
			// was accepted).
			return false;
		}
		return "ppt".equalsIgnoreCase(name.substring(sep + 1));
	}

	/**
	 * Checks whether the font name is exactly one of "Tahoma",
	 * "Times New Roman", "Calibri" or "Arial" (case-sensitive).
	 *
	 * @param fontName font name to test; {@code null} yields {@code false}
	 * @return {@code true} if the name is in the list above
	 */
	public static boolean isTrueType(String fontName) {
		return "Tahoma".equals(fontName)
				|| "Times New Roman".equals(fontName)
				|| "Calibri".equals(fontName)
				|| "Arial".equals(fontName);
	}

	/**
	 * Appends the page's CSS rules, one per line, as a text node to the first
	 * {@code style} element of the document. Does nothing when the document
	 * has no {@code style} element.
	 *
	 * @param document the HTML DOM document to style
	 */
	private void setCommonStyle(Document document) {
		Element styleSheet = (Element) document.getElementsByTagName("style")
				.item(0);
		if (styleSheet != null) {
			final String[] rules = {
				"html{height: 100%;overflow-y:hidden;}",
				"body{height:100%;overflow-y: hidden;margin:0; background:#bdc2cd}",
				"#window{min-width: 800px;height:100%;}",
				"#info{font-size: 24px;font-weight: 800;border-bottom:2px solid gray;height:5%; background:#eee;padding-left:5px;}",
				"#outline{float:left;height:95%;width:20%;overflow-y: auto; overflow-x:hidden;background:#fff; "
						+ "margin-right:3%; -moz-box-shadow: 4px 4px 12px #2b2b2b;-webkit-box-shadow: 4px 4px 12px #2b2b2b;"
						+ "box-shadow: 4px 4px 12px #2b2b2b;}",
				"#outline ul li {list-style:none;line-height: 25px;}",
				"#outline ul li a{text-decoration:none;white-space:nowrap;text-overflow:ellipsis;}",
				"#page{float:left;overflow-y: auto;overflow-x:hidden;height:95%;width:77%;margin-right:-20%;background:#bdc2cd; text-align:center;}",
				"#page div{width:100%;height:100%;}"
			};

			// Leading newline, then every rule terminated by a newline.
			StringBuilder css = new StringBuilder("\n");
			for (String rule : rules) {
				css.append(rule).append("\n");
			}
			styleSheet.appendChild(document.createTextNode(css.toString()));
		}
	}

	/**
	 * Extracts the file name from a path. The last {@code /} is used as the
	 * separator; the last {@code \} is used only when the path contains no
	 * {@code /}.
	 *
	 * @param path     file path; {@code null} or blank yields an empty string
	 * @param issuffix {@code true} to keep the extension, {@code false} to
	 *                 drop everything from the last dot of the file name
	 * @return the file name, with or without its extension
	 */
	private String getFileName(String path, boolean issuffix) {
		// Check first: the path used to be dereferenced before this test, so
		// null caused a NullPointerException.
		if (StringUtils.isBlank(path)) {
			return "";
		}
		int index = path.lastIndexOf("/") == -1 ? path.lastIndexOf("\\") : path
				.lastIndexOf("/");
		String name = path.substring(index + 1);
		if (issuffix) {
			return name;
		}
		// Search the dot inside the file name only. A name without a dot, or a
		// dot that belongs to a directory, used to throw
		// StringIndexOutOfBoundsException.
		int dot = name.lastIndexOf('.');
		return dot == -1 ? name : name.substring(0, dot);
	}

	/**
	 * Returns the directory part of a path, i.e. everything before the last
	 * separator. The last {@code /} is used as the separator; the last
	 * {@code \} is used only when the path contains no {@code /}.
	 *
	 * @param path file path; {@code null} or blank yields an empty string
	 * @return the directory part, or an empty string when the path has no
	 *         separator
	 */
	private String getFilePath(String path) {
		// Check first: the path used to be dereferenced before this test.
		if (StringUtils.isBlank(path)) {
			return "";
		}
		int index = path.lastIndexOf("/") == -1 ? path.lastIndexOf("\\") : path
				.lastIndexOf("/");
		// Without any separator there is no directory part; substring(0, -1)
		// used to throw StringIndexOutOfBoundsException here.
		return index == -1 ? "" : path.substring(0, index);
	}

}

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值