java word 转 PDF|HTML|PNG

wang130104

已于 2022-03-27 01:07:31 修改

阅读量426

点赞数

文章标签： java

于 2022-03-27 00:53:28 首次发布

本文链接：https://blog.csdn.net/wangzhijie302/article/details/123767101

版权

@[toc] java word 转 PDF|HTML|PNG
word文档转换图片或pdf格式，需要使用（aspose-words-15.8.0-jdk16.jar）

链接：https://pan.baidu.com/s/1z08E3IkJ8BbTrkJspF_osA
提取码：8qpu

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import com.aspose.words.Document;
import com.aspose.words.ImageSaveOptions;
import com.aspose.words.License;
import com.aspose.words.SaveFormat;

word 转 PDF

    /**
     * word转pdf
     * @param docPath
     * @param savePath
     */
    public static void word2pdf(String docPath,String savePath){
        try {
            String s = "<License><Data><Products><Product>Aspose.Total for Java</Product><Product>Aspose.Words for Java</Product></Products><EditionType>Enterprise</EditionType><SubscriptionExpiry>20991231</SubscriptionExpiry><LicenseExpiry>20991231</LicenseExpiry><SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber></Data><Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature></License>";
            ByteArrayInputStream is = new ByteArrayInputStream(s.getBytes());
            License license = new License();
            license.setLicense(is);
            Document document = new Document(docPath);
            document.save(new FileOutputStream(new File(savePath)),SaveFormat.PDF);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

word转HTML


    /**
     * word转HTML
     * @param docPath
     * @param savePath
     */
    public static void word2HTML(String docPath,String savePath){
        try {
            String s = "<License><Data><Products><Product>Aspose.Total for Java</Product><Product>Aspose.Words for Java</Product></Products><EditionType>Enterprise</EditionType><SubscriptionExpiry>20991231</SubscriptionExpiry><LicenseExpiry>20991231</LicenseExpiry><SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber></Data><Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature></License>";
            ByteArrayInputStream is = new ByteArrayInputStream(s.getBytes());
            License license = new License();
            license.setLicense(is);
            Document document = new Document(docPath);
            document.save(new FileOutputStream(new File(savePath)),SaveFormat.HTML);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

word和txt文件转换图片

    /**
     * word和txt文件转换图片
     * @param inputStream
     * @param pageNum
     * @return
     * @throws Exception
     */
    private static List<BufferedImage> wordToImg(InputStream inputStream, int pageNum) throws Exception {
        if (!isWordLicense()) {
            return null;
        }
        try {
            Date start = new Date();
            Document doc = new Document(inputStream);
            ImageSaveOptions options = new ImageSaveOptions(SaveFormat.PNG);
            options.setPrettyFormat(true);
            options.setUseAntiAliasing(true);
            options.setUseHighQualityRendering(true);
            int pageCount = doc.getPageCount();
            //生成前pageCount张，这可以限制输出长图时的页数
           /*if (pageCount > pageNum) {
               pageCount = pageNum;
           }*/
            List<BufferedImage> imageList = new ArrayList<BufferedImage>();
            for (int i = 0; i < pageCount; i++) {
                OutputStream output = new ByteArrayOutputStream();
                options.setPageIndex(i);
                //setPageSet(com.aspose.words.PageSet value)
                //options.setPageSet(i);
                doc.save(output, options);
                ImageInputStream imageInputStream = javax.imageio.ImageIO.createImageInputStream(parse(output));
                imageList.add(javax.imageio.ImageIO.read(imageInputStream));
            }
            List<BufferedImage> imageList2 = new ArrayList<BufferedImage>();
            //这个重新生成新的图片是因为直接输出的图片底色为红色
            for(int j=0; j<imageList.size(); j++){
                // 生成新图片
                BufferedImage destImage = imageList.get(j);
                int w1 = destImage.getWidth();
                int h1 = destImage.getHeight();
                destImage = new BufferedImage(w1, h1, BufferedImage.TYPE_INT_RGB);
                Graphics2D g2 = (Graphics2D) destImage.getGraphics();
                g2.setBackground(Color.LIGHT_GRAY);
                g2.clearRect(0, 0, w1, h1);
                g2.setPaint(Color.RED);
                // 从图片中读取RGB
                int[] ImageArrayOne = new int[w1 * h1];
                ImageArrayOne = imageList.get(j).getRGB(0, 0, w1, h1, ImageArrayOne, 0, w1); // 逐行扫描图像中各个像素的RGB到数组中
                destImage.setRGB(0, 0, w1, h1, ImageArrayOne, 0, w1); // 设置上半部分或左半部分的RGB
                imageList2.add(destImage);
            }
            Date end = new Date();
            long l=end.getTime()-start.getTime();
            long hour= l / (1000 * 60 * 60);
            long min=  (l-hour*(1000 * 60 * 60 ))/(1000* 60);
            long s= (l-hour*(1000 * 60 * 60 )-min*1000*60)/(1000);
            long ss= (l-hour*(1000 * 60 * 60 )-min*1000*60 -s*1000)/(1000/60);
            System.out.println("word转图片时间:"+min+"分"+s+"秒" + ss + "毫秒");//hour+"小时"+
            return imageList2;
        } catch (Exception e) {
            e.printStackTrace();
            throw e;
        }
    }
 //outputStream转inputStream
    public static ByteArrayInputStream parse(OutputStream out) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        baos = (ByteArrayOutputStream) out;
        ByteArrayInputStream swapStream = new ByteArrayInputStream(baos.toByteArray());
        return swapStream;
    }

去水印

    /**
     * 验证aspose.word组件是否授权：无授权的文件有水印标记
     * 需要使用（aspose-words-15.8.0-jdk16.jar），版本要对应。无水印
     * @return
     */
    public static boolean isWordLicense() {
        boolean result = false;
        try {
            String s = "<License><Data><Products><Product>Aspose.Total for Java</Product><Product>Aspose.Words for Java</Product></Products><EditionType>Enterprise</EditionType><SubscriptionExpiry>20991231</SubscriptionExpiry><LicenseExpiry>20991231</LicenseExpiry><SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber></Data><Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature></License>";
            ByteArrayInputStream inputStream = new ByteArrayInputStream(s.getBytes());
            com.aspose.words.License license = new com.aspose.words.License();
            license.setLicense(inputStream);
            result = true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

合并任数量的图片成一张图片

/**
     * 合并任数量的图片成一张图片
     * @param isHorizontal true代表水平合并，fasle代表垂直合并
     * @param imgs 待合并的图片数组
     * @return
     * @throws IOException
     */
    public static BufferedImage mergeImage(boolean isHorizontal, List<BufferedImage> imgs) throws IOException {
        // 生成新图片
        BufferedImage destImage = null;
        // 计算新图片的长和高
        int allw = 0, allh = 0, allwMax = 0, allhMax = 0;
        // 获取总长、总宽、最长、最宽
        for (int i = 0; i < imgs.size(); i++) {
            BufferedImage img = imgs.get(i);
            allw += img.getWidth();

            if (imgs.size() != i + 1) {
                allh += img.getHeight() + 5;
            } else {
                allh += img.getHeight();
            }
            if (img.getWidth() > allwMax) {
                allwMax = img.getWidth();
            }
            if (img.getHeight() > allhMax) {
                allhMax = img.getHeight();
            }
        }
        // 创建新图片
        if (isHorizontal) {
            destImage = new BufferedImage(allw, allhMax, BufferedImage.TYPE_INT_RGB);
        } else {
            destImage = new BufferedImage(allwMax, allh, BufferedImage.TYPE_INT_RGB);
        }
        Graphics2D g2 = (Graphics2D) destImage.getGraphics();
        g2.setBackground(Color.LIGHT_GRAY);
        g2.clearRect(0, 0, allw, allh);
        g2.setPaint(Color.RED);

        // 合并所有子图片到新图片
        int wx = 0, wy = 0;
        for (int i = 0; i < imgs.size(); i++) {
            BufferedImage img = imgs.get(i);
            int w1 = img.getWidth();
            int h1 = img.getHeight();
            // 从图片中读取RGB
            int[] ImageArrayOne = new int[w1 * h1];
            ImageArrayOne = img.getRGB(0, 0, w1, h1, ImageArrayOne, 0, w1); // 逐行扫描图像中各个像素的RGB到数组中
            if (isHorizontal) { // 水平方向合并
                destImage.setRGB(wx, 0, w1, h1, ImageArrayOne, 0, w1); // 设置上半部分或左半部分的RGB
            } else { // 垂直方向合并
                destImage.setRGB(0, wy, w1, h1, ImageArrayOne, 0, w1); // 设置上半部分或左半部分的RGB
            }
            wx += w1;
            wy += h1 + 5;
        }

        return destImage;
    }

测试

public static void main(String[] args){
		//word转pdf
		word2pdf("C:\\11.docx","C:\\11.pdf");
		//word转html
       	word2HTML("C:\\11.docx","C:\\11.html");
        //word转图片格式
        try {
            File file = new File("C:\\11.docx");
            InputStream inStream = new FileInputStream(file);
            List<BufferedImage> wordToImg = wordToImg(inStream,2);//
            for(int i=0; i<wordToImg.size(); i++){
                //保存图片（单张）
                ImageIO.write(wordToImg.get(i), "jpg", new File("C:\\"+ i +".png")); //将其保存在C:/imageSort/targetPIC/下
            }
            BufferedImage mergeImage = mergeImage(false, wordToImg);
            //保存图片（长图）
            ImageIO.write(mergeImage, "jpg", new File("C:\\Users\\86152\\Desktop\\word\\xx.png"));
        } catch (Exception e) {
            e.printStackTrace();
        }
        word2HTML("C:\\11.docx","C:\\11.html");
   }

第二种

import com.aspose.words.Document;
import com.aspose.words.SaveFormat;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDStream;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
//https://blog.csdn.net/csdnFlyFun/article/details/79523262?locationNum=1&fps=1
public class Word2PDF {

    public static void main(String[] args) throws IOException {
        //doc2pdf("C:\\word\\11.docx");
    }


    //替换pdf文本内容
    public static void replaceText(PDPage page, String searchString, String replacement) throws IOException {
        PDFStreamParser parser = new PDFStreamParser(page);
        List<?> tokens = parser.parse();
        for (int j = 0; j < tokens.size(); j++) {
            Object next = tokens.get(j);
            if (next instanceof Operator) {
                Operator op = (Operator) next;
                String pstring = "";
                int prej = 0;
                if (op.getName().equals("Tj")) {
                    COSString previous = (COSString) tokens.get(j - 1);
                    String string = previous.getString();
                    string = string.replaceFirst(searchString, replacement);
                    previous.setValue(string.getBytes());
                } else if (op.getName().equals("TJ")) {
                    COSArray previous = (COSArray) tokens.get(j - 1);
                    for (int k = 0; k < previous.size(); k++) {
                        Object arrElement = previous.getObject(k);
                        if (arrElement instanceof COSString) {
                            COSString cosString = (COSString) arrElement;
                            String string = cosString.getString();

                            if (j == prej) {
                                pstring += string;
                            } else {
                                prej = j;
                                pstring = string;
                            }
                        }
                    }
                    if (searchString.equals(pstring.trim())) {
                        COSString cosString2 = (COSString) previous.getObject(0);
                        cosString2.setValue(replacement.getBytes());
                        int total = previous.size() - 1;
                        for (int k = total; k > 0; k--) {
                            previous.remove(k);
                        }
                    }
                }
            }
        }
        List<PDStream> contents = new ArrayList<>();
        Iterator<PDStream> streams = page.getContentStreams();
        while (streams.hasNext()) {
            PDStream updatedStream = streams.next();
            OutputStream out = updatedStream.createOutputStream(COSName.FLATE_DECODE);
            ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
            tokenWriter.writeTokens(tokens);
            contents.add(updatedStream);
            out.close();
        }
        page.setContents(contents);
    }

    //移除图片水印
    public static void removeImage(PDPage page, String cosName) {
        PDResources resources = page.getResources();
        COSDictionary dict1 = resources.getCOSObject();
        resources.getXObjectNames().forEach(e -> {
            if (resources.isImageXObject(e)) {
                COSDictionary dict2 = dict1.getCOSDictionary(COSName.XOBJECT);
                if (e.getName().equals(cosName)) {
                    dict2.removeItem(e);
                }
            }
            page.setResources(new PDResources(dict1));
        });
    }


    //移除文字水印
    public static boolean removeWatermark(File file) {
        try {
            //通过文件名加载文档
            PDDocument document = Loader.loadPDF(file);
            PDPageTree pages = document.getPages();
            Iterator<PDPage> iter = pages.iterator();
            while (iter.hasNext()) {
                PDPage page = iter.next();

                //去除文字水印
                replaceText(page, "Evaluation Only. Created with Aspose.Words. Copyright 2003-2021 Aspose Pty Ltd.", "");
                replaceText(page, "Created with an evaluation copy of Aspose.Words. To discover the full versions of our APIs please", "");
                replaceText(page, "visit: https://products.aspose.com/words/", "");
                replaceText(page, "Created with an evaluation copy of Aspose.Words. To discover the full", "");
                replaceText(page, "versions of our APIs please visit: https://products.aspose.com/words/", "");
                replaceText(page, "This document was truncated here because it was created in the Evaluation", "");
                //去除文字水印

                replaceText(page, "Evaluation Only. Created with Aspose.Words. Copyright 2003-2021 Aspose", "");
                replaceText(page, "Evaluation Only. Created with Aspose.Words. Copyright 2003-2021 Aspose Pty Ltd.", "");
                replaceText(page, "Pty Ltd.", "");
                replaceText(page, "Created with an evaluation copy of Aspose.Words. To discover the full", "");
                replaceText(page, "versions of our APIs please visit: https://products.aspose.com/words/", "");
                replaceText(page, "This document was truncated here because it was created in the Evaluation", "");
                replaceText(page, "Created with an evaluation copy of Aspose.Words. To discover the full versions of our APIs please visit: https://products.aspose.com/words/", "");
                replaceText(page, "Created with an evaluation copy of Aspose.Words. To discover the full versions of", "");
                replaceText(page, "our APIs please visit: https://products.aspose.com/words/", "");
                //去除图片水印
                removeImage(page, "X1");
            }
            document.removePage(document.getNumberOfPages() - 1);
            file.delete();
            document.save(file);
            document.close();
            return true;
        } catch (IOException ex) {
            ex.printStackTrace();
            return false;
        }

    }


    //doc文件转pdf(目前最大支持21页)
    //public static void doc2pdf(String wordPath) {
    public static void doc2pdf(String wordPath,String pdfPath) {
        long old = System.currentTimeMillis();
        try {
            //新建一个pdf文档
            //String pdfPath=wordPath.substring(0,wordPath.lastIndexOf("."))+".pdf";
            File file = new File(pdfPath);
            FileOutputStream os = new FileOutputStream(file);
            //Address是将要被转化的word文档
            Document doc = new Document(wordPath);
            //全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF, EPUB, XPS, SWF 相互转换
            doc.save(os, SaveFormat.PDF);
            os.close();
            //去除水印
            removeWatermark(new File(pdfPath));
            //转化用时
            long now = System.currentTimeMillis();
            System.out.println("Word 转 Pdf 共耗时：" + ((now - old) / 1000.0) + "秒");
        } catch (Exception e) {
            System.out.println("Word 转 Pdf 失败...");
            e.printStackTrace();
        }
    }


}

wang130104

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
java word 转 PDF|HTML|PNG

@TOC(java word 转 PDF|HTML|PNG)word文档转换图片或pdf格式，需要使用（aspose-words-15.8.0-jdk16.jar）链接：https://pan.baidu.com/s/1z08E3IkJ8BbTrkJspF_osA提取码：8qpuword 转 PDF /** * word转pdf * @param docPath * @param savePath */ public static void
复制链接

扫一扫