@[toc] java word 转 PDF|HTML|PNG
word文档转换图片或pdf格式,需要使用(aspose-words-15.8.0-jdk16.jar)
- 链接:https://pan.baidu.com/s/1z08E3IkJ8BbTrkJspF_osA
- 提取码:8qpu
import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import com.aspose.words.Document;
import com.aspose.words.ImageSaveOptions;
import com.aspose.words.License;
import com.aspose.words.SaveFormat;
word 转 PDF
/**
 * Converts a Word document to PDF with Aspose.Words.
 *
 * <p>The embedded license is applied first, then the document at {@code docPath}
 * is loaded and written to {@code savePath} in PDF format. Any exception is caught
 * and its stack trace printed; nothing is propagated to the caller.
 *
 * <p>NOTE(review): the license data is hard-coded here and repeated in sibling
 * methods; consider loading it from a single external license file.
 *
 * @param docPath  path of the Word document to read
 * @param savePath path of the PDF file to create
 */
public static void word2pdf(String docPath,String savePath){
    try {
        String s = "<License><Data><Products><Product>Aspose.Total for Java</Product><Product>Aspose.Words for Java</Product></Products><EditionType>Enterprise</EditionType><SubscriptionExpiry>20991231</SubscriptionExpiry><LicenseExpiry>20991231</LicenseExpiry><SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber></Data><Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature></License>";
        ByteArrayInputStream is = new ByteArrayInputStream(s.getBytes());
        License license = new License();
        license.setLicense(is);
        Document document = new Document(docPath);
        // Close the output file even when save() fails, so the handle is not leaked.
        try (FileOutputStream out = new FileOutputStream(new File(savePath))) {
            document.save(out, SaveFormat.PDF);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
word转HTML
/**
 * Converts a Word document to HTML with Aspose.Words.
 *
 * <p>The embedded license is applied first, then the document at {@code docPath}
 * is loaded and written to {@code savePath} in HTML format. Any exception is caught
 * and its stack trace printed; nothing is propagated to the caller.
 *
 * <p>NOTE(review): the license data is hard-coded here and repeated in sibling
 * methods; consider loading it from a single external license file.
 *
 * @param docPath  path of the Word document to read
 * @param savePath path of the HTML file to create
 */
public static void word2HTML(String docPath,String savePath){
    try {
        String s = "<License><Data><Products><Product>Aspose.Total for Java</Product><Product>Aspose.Words for Java</Product></Products><EditionType>Enterprise</EditionType><SubscriptionExpiry>20991231</SubscriptionExpiry><LicenseExpiry>20991231</LicenseExpiry><SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber></Data><Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature></License>";
        ByteArrayInputStream is = new ByteArrayInputStream(s.getBytes());
        License license = new License();
        license.setLicense(is);
        Document document = new Document(docPath);
        // Close the output file even when save() fails, so the handle is not leaked.
        try (FileOutputStream out = new FileOutputStream(new File(savePath))) {
            document.save(out, SaveFormat.HTML);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
word和txt文件转换图片
/**
 * Renders every page of a document (Word or plain text, per the original note)
 * to an image.
 *
 * <p>Each page is saved by Aspose.Words as PNG into memory, decoded with
 * {@link ImageIO}, and then copied into a fresh {@code TYPE_INT_RGB} image; the
 * original author notes that the directly decoded image shows a red background
 * otherwise. The elapsed time is printed to standard output.
 *
 * <p>The supplied stream is not closed by this method.
 *
 * @param inputStream stream positioned at the start of the document
 * @param pageNum     intended upper bound on rendered pages; currently NOT applied
 *                    (the limit was disabled by the original author), so all pages
 *                    are rendered
 * @return one image per page in page order, or {@code null} when the license
 *         check {@code isWordLicense()} fails
 * @throws Exception any failure while loading, rendering or decoding; the stack
 *                   trace is printed before the exception is rethrown
 */
private static List<BufferedImage> wordToImg(InputStream inputStream, int pageNum) throws Exception {
    if (!isWordLicense()) {
        return null;
    }
    try {
        long startMillis = System.currentTimeMillis();
        Document doc = new Document(inputStream);
        ImageSaveOptions options = new ImageSaveOptions(SaveFormat.PNG);
        options.setPrettyFormat(true);
        options.setUseAntiAliasing(true);
        options.setUseHighQualityRendering(true);
        int pageCount = doc.getPageCount();

        List<BufferedImage> pageImages = new ArrayList<BufferedImage>();
        for (int i = 0; i < pageCount; i++) {
            ByteArrayOutputStream output = new ByteArrayOutputStream();
            options.setPageIndex(i);
            doc.save(output, options);
            BufferedImage decoded = ImageIO.read(new ByteArrayInputStream(output.toByteArray()));
            if (decoded == null) {
                // ImageIO.read returns null when no registered reader accepts the data.
                throw new IOException("Could not decode rendered image for page index " + i);
            }
            // Copy the pixels into an opaque RGB image (see method comment). setRGB
            // overwrites every pixel, so no background fill is required beforehand.
            int width = decoded.getWidth();
            int height = decoded.getHeight();
            int[] pixels = decoded.getRGB(0, 0, width, height, null, 0, width);
            BufferedImage rgbImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
            rgbImage.setRGB(0, 0, width, height, pixels, 0, width);
            pageImages.add(rgbImage);
        }

        // Report minutes / seconds / milliseconds of the elapsed time (hours are dropped,
        // as in the original message format).
        long elapsed = System.currentTimeMillis() - startMillis;
        long min = (elapsed % (1000L * 60 * 60)) / (1000L * 60);
        long s = (elapsed % (1000L * 60)) / 1000L;
        long ss = elapsed % 1000L;
        System.out.println("word转图片时间:"+min+"分"+s+"秒" + ss + "毫秒");
        return pageImages;
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    }
}
/**
 * Turns the bytes collected by an output stream into an input stream.
 *
 * @param out the stream to read back; it is cast to {@link ByteArrayOutputStream},
 *            so any other stream type fails with a {@link ClassCastException}
 * @return a new stream over a copy of the bytes written so far
 * @throws Exception declared for callers; nothing checked is thrown by the body
 */
public static ByteArrayInputStream parse(OutputStream out) throws Exception {
    byte[] captured = ((ByteArrayOutputStream) out).toByteArray();
    return new ByteArrayInputStream(captured);
}
去水印
/**
 * Applies the embedded license to the Aspose.Words component.
 *
 * <p>Per the original author's note, output produced without a license carries a
 * watermark, and the license must match the library version
 * (aspose-words-15.8.0-jdk16.jar).
 *
 * @return {@code true} when the license was applied without an exception;
 *         {@code false} otherwise (the stack trace is printed)
 */
public static boolean isWordLicense() {
    final String licenseXml = "<License><Data><Products><Product>Aspose.Total for Java</Product><Product>Aspose.Words for Java</Product></Products><EditionType>Enterprise</EditionType><SubscriptionExpiry>20991231</SubscriptionExpiry><LicenseExpiry>20991231</LicenseExpiry><SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber></Data><Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature></License>";
    try {
        ByteArrayInputStream licenseStream = new ByteArrayInputStream(licenseXml.getBytes());
        com.aspose.words.License wordsLicense = new com.aspose.words.License();
        wordsLicense.setLicense(licenseStream);
        return true;
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}
合并任意数量的图片成一张图片
/**
 * Merges any number of images into a single image.
 *
 * <p>Horizontal merge places the images left to right with no gap; the canvas is
 * as wide as the sum of the widths and as tall as the tallest image. Vertical merge
 * stacks the images top to bottom with a 5-pixel gap between neighbours; the canvas
 * is as wide as the widest image. Canvas areas not covered by an image are light gray.
 *
 * @param isHorizontal {@code true} to merge horizontally, {@code false} to merge vertically
 * @param imgs         the images to merge, in order; must be non-null and non-empty
 * @return a new {@code TYPE_INT_RGB} image containing all inputs
 * @throws IllegalArgumentException if {@code imgs} is empty
 * @throws NullPointerException     if {@code imgs} is {@code null}
 * @throws IOException              declared for callers; not thrown by the current body
 */
public static BufferedImage mergeImage(boolean isHorizontal, List<BufferedImage> imgs) throws IOException {
    if (imgs == null) {
        throw new NullPointerException("imgs");
    }
    if (imgs.isEmpty()) {
        // Fail with a clear message instead of the raster-dimension error a 0x0 canvas produces.
        throw new IllegalArgumentException("imgs must contain at least one image");
    }
    // Spacing, in pixels, inserted between images when stacking vertically.
    final int verticalGap = 5;

    // Total and maximum extents of the inputs.
    int totalWidth = 0;
    int totalHeight = verticalGap * (imgs.size() - 1);
    int maxWidth = 0;
    int maxHeight = 0;
    for (BufferedImage img : imgs) {
        totalWidth += img.getWidth();
        totalHeight += img.getHeight();
        maxWidth = Math.max(maxWidth, img.getWidth());
        maxHeight = Math.max(maxHeight, img.getHeight());
    }

    int canvasWidth = isHorizontal ? totalWidth : maxWidth;
    int canvasHeight = isHorizontal ? maxHeight : totalHeight;
    BufferedImage destImage = new BufferedImage(canvasWidth, canvasHeight, BufferedImage.TYPE_INT_RGB);

    // Fill the canvas with the background colour, then release the graphics context.
    Graphics2D g2 = destImage.createGraphics();
    try {
        g2.setBackground(Color.LIGHT_GRAY);
        g2.clearRect(0, 0, canvasWidth, canvasHeight);
    } finally {
        g2.dispose();
    }

    // Copy each image's pixels to its slot on the canvas.
    int offsetX = 0;
    int offsetY = 0;
    for (BufferedImage img : imgs) {
        int width = img.getWidth();
        int height = img.getHeight();
        int[] pixels = img.getRGB(0, 0, width, height, null, 0, width);
        if (isHorizontal) {
            destImage.setRGB(offsetX, 0, width, height, pixels, 0, width);
        } else {
            destImage.setRGB(0, offsetY, width, height, pixels, 0, width);
        }
        offsetX += width;
        offsetY += height + verticalGap;
    }
    return destImage;
}
测试
/**
 * Demo entry point: converts {@code C:\11.docx} to PDF, to HTML, to one PNG per
 * page, and to a single long PNG made of all pages stacked vertically.
 *
 * @param args unused
 */
public static void main(String[] args){
    // Word -> PDF
    word2pdf("C:\\11.docx","C:\\11.pdf");
    // Word -> HTML
    word2HTML("C:\\11.docx","C:\\11.html");
    // Word -> images; the input stream is closed when the block exits.
    try (InputStream inStream = new FileInputStream(new File("C:\\11.docx"))) {
        List<BufferedImage> wordToImg = wordToImg(inStream,2);
        if (wordToImg == null || wordToImg.isEmpty()) {
            // wordToImg returns null when the license check fails; nothing to export.
            System.err.println("No page images were produced; skipping image export.");
            return;
        }
        for(int i=0; i<wordToImg.size(); i++){
            // Save each page as C:\<index>.png; the encoder format matches the file extension.
            ImageIO.write(wordToImg.get(i), "png", new File("C:\\"+ i +".png"));
        }
        BufferedImage mergeImage = mergeImage(false, wordToImg);
        // Save all pages as one long image.
        ImageIO.write(mergeImage, "png", new File("C:\\Users\\86152\\Desktop\\word\\xx.png"));
    } catch (Exception e) {
        e.printStackTrace();
    }
}
第二种
import com.aspose.words.Document;
import com.aspose.words.SaveFormat;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
//https://blog.csdn.net/csdnFlyFun/article/details/79523262?locationNum=1&fps=1
/**
 * Converts Word documents to PDF with Aspose.Words and then post-processes the
 * PDF with PDFBox to strip the evaluation watermark text and image.
 */
public class Word2PDF {

    /**
     * Watermark text fragments removed from every page, applied in this order.
     * Full sentences come before their shorter fragments; repeated entries are
     * kept so the sequence of passes is the same as before.
     */
    private static final String[] WATERMARK_TEXTS = {
        "Evaluation Only. Created with Aspose.Words. Copyright 2003-2021 Aspose Pty Ltd.",
        "Created with an evaluation copy of Aspose.Words. To discover the full versions of our APIs please",
        "visit: https://products.aspose.com/words/",
        "Created with an evaluation copy of Aspose.Words. To discover the full",
        "versions of our APIs please visit: https://products.aspose.com/words/",
        "This document was truncated here because it was created in the Evaluation",
        "Evaluation Only. Created with Aspose.Words. Copyright 2003-2021 Aspose",
        "Evaluation Only. Created with Aspose.Words. Copyright 2003-2021 Aspose Pty Ltd.",
        "Pty Ltd.",
        "Created with an evaluation copy of Aspose.Words. To discover the full",
        "versions of our APIs please visit: https://products.aspose.com/words/",
        "This document was truncated here because it was created in the Evaluation",
        "Created with an evaluation copy of Aspose.Words. To discover the full versions of our APIs please visit: https://products.aspose.com/words/",
        "Created with an evaluation copy of Aspose.Words. To discover the full versions of",
        "our APIs please visit: https://products.aspose.com/words/",
    };

    /**
     * Entry point; intentionally does nothing by default.
     *
     * @param args unused
     * @throws IOException declared for callers; not thrown by the current body
     */
    public static void main(String[] args) throws IOException {
        // Example: doc2pdf("C:\\word\\11.docx", "C:\\word\\11.pdf");
    }

    /**
     * Replaces text shown on a PDF page.
     *
     * <p>For a {@code Tj} operator the first literal occurrence of
     * {@code searchString} inside the shown string is replaced. For a {@code TJ}
     * operator the string pieces of the array are concatenated and, when the trimmed
     * result equals {@code searchString}, the whole array is reduced to a single
     * string holding {@code replacement}. The page content is then rewritten as one
     * content stream.
     *
     * @param page         the page to modify
     * @param searchString the text to look for; matched literally, not as a regex
     * @param replacement  the text to insert instead
     * @throws IOException if the content stream cannot be parsed or written
     */
    public static void replaceText(PDPage page, String searchString, String replacement) throws IOException {
        PDFStreamParser parser = new PDFStreamParser(page);
        List<?> tokens = parser.parse();

        // Quote both sides so '.', '$' and similar characters are taken literally.
        String literalSearch = java.util.regex.Pattern.quote(searchString);
        String literalReplacement = java.util.regex.Matcher.quoteReplacement(replacement);

        // Start at 1: a text-showing operator needs the preceding token as its operand.
        for (int j = 1; j < tokens.size(); j++) {
            Object next = tokens.get(j);
            if (!(next instanceof Operator)) {
                continue;
            }
            String opName = ((Operator) next).getName();
            Object operand = tokens.get(j - 1);

            if ("Tj".equals(opName) && operand instanceof COSString) {
                COSString shown = (COSString) operand;
                String text = shown.getString();
                String replaced = text.replaceFirst(literalSearch, literalReplacement);
                // Only touch strings that actually changed; re-encoding untouched
                // strings through the platform charset can corrupt their bytes.
                if (!replaced.equals(text)) {
                    shown.setValue(replaced.getBytes());
                }
            } else if ("TJ".equals(opName) && operand instanceof COSArray) {
                COSArray pieces = (COSArray) operand;
                StringBuilder joined = new StringBuilder();
                int firstStringIndex = -1;
                for (int k = 0; k < pieces.size(); k++) {
                    Object element = pieces.getObject(k);
                    if (element instanceof COSString) {
                        if (firstStringIndex < 0) {
                            firstStringIndex = k;
                        }
                        joined.append(((COSString) element).getString());
                    }
                }
                if (firstStringIndex >= 0 && searchString.equals(joined.toString().trim())) {
                    // Keep the first string piece as the carrier of the replacement and
                    // drop every other element (remaining strings and spacing numbers).
                    ((COSString) pieces.getObject(firstStringIndex)).setValue(replacement.getBytes());
                    for (int k = pieces.size() - 1; k >= 0; k--) {
                        if (k != firstStringIndex) {
                            pieces.remove(k);
                        }
                    }
                }
            }
        }

        // The parsed tokens cover the whole page, so write them into a single stream;
        // writing them into every existing stream would repeat the page content.
        Iterator<PDStream> streams = page.getContentStreams();
        if (!streams.hasNext()) {
            return;
        }
        PDStream updatedStream = streams.next();
        try (OutputStream out = updatedStream.createOutputStream(COSName.FLATE_DECODE)) {
            ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
            tokenWriter.writeTokens(tokens);
        }
        List<PDStream> contents = new ArrayList<>();
        contents.add(updatedStream);
        page.setContents(contents);
    }

    /**
     * Removes an image XObject (used here for the image watermark) from a page's resources.
     *
     * @param page    the page whose resources are modified
     * @param cosName the resource name of the image to remove, e.g. {@code "X1"}
     */
    public static void removeImage(PDPage page, String cosName) {
        PDResources resources = page.getResources();
        if (resources == null) {
            return;
        }
        COSDictionary resourceDict = resources.getCOSObject();
        COSDictionary xObjects = resourceDict.getCOSDictionary(COSName.XOBJECT);
        if (xObjects == null) {
            return;
        }
        // Collect first, remove afterwards: the dictionary must not be modified
        // while its names are being iterated.
        List<COSName> matches = new ArrayList<>();
        for (COSName name : resources.getXObjectNames()) {
            if (resources.isImageXObject(name) && name.getName().equals(cosName)) {
                matches.add(name);
            }
        }
        for (COSName name : matches) {
            xObjects.removeItem(name);
        }
        // Re-wrap the dictionary so the page works from the updated resources.
        page.setResources(new PDResources(resourceDict));
    }

    /**
     * Removes the evaluation watermark text and image from every page of a PDF,
     * drops the last page, and saves the result over the same file.
     *
     * @param file the PDF file to clean in place
     * @return {@code true} on success; {@code false} if an {@link IOException}
     *         occurred (its stack trace is printed)
     */
    public static boolean removeWatermark(File file) {
        // try-with-resources closes the document on the failure path as well.
        try (PDDocument document = Loader.loadPDF(file)) {
            PDPageTree pages = document.getPages();
            for (PDPage page : pages) {
                // Remove the text watermarks.
                for (String watermark : WATERMARK_TEXTS) {
                    replaceText(page, watermark, "");
                }
                // Remove the image watermark.
                removeImage(page, "X1");
            }
            // NOTE(review): the last page is always dropped; presumably it is a
            // notice page appended by the evaluation build. Confirm before relying on it.
            if (document.getNumberOfPages() > 0) {
                document.removePage(document.getNumberOfPages() - 1);
            }
            file.delete();
            document.save(file);
            return true;
        } catch (IOException ex) {
            ex.printStackTrace();
            return false;
        }
    }

    /**
     * Converts a Word document to PDF and strips the evaluation watermark.
     *
     * <p>Per the original author's note, at most 21 pages are currently supported.
     * Failures are reported on standard output with a stack trace; nothing is
     * propagated to the caller.
     *
     * @param wordPath path of the Word document to convert
     * @param pdfPath  path of the PDF file to create
     */
    public static void doc2pdf(String wordPath,String pdfPath) {
        long old = System.currentTimeMillis();
        try {
            // Load the input first so a bad input does not leave an empty PDF behind.
            Document doc = new Document(wordPath);
            File file = new File(pdfPath);
            try (FileOutputStream os = new FileOutputStream(file)) {
                doc.save(os, SaveFormat.PDF);
            }
            // Remove the watermark from the generated file.
            removeWatermark(new File(pdfPath));
            // Report the elapsed time.
            long now = System.currentTimeMillis();
            System.out.println("Word 转 Pdf 共耗时:" + ((now - old) / 1000.0) + "秒");
        } catch (Exception e) {
            System.out.println("Word 转 Pdf 失败...");
            e.printStackTrace();
        }
    }
}