Aspose.Words for Java 是一个强大的库,允许开发人员以编程方式创建、操作、转换和渲染 Word 文档。它提供了广泛的功能,满足各种文档处理需求,成为 Java 开发人员处理 Word 文档的重要工具。本文将探讨 Aspose.Words for Java 的关键功能、优势和用例,并通过一个简单的示例展示其功能。
Aspose.Words for Java 的关键功能
- 文档创建和编辑:
  - 从头创建新的 Word 文档。
  - 通过添加或删除文本、图像、表格和其他元素来编辑现有文档。
  - 操作文档属性、样式和格式。
- 转换能力:
  - 将 Word 文档转换为各种格式,包括 PDF、HTML、EPUB 和图像格式(如 PNG 和 JPEG)。
  - 支持不同 Word 文档格式之间的转换(DOC、DOCX、RTF、ODT 等)。
- 全面的文档处理:
  - 执行邮件合并操作,从模板动态生成文档。
  - 使用查找和替换功能以编程方式修改文档内容。
  - 提取和操作文档元数据。
- 渲染和打印:
  - 将文档渲染为固定布局格式,如 PDF。
  - 直接从 Java 应用程序打印文档。
  - 将文档转换为图像,用于 Web 和移动应用程序。
- 模板引擎:
  - 创建和使用模板以自动生成文档。
  - 用来自各种来源(如数据库和 XML 文件)的数据填充模板。
- 兼容性:
  - 完全支持最新的 Word 文档标准。
  - 在转换和操作过程中,高保真地维护原始文档的外观和结构。
之前刚好用 Aspose.Words 写了一个读取 Word 文档内容(文本、链接和图片)的工具类,今天做个笔记记录一下。
首先,在 pom.xml 中引入 jar 包依赖:
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words-jdk17-22.12</artifactId>
<version>22.12</version>
<!-- NOTE(review): system scope loads the jar from the absolute path below, which is specific to one machine; change systemPath to wherever the jar lives locally. -->
<scope>system</scope>
<systemPath>D:/projectcode/eclipseworkspace/Zblog-master/src/main/webapp/META-INF/lib/aspose-words-jdk17-22.12.jar</systemPath>
</dependency>
然后直接贴出读取 Word 内容的工具类代码。注意:代码还存在一些小问题,需要根据实际需求自行优化:
package com.zblog.core.util;
import com.aspose.words.*;
import com.aspose.words.Shape;
import javafx.scene.control.Hyperlink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.awt.*;
import java.io.File;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility class for reading and writing Word documents with Aspose.Words.
 * Date: 2024-03-05
 * Author: mgc百科
 */
public class WordUtils {
private final static Logger logger = LoggerFactory.getLogger(WordUtils.class);
/**
 * Appends a new paragraph holding a single line-feed control character to the
 * body of the document's first section.
 *
 * @param doc the document to append to
 * @throws RuntimeException if any step fails; the original exception is attached as the cause
 */
public static void generateEnter(Document doc) throws RuntimeException {
    try {
        Paragraph para = new Paragraph(doc);
        para.appendChild(new Run(doc, ControlChar.LINE_FEED));
        doc.getFirstSection().getBody().appendChild(para);
    } catch (Exception e) {
        logger.error("WordUtil generateEnter error", e);
        // Chain the cause so callers keep the stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Appends a new paragraph containing one run of text, rendered with the given
 * font name and size, to the body of the document's first section.
 *
 * @param doc      the document to append to
 * @param text     the text of the run
 * @param fontName the font name applied to the run
 * @param fontSize the font size applied to the run
 * @throws RuntimeException if any step fails; the original exception is attached as the cause
 */
public static void generateText(Document doc, String text, String fontName, double fontSize) throws RuntimeException {
    try {
        Run run = new Run(doc, text);
        run.getFont().setName(fontName);
        run.getFont().setSize(fontSize);

        Paragraph para = new Paragraph(doc);
        para.appendChild(run);
        doc.getFirstSection().getBody().appendChild(para);
    } catch (Exception e) {
        logger.error("WordUtil generateText error", e);
        // Chain the cause so callers keep the stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Appends a new paragraph containing one bold run of text, rendered with the
 * given font name and size, to the body of the document's first section.
 *
 * @param doc      the document to append to
 * @param text     the text of the run
 * @param fontName the font name applied to the run
 * @param fontSize the font size applied to the run
 * @throws RuntimeException if any step fails; the original exception is attached as the cause
 */
public static void generateTextBlod(Document doc, String text, String fontName, double fontSize) throws RuntimeException {
    try {
        Run run = new Run(doc, text);
        run.getFont().setName(fontName);
        run.getFont().setSize(fontSize);
        run.getFont().setBold(true);

        Paragraph para = new Paragraph(doc);
        para.appendChild(run);
        doc.getFirstSection().getBody().appendChild(para);
    } catch (Exception e) {
        logger.error("WordUtil generateTextBlod error", e);
        // Chain the cause so callers keep the stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Inserts an image into the document, names the resulting shape, scales it by
 * {@code scaleRatio} and centers the paragraph that contains it.
 *
 * <p>No cursor movement is performed on the builder, so the image is inserted
 * wherever a freshly created {@code DocumentBuilder} points (presumably the
 * start of the document; confirm against the Aspose documentation).
 *
 * @param doc        the document to insert into
 * @param imagePath  path of the image file to insert
 * @param imageName  name assigned to the inserted shape
 * @param scaleRatio factor applied to both the width and the height of the inserted shape
 * @throws RuntimeException if any step fails; the original exception is attached as the cause
 */
public static void generatePicture(Document doc, String imagePath, String imageName, double scaleRatio) throws RuntimeException {
    try {
        DocumentBuilder builder = new DocumentBuilder(doc);
        // Use the shape returned by insertImage. The previous code cast
        // doc.getLastChild() to Shape, but that node is not the inserted shape
        // and the cast failed with a ClassCastException.
        Shape shape = builder.insertImage(imagePath);
        shape.setName(imageName);

        // Read both original dimensions before changing either of them.
        double originalWidth = shape.getWidth();
        double originalHeight = shape.getHeight();
        shape.setWidth(originalWidth * scaleRatio);
        shape.setHeight(originalHeight * scaleRatio);

        Paragraph paragraph = (Paragraph) shape.getParentNode();
        paragraph.getParagraphFormat().setAlignment(ParagraphAlignment.CENTER);
    } catch (Exception e) {
        logger.error("WordUtil generatePicture error", e);
        // Chain the cause so callers keep the stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Appends a title paragraph: one run of text with the given font name, size
 * and bold flag, added to the body of the document's first section.
 *
 * @param doc      the document to append to
 * @param title    the title text
 * @param fontName the font name applied to the run
 * @param fontSize the font size applied to the run
 * @param isBold   whether the run is bold
 * @throws RuntimeException if any step fails; the original exception is attached as the cause
 */
public static void generateTitle(Document doc, String title, String fontName, double fontSize, boolean isBold) throws RuntimeException {
    try {
        Run run = new Run(doc, title);
        run.getFont().setName(fontName);
        run.getFont().setSize(fontSize);
        run.getFont().setBold(isBold);

        Paragraph para = new Paragraph(doc);
        para.appendChild(run);
        doc.getFirstSection().getBody().appendChild(para);
    } catch (Exception e) {
        logger.error("WordUtil generateTitle error", e);
        // Chain the cause so callers keep the stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Appends a second-level heading: a paragraph styled with
 * {@code StyleIdentifier.HEADING_2} that contains one run of text with the
 * given font name, size and bold flag, added to the body of the first section.
 *
 * @param doc      the document to append to
 * @param title    the heading text
 * @param fontName the font name applied to the run
 * @param fontSize the font size applied to the run
 * @param isBold   whether the run is bold
 * @throws RuntimeException if any step fails; the original exception is attached as the cause
 */
public static void generateSecondLevelTitle(Document doc, String title, String fontName, double fontSize, boolean isBold) throws RuntimeException {
    try {
        Run run = new Run(doc, title);
        run.getFont().setName(fontName);
        run.getFont().setSize(fontSize);
        run.getFont().setBold(isBold);

        Paragraph para = new Paragraph(doc);
        para.getParagraphFormat().setStyleIdentifier(StyleIdentifier.HEADING_2);
        para.appendChild(run);
        doc.getFirstSection().getBody().appendChild(para);
    } catch (Exception e) {
        logger.error("WordUtil generateSecondLevelTitle error", e);
        // Chain the cause so callers keep the stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Prints the trimmed plain-text export of every paragraph in the document
 * (collected recursively) to standard output, one paragraph per line.
 *
 * @param doc the document to read
 * @throws RuntimeException if any step fails; the original exception is attached as the cause
 */
public static void readWordText(Document doc) throws RuntimeException {
    try {
        NodeCollection paragraphs = doc.getChildNodes(NodeType.PARAGRAPH, true);
        for (Paragraph paragraph : (Iterable<Paragraph>) paragraphs) {
            String text = paragraph.toString(SaveFormat.TEXT).trim();
            System.out.println(text);
        }
    } catch (Exception e) {
        logger.error("WordUtil readWordText error", e);
        // Chain the cause so callers keep the stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Loads a Word document and returns its content as one string: the text (or
 * link markup) of each paragraph and the saved file path of each image shape,
 * in node order, each entry followed by {@code "\t\n"}.
 *
 * <p>Images are written to {@code imageOutputDirectory} and named
 * {@code imageprefix + "_" + nodeIndex} plus the extension chosen by
 * {@link #getWordImagePath}.
 *
 * @param inputFilePath        path of the Word document to read
 * @param imageOutputDirectory directory that receives the extracted images
 * @param imageprefix          prefix of the generated image file names
 * @return the concatenated paragraph texts and image paths
 * @throws RuntimeException if the document cannot be loaded or read; the
 *                          original exception is attached as the cause
 */
public static String getParagraphContent(String inputFilePath,String imageOutputDirectory,String imageprefix) throws RuntimeException {
    final Document doc;
    try {
        doc = new Document(inputFilePath);
    } catch (Exception e) {
        logger.error("WordUtils getParagraphContent inputFilePath error", e);
        throw new RuntimeException(e.getMessage(), e);
    }

    // Each failure is now logged and wrapped exactly once. The previous outer
    // catch re-caught the RuntimeException thrown here, logging and wrapping
    // it a second time.
    try {
        StringBuilder returnContent = new StringBuilder();
        NodeCollection nodes = doc.getChildNodes(NodeType.ANY, true);
        final int lastIndex = nodes.getCount() - 1;
        int index = 0;
        for (Node node : (Iterable<Node>) nodes) {
            // NOTE(review): the last node of the document is always skipped, as in
            // the original code. The intent is unclear; a trailing image shape
            // would be dropped by this guard. Confirm before removing it.
            if (index < lastIndex) {
                String piece = "";
                if (node.getNodeType() == NodeType.PARAGRAPH) {
                    piece = getWordText((Paragraph) node);
                } else if (node.getNodeType() == NodeType.SHAPE && ((Shape) node).hasImage()) {
                    // The node index keeps image file names unique within one document.
                    piece = getWordImagePath(node, imageOutputDirectory, imageprefix + "_" + index);
                }
                if (StringUtils.isNotEmpty(piece)) {
                    returnContent.append(piece).append("\t\n");
                }
            }
            index++;
        }
        return returnContent.toString();
    } catch (Exception e) {
        logger.error("WordUtils getParagraphContent getcontent error", e);
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Saves the image held by a shape node into {@code imageOutputDirectory} and
 * returns the path of the written file.
 *
 * <p>The file name is {@code filaName} plus an extension derived from the
 * image type. The output directory is created when it does not exist.
 *
 * @param node                 the node to read; it is cast to {@code Shape}
 * @param imageOutputDirectory directory that receives the image
 * @param filaName             file name without extension
 * @return the path of the saved image ({@code imageOutputDirectory},
 *         {@code File.separator} and the file name concatenated), or an empty
 *         string when the shape has no image data
 * @throws RuntimeException if the node is not a shape or the image cannot be
 *                          saved; the original exception is attached as the cause
 */
public static String getWordImagePath(Node node,String imageOutputDirectory,String filaName) throws RuntimeException {
    try {
        Shape shape = (Shape) node;
        ImageData imageData = shape.getImageData();
        if (imageData == null) {
            return "";
        }

        String savedName = filaName + "." + getImageFormat(imageData.getImageType());
        String imageFileName = imageOutputDirectory + File.separator + savedName;

        File outputDir = new File(imageOutputDirectory);
        if (!outputDir.exists()) {
            // A failed mkdirs() surfaces below, when save() cannot write the file.
            outputDir.mkdirs();
        }

        // save() writes the file itself, so no empty placeholder is created first.
        imageData.save(imageFileName);
        logger.info("Image {} saved as: {}", savedName, imageFileName);
        return imageFileName;
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is logged.
        logger.error("Wordutils getWordImage error", e);
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Saves the image held by a shape node into {@code imageOutputDirectory} and
 * returns the name (not the path) of the written file.
 *
 * <p>The file name is {@code filaName} plus an extension derived from the
 * image type. The directory and file name are joined with a path separator
 * whether or not {@code imageOutputDirectory} ends with one, and the
 * directory is created when it does not exist.
 *
 * @param node                 the node to read; it is cast to {@code Shape}
 * @param imageOutputDirectory directory that receives the image
 * @param filaName             file name without extension
 * @return the file name including its extension, or an empty string when the
 *         shape has no image data
 * @throws RuntimeException if the node is not a shape or the image cannot be
 *                          saved; the original exception is attached as the cause
 */
public static String getWordImageName(Node node,String imageOutputDirectory,String filaName) throws RuntimeException {
    try {
        Shape shape = (Shape) node;
        ImageData imageData = shape.getImageData();
        if (imageData == null) {
            return "";
        }

        String savedName = filaName + "." + getImageFormat(imageData.getImageType());

        // Plain concatenation (directory + name) silently wrote to a sibling path
        // when the directory had no trailing separator. File(parent, child) joins
        // correctly in both cases. An empty parent is special-cased because
        // File("", child) resolves against a system-dependent default directory.
        File target = (imageOutputDirectory == null || imageOutputDirectory.isEmpty())
                ? new File(savedName)
                : new File(imageOutputDirectory, savedName);

        File parent = target.getParentFile();
        if (parent != null && !parent.exists()) {
            // A failed mkdirs() surfaces below, when save() cannot write the file.
            parent.mkdirs();
        }

        imageData.save(target.getPath());
        logger.info("Image {} saved as: {}", savedName, target.getPath());
        return savedName;
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is logged.
        logger.error("Wordutils getWordImage error", e);
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Builds the textual representation of a paragraph, run by run.
 *
 * <p>A run that is underlined and whose font color is not black is treated as
 * a hyperlink and rendered as {@code [[text - url]]}; such an entry is skipped
 * when it contains {@code "HYPERLINK"}. For any other run, the trimmed
 * plain-text export of the whole paragraph is appended, unless the
 * accumulated result already contains it.
 *
 * <p>NOTE(review): because the non-link branch appends the text of the whole
 * paragraph, link text presumably appears a second time in the output of
 * paragraphs that mix links and ordinary runs. Behavior kept as in the
 * original; confirm whether this is intended.
 *
 * @param paragraph the paragraph to read
 * @return the collected text; empty when the paragraph has no runs or no text
 * @throws RuntimeException if reading fails; the original exception is attached as the cause
 */
public static String getWordText(Paragraph paragraph) throws RuntimeException {
    StringBuilder returnContent = new StringBuilder();
    try {
        // The paragraph text does not change while iterating, so it is exported
        // at most once, and only if a non-link run is actually encountered.
        String paragraphText = null;
        for (Run run : paragraph.getRuns()) {
            boolean looksLikeLink = run.getFont().getUnderline() != Underline.NONE
                    && !run.getFont().getColor().equals(Color.BLACK);
            if (looksLikeLink) {
                String linkUrl = getHyperlinkUrl(paragraph.getDocument(), run);
                String returnText = "[[" + run.getText() + " - " + linkUrl + "]]";
                // Skip runs whose text is the field code itself.
                if (!returnText.contains("HYPERLINK")) {
                    returnContent.append(returnText);
                }
            } else {
                if (paragraphText == null) {
                    paragraphText = paragraph.toString(SaveFormat.TEXT).trim();
                }
                if (StringUtils.isNotEmpty(paragraphText) && returnContent.indexOf(paragraphText) == -1) {
                    returnContent.append(paragraphText);
                }
            }
        }
        return returnContent.toString();
    } catch (Exception e) {
        logger.error("WordUtil getWordText error", e);
        // Chain the cause so callers keep the stack trace of the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Prints the hyperlink found in a paragraph to standard output as
 * {@code "Hyperlink in paragraph: <text> - <url>"}.
 *
 * <p>A run counts as a hyperlink when it is underlined and its font color is
 * not black. When several runs qualify, only the last one is printed; when
 * none qualifies, the line is printed with an empty value.
 *
 * @param paragraph the paragraph to scan
 * @throws RuntimeException declared for signature compatibility
 */
public static void getWordTextLinks(Paragraph paragraph) throws RuntimeException {
    String lastLink = "";
    for (Run candidate : paragraph.getRuns()) {
        // Guard clause: skip runs that do not carry link-like formatting.
        if (candidate.getFont().getUnderline() == Underline.NONE
                || candidate.getFont().getColor().equals(Color.BLACK)) {
            continue;
        }
        String label = candidate.getText();
        String target = getHyperlinkUrl(paragraph.getDocument(), candidate);
        lastLink = label + " - " + target;
    }
    System.out.println("Hyperlink in paragraph: " + lastLink);
}
/**
 * Returns the address of the hyperlink field that encloses a run.
 *
 * <p>The search walks backwards over the run's previous siblings. Fields that
 * are opened and closed before the run are skipped: every {@code FieldEnd}
 * met on the way cancels the next {@code FieldStart}. The first unmatched
 * {@code FieldStart} is the enclosing field. Without this pairing, a run
 * placed after a finished hyperlink would be given that hyperlink's address.
 *
 * @param paragraph unused; kept for signature compatibility
 * @param run       the run to inspect; may be {@code null}
 * @return the value of {@code FieldHyperlink.getAddress()} for the enclosing
 *         hyperlink field, or an empty string when the run is not inside a
 *         hyperlink field among its siblings
 */
public static String getHyperlinkUrl(CompositeNode paragraph, Run run) {
    int closedFields = 0;
    for (Node node = run; node != null; node = node.getPreviousSibling()) {
        if (node instanceof FieldEnd) {
            // A field ended before the run; its start must not be taken as enclosing.
            closedFields++;
        } else if (node instanceof FieldStart) {
            if (closedFields > 0) {
                closedFields--;
                continue;
            }
            Field field = ((FieldStart) node).getField();
            if (field != null && field.getType() == FieldType.FIELD_HYPERLINK) {
                return ((FieldHyperlink) field).getAddress();
            }
            return "";
        }
    }
    return "";
}
/**
 * Maps an Aspose {@code ImageType} constant to a file extension.
 *
 * @param imageType one of the {@code ImageType} constants
 * @return {@code "emf"}, {@code "wmf"}, {@code "jpeg"}, {@code "png"} or
 *         {@code "bmp"} for the matching type, {@code "unknown"} for any other value
 * @throws RuntimeException declared for signature compatibility
 */
private static String getImageFormat(int imageType) throws RuntimeException {
    if (imageType == ImageType.EMF) {
        return "emf";
    }
    if (imageType == ImageType.WMF) {
        return "wmf";
    }
    if (imageType == ImageType.JPEG) {
        return "jpeg";
    }
    if (imageType == ImageType.PNG) {
        return "png";
    }
    if (imageType == ImageType.BMP) {
        return "bmp";
    }
    return "unknown";
}
/**
 * Manual smoke test: extracts the content of a Word document and prints it.
 *
 * <p>Optional arguments override the built-in sample values:
 * {@code args[0]} is the input document path, {@code args[1]} the image output
 * directory and {@code args[2]} the image file name prefix.
 *
 * @param args optional overrides as described above
 * @throws RuntimeException if the document cannot be read
 */
public static void main(String[] args) throws RuntimeException {
    int argCount = (args == null) ? 0 : args.length;
    String inputFilePath = argCount > 0
            ? args[0]
            : "G:\\幻想工具箱\\工具箱推广\\办公软件推荐-Coolmuster Word 到 PDF 转换器.docx";
    String imageOutputDirectory = argCount > 1 ? args[1] : "E:\\blogfile\\blog\\image\\";
    String imagePrefix = argCount > 2 ? args[2] : "test";

    String wordtext = getParagraphContent(inputFilePath, imageOutputDirectory, imagePrefix);
    System.out.println(wordtext);
}
/**
 * Prints the hyperlink found in a paragraph to standard output.
 *
 * <p>This method used to be a line-for-line copy of {@link #getWordTextLinks};
 * it now delegates to it so the two cannot drift apart.
 *
 * @param paragraph the paragraph to scan
 */
private static void extractHyperlinks(Paragraph paragraph) {
    getWordTextLinks(paragraph);
}
}