<!-- Maven repository from which the Aspose Java artifacts are resolved. -->
<repositories>
    <repository>
        <id>AsposeJavaAPI</id>
        <name>Aspose Java API</name>
        <!-- Use HTTPS: Maven 3.8.1 and later block plain-http external repositories by default. -->
        <url>https://repository.aspose.com/repo</url>
    </repository>
</repositories>
<!-- Maven coordinates of the Aspose.PDF library used by the Java code below: com.aspose:aspose-pdf, version 22.5. -->
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-pdf</artifactId>
<version>22.5</version>
</dependency>
import java.util.List;
import com.aspose.pdf.AbsorbedTable;
import com.aspose.pdf.Document;
import com.aspose.pdf.Field;
import com.aspose.pdf.Page;
import com.aspose.pdf.PageCollection;
import com.aspose.pdf.TableAbsorber;
import com.aspose.pdf.TextFragmentCollection;
/**
 * Aspose.PDF helper routines: dumps the form fields and the table cells of a PDF
 * to standard output, and writes a resource-optimized copy of a PDF.
 */
public class PdfHelper {

    /** Page inspected by {@link #parse(String)}; kept for backward compatibility. */
    private static final int DEFAULT_TABLE_PAGE = 38;

    /** Image quality passed to the optimizer (0-100; lower means stronger compression). */
    private static final int IMAGE_QUALITY = 80;

    /**
     * Prints the form fields of the document and the tables found on page
     * {@value #DEFAULT_TABLE_PAGE}.
     *
     * @param filePath path of the PDF file to read
     */
    public static void parse(String filePath) {
        parse(filePath, DEFAULT_TABLE_PAGE);
    }

    /**
     * Prints the form fields of the document and the tables found on the given page.
     *
     * @param filePath   path of the PDF file to read
     * @param pageNumber index handed to {@code PageCollection.get_Item}
     *                   (NOTE(review): Aspose page numbering is presumably 1-based; confirm)
     */
    public static void parse(String filePath, int pageNumber) {
        Document pdf = new Document(filePath);
        try {
            printFormFields(pdf);
            PageCollection pages = pdf.getPages();
            printTables(pages.get_Item(pageNumber));
        } finally {
            // Release the document even when reading a field or a table fails.
            pdf.close();
        }
    }

    /** Prints the full name and the value of every form field of {@code pdf}. */
    private static void printFormFields(Document pdf) {
        for (Field field : pdf.getForm().getFields()) {
            System.out.println("Form field: " + field.getFullName());
            System.out.println("Form field: " + field.getValue());
        }
    }

    /** Prints every cell of every table on {@code page} as {@code row,column===>text}. */
    private static void printTables(Page page) {
        TableAbsorber absorber = new TableAbsorber();
        absorber.visit(page);
        List<AbsorbedTable> tables = absorber.getTableList();

        // Single-element arrays because lambdas may only capture effectively final locals.
        // The row counter intentionally keeps running across all tables of the page,
        // while the column counter restarts on every row.
        final int[] rowNumber = {0};
        final int[] columnNumber = {0};

        for (AbsorbedTable table : tables) {
            table.getRowList().forEach(row -> {
                rowNumber[0]++;
                columnNumber[0] = 0;
                row.getCellList().forEach(cell -> {
                    columnNumber[0]++;
                    String text = cellText(cell.getTextFragments());
                    System.out.println(
                            String.format("%d,%d===>%s", rowNumber[0], columnNumber[0], text));
                });
            });
        }
    }

    /** Concatenates the text of all fragments of one cell, in iteration order. */
    private static String cellText(TextFragmentCollection fragments) {
        StringBuilder text = new StringBuilder();
        fragments.forEach(fragment -> text.append(fragment.getText()));
        return text.toString();
    }

    /**
     * Optimizes the resources of a PDF and saves the result to a new file.
     *
     * @param source path of the PDF file to read
     * @param target path the optimized PDF is written to
     */
    public static void optimize(String source, String target) {
        Document doc = new Document(source);
        try {
            // Configure the optimization settings.
            Document.OptimizationOptions opt = new Document.OptimizationOptions();
            // Remove objects the PDF does not need.
            opt.setRemoveUnusedObjects(true);
            // Linking of duplicate streams: disabled.
            opt.setLinkDuplcateStreams(false);
            // Removal of unused streams: disabled.
            opt.setRemoveUnusedStreams(false);
            // Remove fonts that are not needed.
            opt.setUnembedFonts(true);
            // Compress the images contained in the PDF.
            opt.setCompressImages(true);
            opt.setImageQuality(IMAGE_QUALITY);
            doc.optimizeResources(opt);
            // Optimize the PDF document for the web.
            doc.optimize();
            doc.save(target);
        } finally {
            // The original never closed the document; release it on every path.
            doc.close();
        }
    }

    /**
     * Demo entry point: parses one PDF, then compresses another.
     * The paths are placeholders and must be replaced with real files.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Parse the tables of a PDF.
        parse("/your/file/path/**.pdf");
        // Compress a PDF.
        String source = "/your/file/path/source.pdf";
        String target = "/your/file/path/target.pdf";
        optimize(source, target);
    }
}