import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.*;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.cos.*;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;
import java.io.*;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * Command-line probe that walks the low-level COS object graph of a PDF and prints the
 * name/string pairs it finds, so that embedded scripts (for example a {@code JavaScript}
 * action) can be spotted.
 *
 * <p>Usage: pass the PDF path as the first argument; when no argument is given the
 * built-in default path is used.
 */
@Slf4j
public class test {
    /** Every string object encountered while walking the graph. */
    private static final Set<COSString> strings = new HashSet<COSString>();

    /**
     * Heuristic name-to-string pairs. Within one dictionary, the most recent name-typed
     * value is paired with the most recent string-typed value (e.g. {@code /S /JavaScript}
     * together with {@code /JS (code)} yields {@code JavaScript -> code}).
     */
    private static final Map<COSName, COSString> map = new HashMap<>();

    /**
     * Dictionaries and arrays that have already been walked. Membership is by identity so
     * that adding a container never has to hash or compare its (possibly cyclic) contents.
     */
    private static final Set<COSBase> visited =
            Collections.newSetFromMap(new IdentityHashMap<COSBase, Boolean>());

    /**
     * Loads the PDF, walks every object registered in the document and prints the result.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws IOException if the file cannot be read or parsed
     */
    public static void main(String[] args) throws IOException {
        String inputPath = (args != null && args.length > 0) ? args[0] : "文件路口.pdf";
        // try-with-resources closes the document even when the walk or the printing throws.
        try (PDDocument doc = PDDocument.load(new File(inputPath))) {
            for (COSObject obj : doc.getDocument().getObjects()) {
                cosList(obj.getObject());
            }
            // Look up the value paired with the JavaScript name (prints "null" when absent).
            System.out.println("测试: " + map.get(COSName.getPDFName("JavaScript")));
            // Print every name/string pair that was collected.
            for (Map.Entry<COSName, COSString> entry : map.entrySet()) {
                String mapKey = entry.getKey().getName();
                String mapValue = entry.getValue().getString();
                System.out.println(mapKey + ":" + mapValue);
            }
        }
    }

    /**
     * Recursively walks {@code base}, recording strings in {@link #strings} and
     * name/string pairs in {@link #map}.
     *
     * <p>Each dictionary or array is walked at most once. Dictionary values are fetched
     * with {@code getDictionaryObject}, which resolves indirect references, so without this
     * guard a back reference between dictionaries would recurse until the stack overflows.
     *
     * @param base the object to inspect; may be {@code null}
     */
    private static void cosList(COSBase base) {
        if (base instanceof COSString) {
            strings.add((COSString) base);
            return;
        }
        // Streams are handled here as well: the original code treated a COSStream exactly
        // like a dictionary (and a stream's dictionary can be where a script is hidden).
        if (base instanceof COSDictionary) {
            if (!visited.add(base)) {
                return; // already walked: breaks cycles and avoids repeated work
            }
            COSDictionary dict = (COSDictionary) base;
            // Deliberately declared outside the loop: the pairing carries over from one key
            // to the next within the same dictionary.
            COSString lastString = null;
            COSName lastName = null;
            for (COSName key : dict.keySet()) {
                COSBase value = dict.getDictionaryObject(key);
                if (value instanceof COSString) {
                    lastString = (COSString) value;
                } else if (value instanceof COSName) {
                    lastName = (COSName) value;
                }
                if (lastString != null && lastName != null) {
                    map.put(lastName, lastString);
                }
                cosList(value);
            }
            return;
        }
        if (base instanceof COSArray) {
            if (!visited.add(base)) {
                return;
            }
            // Array items are visited as stored; an indirect reference (COSObject) is not
            // followed here because main() already visits every indirect object itself.
            for (COSBase item : (COSArray) base) {
                cosList(item);
            }
            return;
        }
        if (base == null
                || base instanceof COSNull
                || base instanceof COSObject
                || base instanceof COSName
                || base instanceof COSNumber
                || base instanceof COSBoolean) {
            return;
        }
        // Anything else is an unexpected COS type; report it.
        System.out.println("baseCos " + base);
    }
}
根据上述代码，可以捕获到 PDF 中设置的信息，例如：
测试: COSString{app.alert('XSS')}
Form:D:20221027140839+08'00'
JavaScript:app.alert('XSS')
SimHei:黑体
Arial:Arial
Watermark:D:20221027140839+08'00'
XObject:D:20221027140839+08'00'
FontDescriptor:Arial
可以根据 JavaScript 键是否有对应的值来进行拦截。