从 https://pdfbox.apache.org/ 下载 pdfbox-app-2.0.9.jar 包
pdfbox-app-2.0.9.jar
pdfbox-2.0.9-src.zip
\pdfbox-2.0.9-src\examples\src\main\java\org\apache\pdfbox\examples\pdmodel\PrintBookmarks.java
package test;
import java.io.*;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;
/**
 * Command-line example that prints the bookmark (outline) tree of a PDF file
 * to System.out, one bookmark per line, followed by its page number when the
 * bookmark points at a page.
 */
public class PrintBookmarks
{
    /**
     * Prints every bookmark below the given outline node to System.out,
     * depth first, then recurses into each bookmark's children.
     *
     * @param bookmark The outline node whose children are printed.
     * @param indentation Prefix written before each title; one extra space is
     *                    appended per nesting level.
     *
     * @throws IOException If there is an error resolving a destination or its page number.
     */
    public void printBookmark( PDOutlineNode bookmark, String indentation ) throws IOException
    {
        PDOutlineItem current = bookmark.getFirstChild();
        while( current != null )
        {
            // 0 means "no page resolved"; the title is then printed without a number.
            int pages = 0;
            if (current.getDestination() instanceof PDPageDestination)
            {
                PDPageDestination pd = (PDPageDestination) current.getDestination();
                // NOTE(review): per the PDFBox docs retrievePageNumber() is zero-based and
                // returns -1 when the page cannot be found, so +1 yields 0 in that case.
                pages = (pd.retrievePageNumber() + 1);
            }
            // A GoTo action is checked after the direct destination, so when both
            // resolve to a page the action's page is the one printed.
            if (current.getAction() instanceof PDActionGoTo)
            {
                PDActionGoTo gta = (PDActionGoTo) current.getAction();
                if (gta.getDestination() instanceof PDPageDestination)
                {
                    PDPageDestination pd = (PDPageDestination) gta.getDestination();
                    pages = (pd.retrievePageNumber() + 1);
                }
            }
            if (pages == 0)
            {
                System.out.println( indentation + current.getTitle());
            }
            else
            {
                System.out.println( indentation + current.getTitle() +" "+ pages);
            }
            // Recursive call: print this bookmark's children one level deeper.
            printBookmark( current, indentation + " " );
            current = current.getNextSibling();
        }
    }

    /**
     * Entry point: expects exactly one argument, the path of the PDF to inspect.
     * Prints a usage line when the argument count is wrong and an error line
     * when the file does not exist.
     *
     * @param args Command-line arguments; args[0] is the PDF file path.
     *
     * @throws Exception If the PDF cannot be loaded or its outline cannot be read.
     */
    public static void main( String[] args ) throws Exception
    {
        if( args.length != 1 )
        {
            System.out.println( " usage: java PrintBookmarks file1.pdf " );
            return;
        }
        File file1 = new File(args[0]);
        if (!file1.exists())
        {
            System.err.println(" file is not exists ");
            return;
        }
        // Start reading the PDF document. try-with-resources guarantees the
        // document is closed even when loading the outline or printing fails;
        // the previous finally block could skip document.close() if closing
        // the input stream threw first.
        try (PDDocument document = PDDocument.load(file1))
        {
            PrintBookmarks the = new PrintBookmarks();
            PDDocumentOutline outline = document.getDocumentCatalog().getDocumentOutline();
            if( outline != null )
            {
                the.printBookmark( outline, "" );
            }
            else
            {
                System.out.println( "This document does not contain any bookmarks" );
            }
        }
    }
}
编译 compile.bat
set JAR=pdfbox-app-2.0.9.jar
javac -cp %JAR% -d . PrintBookmarks.java
运行 run1.bat
set JAR=pdfbox-app-2.0.9.jar
java -Xms128m -Xmx512m -cp %JAR%;. test.PrintBookmarks %1
例如: cmd
run1.bat Hadoop权威指南_第四版_中文版.pdf > hadoop4.txt
运行 cmd
java -jar pdfbox-app-2.0.9.jar
PDFBox version: "2.0.9"
Usage: java -jar pdfbox-app-x.y.z.jar <command> <args..>
Possible commands are:
ConvertColorspace
Decrypt
Encrypt
ExtractText
ExtractImages
OverlayPDF
PrintPDF
PDFDebugger
PDFMerger
PDFReader
PDFSplit
PDFToImage
TextToPDF
WriteDecodedDoc