Java操作PDF文档(PDFBox)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
package
com.leixinhui.test;
import
java.io.FileInputStream;
import
java.io.InputStream;
import
java.io.OutputStreamWriter;
import
java.io.Writer;
import
org.apache.pdfbox.pdmodel.PDDocument;
import
org.apache.pdfbox.util.PDFTextStripper;
/**
 * Minimal example that extracts the text of a PDF file with Apache PDFBox
 * and prints it to standard output.
 */
public class Test {

    /**
     * Entry point: prints the text of a hard-coded sample PDF to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the file cannot be opened, parsed, or written out
     */
    public static void main(String[] args) throws Exception {
        String strFile = "F:\\代码\\Java\\Java操作PDF文档\\PDFbox使用教程.pdf";
        new Test().getText(strFile);
    }

    /**
     * Prints the text content of a PDF document to standard output.
     *
     * @param strFile path of the PDF file to read
     * @throws Exception if the file cannot be opened, parsed, or written out
     */
    private void getText(String strFile) throws Exception {
        boolean sort = false;
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;

        // Each resource is acquired immediately before the try block that
        // releases it, so a failure while opening one can never lead to a
        // close() call on a null reference that would hide the real error.
        InputStream inputStream = new FileInputStream(strFile);
        try {
            PDDocument document = PDDocument.load(inputStream);
            try {
                PDFTextStripper textStripper = new PDFTextStripper();
                textStripper.setSortByPosition(sort);
                textStripper.setStartPage(startPage);
                textStripper.setEndPage(endPage);

                Writer outWriter = new OutputStreamWriter(System.out);
                try {
                    textStripper.writeText(document, outWriter);
                } finally {
                    // Flush only: closing the writer would also close
                    // System.out for the rest of the process.
                    outWriter.flush();
                }
            } finally {
                // The parsed document holds its own resources and must be
                // released separately from the input stream.
                document.close();
            }
        } finally {
            inputStream.close();
        }
    }
}
|
备注:以上示例代码基于 Apache PDFBox 1.6.0 编写。