《快学scala》
链接:https://pan.baidu.com/s/1T12_C-cVwfwwJQtbb6Cvdg
提取码:teaj
【注:下面的代码只适用于内容是纯文字(带文本层)的 PDF。《快学Scala》这本转不了,可能是因为它是扫描版、每一页都被识别成了图片,所以提取不出文字。反正我没有成功,有大神知道怎么处理的话,欢迎在评论里教一下小弟。】
1、Java 提取 PDF 文本并输出
import com.itextpdf.text.pdf.PdfDocument;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
import java.io.FileWriter;
import java.io.*;
/**
 * Small utility that pulls the text content out of a PDF file with PDFBox
 * and, when run directly, prints it to standard output.
 */
public class PdfReader {

    /**
     * Reads the PDF at the given path and returns the text PDFBox extracts from it.
     *
     * @param pdfFilePath path of the PDF file to read
     * @return the extracted text, or {@code null} when the file cannot be opened
     *         or parsed (the stack trace is printed in that case)
     */
    public static String getTextFromPDF(String pdfFilePath) {
        FileInputStream input = null;
        PDDocument pdf = null;
        String text = null;
        try {
            input = new FileInputStream(pdfFilePath);
            PDFParser pdfParser = new PDFParser(input);
            pdfParser.parse();
            pdf = pdfParser.getPDDocument();
            text = new PDFTextStripper().getText(pdf);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a missing file is
            // reported here exactly like any other read/parse failure.
            e.printStackTrace();
        } finally {
            // Release the raw stream first, then the parsed document.
            closeStream(input);
            closeDocument(pdf);
        }
        return text;
    }

    /** Closes the input stream if it was opened, printing any failure instead of propagating it. */
    private static void closeStream(FileInputStream input) {
        if (input == null) {
            return;
        }
        try {
            input.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the parsed document if one was produced, printing any failure instead of propagating it. */
    private static void closeDocument(PDDocument pdf) {
        if (pdf == null) {
            return;
        }
        try {
            pdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: extracts the text of the hard-coded sample file and prints it.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        System.out.println(PdfReader.getTextFromPDF("F:\\51.pdf"));
    }
}
2、Java 将 PDF 转为 doc
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
import java.io.*;
public class PdfToDoc {
/**
 * Creates a {@code PdfToDoc} instance. The constructor sets up no state; the
 * conversion entry point {@code pdfToDoc} is static and does not need an instance.
 */
public PdfToDoc() {
    super();
}
/**
 * Extracts the text of a PDF and writes it, UTF-8 encoded, to a file with the
 * same base name and a {@code .doc} extension next to the source file.
 *
 * <p>The output is whatever {@code PDFTextStripper.writeText} produces; this
 * method only renames the target to {@code .doc}, it does not build a Word
 * document structure.
 *
 * @param name1 path of the PDF file to convert
 * @throws IOException if the PDF cannot be loaded or the output cannot be written
 */
public static void pdfToDoc(String name1) throws IOException {
    PDDocument doc = PDDocument.load(name1);
    try {
        int pagenumber = doc.getNumberOfPages();

        // Strip the extension only when the last dot belongs to the file name
        // itself: a path without any dot, or with a dot only in a directory
        // name, keeps its full name and just gets ".doc" appended.
        int lastSeparator = Math.max(name1.lastIndexOf('/'), name1.lastIndexOf(File.separatorChar));
        int lastDot = name1.lastIndexOf('.');
        String baseName = lastDot > lastSeparator ? name1.substring(0, lastDot) : name1;
        String fileName = baseName + ".doc";

        createFile(fileName);
        Writer writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
        try {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            stripper.setStartPage(1);
            stripper.setEndPage(pagenumber);
            stripper.writeText(doc, writer);
        } finally {
            // Always flush and release the output file, even if extraction fails.
            writer.close();
        }
    } finally {
        // Always release the loaded document, even if output setup or extraction fails.
        doc.close();
    }
    System.out.println("pdf转换word成功!");
}
/**
 * Creates the given directory (including missing parents) and reports the
 * outcome on standard output.
 *
 * <p>If the directory already exists, a message is printed and nothing else
 * is attempted.
 *
 * @param destDirName path of the directory to create
 */
private static void createDir(String destDirName) {
    File dir = new File(destDirName);
    if (dir.exists()) {
        System.out.println("创建目录失败,目标目录已存在!");
        // Stop here: mkdirs() would return false for an existing directory
        // and trigger a second, misleading failure message.
        return;
    }
    // Only affects the path shown in the success message below.
    if (!destDirName.endsWith(File.separator)) {
        destDirName = destDirName + File.separator;
    }
    if (dir.mkdirs()) {
        System.out.println("创建目录成功!" + destDirName);
    } else {
        System.out.println("创建目录失败!");
    }
}
/**
 * Creates an empty file at the given path, creating missing parent
 * directories first, and reports each step on standard output.
 *
 * <p>Nothing is created when the file already exists or when the path ends
 * with the platform file separator (i.e. names a directory). I/O failures are
 * printed rather than thrown.
 *
 * @param filePath path of the file to create; may be a bare file name without
 *                 a parent directory
 */
public static void createFile(String filePath) {
    File file = new File(filePath);
    if (file.exists()) {
        System.out.println("目标文件已存在" + filePath);
        // Nothing to create; without this return a bogus failure message followed.
        return;
    }
    if (filePath.endsWith(File.separator)) {
        System.out.println("目标文件不能为目录!");
        return;
    }
    // getParentFile() is null for a bare file name such as "out.doc";
    // in that case there is no directory to prepare.
    File parent = file.getParentFile();
    if (parent != null && !parent.exists()) {
        System.out.println("目标文件所在目录不存在,准备创建它!");
        if (!parent.mkdirs()) {
            // Keep going: createNewFile() below reports the definitive result.
            System.out.println("创建目标文件所在的目录失败!");
        }
    }
    try {
        if (file.createNewFile()) {
            System.out.println("创建文件成功:" + filePath);
        } else {
            System.out.println("创建文件失败!");
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.out.println("创建文件失败!" + e.getMessage());
    }
}
/**
 * Demo entry point: converts the hard-coded sample file {@code F:/51.pdf}.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the conversion fails
 */
public static void main(String[] args) throws Exception {
    pdfToDoc("F:/51.pdf");
}