对于PDF格式的文档,转换为HTML的思路为:
1.首先把PDF文档转换为对应页数的图片N张(利用PDFRenderer.jar,下载地址:https://java.net/projects/pdf-renderer/downloads)。
2.为每张图片生成一个HTML文件。
3.为每个HTML添加链接,使之按照原有的PDF的顺序前后对应。
在这里,贴出两个函数,供大家参考,水平有限,求轻拍。
/**
 * Converts a PDF into a chain of HTML pages, one per PDF page.
 *
 * <p>The PDF is first rendered to {@code 1.png .. N.png} by
 * {@link #converPDF2Imgs(String, String)}; then {@code 1.html .. N.html} are written
 * into the same directory. Each HTML page shows its image plus a navigation table
 * (first / previous / next / last, as applicable) and a link back to {@code targetUrl}.
 *
 * <p>I/O failures while writing a page are printed to stderr and the remaining pages
 * are still attempted.
 *
 * @param sourceFilepath    path of the PDF file to convert
 * @param htmlFileDirectory directory that receives the PNG and HTML files
 * @param targetUrl         URL used for the "返回上一级" (back) link on every page
 */
private void converPDF2Html(String sourceFilepath,String htmlFileDirectory,String targetUrl)
{
    // converPDF2Imgs returns a value one greater than the number of pages it
    // processed, hence the -1 to obtain the real page count.
    int pages=converPDF2Imgs(sourceFilepath, htmlFileDirectory)-1;
    for(int i=1;i<=pages;i++){
        String content=buildPdfPageHtml(i, pages, targetUrl);
        File file=new File(htmlFileDirectory+"/"+i+".html");
        // Project helper; presumably makes sure the parent directory exists — confirm.
        file=FileUtil.formFileWithDirectory(file);
        writeHtmlPage(file, content);
    }
}

/**
 * Builds the HTML for one page: the page image followed by a navigation table.
 *
 * <p>"First"/"previous" links are emitted only when a previous page exists and
 * "next"/"last" links only when a following page exists, so a single-page document
 * gets just the back link instead of a link to a non-existent {@code 2.html}.
 *
 * @param page      1-based number of the page being generated
 * @param pages     total number of pages
 * @param targetUrl URL for the back link
 * @return the complete HTML document as a string
 */
private String buildPdfPageHtml(int page, int pages, String targetUrl)
{
    boolean hasPrevious=page>1;
    boolean hasNext=page<pages;
    StringBuilder html=new StringBuilder();
    html.append("<html><head><META HTTP-EQUIV='CONTENT-TYPE' CONTENT='text/html; charset=utf-8'></head><body>");
    html.append("<img src='").append(page).append(".png' />");
    // Interior pages have always been emitted with a space between <table> and <tr>;
    // kept so the generated markup for multi-page documents stays byte-identical.
    html.append(hasPrevious && hasNext ? "<table> <tr>" : "<table><tr>");
    if(hasPrevious){
        html.append("<td><a href='1.html'>首页</a> </td>");
        html.append("<td><a href='").append(page-1).append(".html'>上一页</a> </td>");
    }
    if(hasNext){
        html.append("<td><a href='").append(page+1).append(".html'>下一页</a> </td>");
        html.append("<td><a href='").append(pages).append(".html'>末页</a> </td>");
    }
    // NOTE(review): targetUrl is inserted without HTML escaping; if it can ever come
    // from untrusted input it should be escaped before reaching this point.
    html.append("<td><a href ='").append(targetUrl).append("'> 返回上一级</a></td></tr>");
    html.append("</table></body></html>");
    return html.toString();
}

/**
 * Writes {@code content} to {@code file} encoded as UTF-8.
 *
 * <p>Failures are printed to stderr rather than propagated, so one unwritable page
 * does not abort the whole conversion.
 *
 * @param file    destination file
 * @param content text to write
 */
private void writeHtmlPage(File file, String content)
{
    FileOutputStream fs=null;
    OutputStreamWriter out=null;
    try {
        fs=new FileOutputStream(file);
        out=new OutputStreamWriter(fs, "utf-8");
        out.write(content);
    } catch (IOException e) {
        // Also covers FileNotFoundException and UnsupportedEncodingException,
        // both of which are IOException subclasses.
        e.printStackTrace();
    }
    finally
    {
        try {
            if(out!=null){
                // Closing the writer flushes it and closes the underlying stream.
                out.close();
            }
            else if(fs!=null){
                // The writer was never created; release the stream directly.
                fs.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Renders the pages of a PDF file to PNG images named {@code 1.png}, {@code 2.png}, ...
 * inside {@code htmlFileDirectory}. Each image has the size of the page's bounding
 * box (truncated to whole pixels) and a white background.
 *
 * <p>A failure to write an individual image is printed to stderr and conversion
 * continues with the next page; that page is still counted. If the calling thread is
 * interrupted while waiting for a page, the interrupt flag is restored and conversion
 * stops before that page.
 *
 * <p><b>Return value:</b> this method returns its loop counter, i.e. the number of
 * pages processed <em>plus one</em>. {@code converPDF2Html} subtracts 1 from it, so
 * this contract must not be changed on one side only.
 *
 * @param sourceFilepath    path of the PDF file to render
 * @param htmlFileDirectory directory that receives the PNG files
 * @return the number of pages processed plus one
 */
private int converPDF2Imgs(String sourceFilepath,String htmlFileDirectory)
{
    File file=new File(sourceFilepath);
    // formPDFFile is defined elsewhere in the class.
    // NOTE(review): if it can return null on a bad file, the next line throws — confirm.
    PDFFile pdfFile=formPDFFile(file);
    int pageNum=pdfFile.getNumPages();
    int i=1;
    while(i<=pageNum)
    {
        PDFPage page=pdfFile.getPage(i);
        int height=(int) page.getBBox().getHeight();
        int width=(int) page.getBBox().getWidth();
        BufferedImage img=new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        Graphics2D g2=img.createGraphics();
        try {
            g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            PDFRenderer pdfRender=new PDFRenderer(page, g2, new Rectangle(0, 0, width, height), null, Color.white);
            page.waitForFinish();
            pdfRender.run();
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers and stop; i still equals
            // (pages successfully processed + 1), matching the return contract.
            Thread.currentThread().interrupt();
            break;
        } finally {
            // Always release the graphics context, even if rendering fails.
            g2.dispose();
        }
        try {
            File imgfile=new File(htmlFileDirectory+"/"+i+".png");
            // Project helper; presumably makes sure the parent directory exists — confirm.
            imgfile=FileUtil.formFileWithDirectory(imgfile);
            ImageIO.write(img,"png", imgfile);
        } catch (IOException e) {
            e.printStackTrace();
        }
        i++;
    }
    return i;
}