这几天研究了一下对pdf文件的操作,简单点的比如怎样用iText来复制一个pdf文件等,这是对本地的pdf文件进行操作。当然,iText还可以对网络上的pdf文件进行操作,比如下载网络上的pdf文件等。下面的代码就是专门针对http://www.jms20x.com/dzts/default.html上的pdf文件进行下载的。这段代码可以处理中文,而且能很好地保留原文件的版式。
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.PdfCopy;
import com.lowagie.text.pdf.PdfImportedPage;
import com.lowagie.text.pdf.PdfReader;
/**
 * Batch downloader: walks the numbered sub-directories of {@link #urlname},
 * collects the PDF links found in each directory's index.html and re-writes
 * every linked PDF to a local file page by page with iText's PdfCopy.
 */
public class downloadFiles
{
    /** Base URL; the directory number and a trailing slash are appended to it. */
    private static final String urlname = "http://www.jms20x.com/dzts/";
    /** Directory numbers that main() does not visit. */
    private static final int[] SKIPPED_DIRECTORIES =
        {61, 63, 65, 66, 70, 71, 74, 82, 87, 89, 91, 93, 95, 97};
    /** Links whose absolute URL is longer than this are ignored by getURLS(). */
    private static final int MAX_URL_LENGTH = 60;
    /** Files whose reported Content-Length exceeds this many bytes are not downloaded. */
    private static final int MAX_CONTENT_LENGTH = 20000000;
    /** Characters replaced by '_' when a link title is turned into a file name. */
    private static final String ILLEGAL_FILENAME_CHARS = "\\/:*?\"<>|";

    private static ArrayList<String> urllist = new ArrayList<String>();
    private static ArrayList<String> savelist = new ArrayList<String>();

    /**
     * Visits directories 1..100 (minus {@link #SKIPPED_DIRECTORIES}) and
     * downloads every PDF listed there into C:\&lt;number&gt;\.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        for (int i = 1; i <= 100; i++)
        {
            if (isSkippedDirectory(i))
                continue;
            String savepath = "C:\\" + i + "\\";
            String tempurl = urlname + i + "/";
            downloadFiles.getURLS(tempurl, savepath, urllist, savelist);
            if (urllist.size() > 0)
            {
                System.out.println("Creating a new directory : C:\\" + i );
                for (int j = 0; j < urllist.size(); j++)
                {
                    System.out.println(j);
                    downloadFiles.getWebFiles(urllist.get(j), savelist.get(j));
                }
                // The lists are shared between iterations, so reset them.
                urllist.clear();
                savelist.clear();
            }
        }
    }

    /** Returns true when the given directory number is on the skip list. */
    private static boolean isSkippedDirectory(int number)
    {
        for (int k = 0; k < SKIPPED_DIRECTORIES.length; k++)
        {
            if (SKIPPED_DIRECTORIES[k] == number)
                return true;
        }
        return false;
    }

    /**
     * Reads {@code index.html} below {@code urlname} line by line and, for each
     * line that contains an anchor pointing at a ".pdf" file, appends the
     * absolute URL to {@code urllist} and the target file path to
     * {@code savelist} (same index in both lists).
     *
     * The parser is deliberately simple: it looks at the first
     * {@code <a href=} on a line and the first {@code </a>} after it.
     * Lines that do not match, links without ".pdf" and links whose absolute
     * URL exceeds {@link #MAX_URL_LENGTH} are skipped. I/O failures are
     * reported on stderr and leave the lists with whatever was collected so far.
     *
     * @param urlname  directory URL; links are resolved by plain concatenation,
     *                 so it is expected to end with '/'
     * @param savedir  local directory prefix prepended to each file name
     * @param urllist  receives the absolute PDF URLs
     * @param savelist receives the matching local file paths
     */
    public static void getURLS(String urlname, String savedir, ArrayList<String> urllist, ArrayList<String> savelist)
    {
        HttpURLConnection httpconn = null;
        BufferedReader br = null;
        try
        {
            // Avoid requesting ".../dir//index.html" when urlname already ends with '/'.
            String indexUrl = urlname.endsWith("/") ? urlname + "index.html" : urlname + "/index.html";
            URL url = new URL(indexUrl);
            httpconn = (HttpURLConnection) url.openConnection();
            // NOTE(review): the page is decoded with the platform default charset,
            // as before; confirm the site's encoding before hard-coding one.
            br = new BufferedReader(new InputStreamReader(httpconn.getInputStream()));
            String str;
            while ((str = br.readLine()) != null)
            {
                int hrefStart = str.indexOf("<a href=");
                if (hrefStart < 0)
                    continue;
                // "<a href=" is 8 characters; one more is skipped, presumably the opening quote.
                int targetStart = hrefStart + 9;
                int anchorEnd = str.indexOf("</a>", hrefStart);
                if (anchorEnd < targetStart)
                    continue;
                String temp = str.substring(targetStart, anchorEnd);
                int pdfPos = temp.indexOf(".pdf");
                if (pdfPos < 0)
                    continue; // not a PDF link
                String urlpath = urlname + temp.substring(0, pdfPos + 4);
                if (urlpath.length() > MAX_URL_LENGTH)
                    continue;
                // Everything after the first '>' is the link title; it becomes the file name.
                String fileName = sanitizeFileName(temp.substring(temp.indexOf(">") + 1) + ".pdf");
                urllist.add(urlpath);
                savelist.add(savedir + fileName);
            }
        } catch (MalformedURLException e) {
            System.err.println("Invalid index URL for " + urlname + ": " + e);
        } catch (IOException e) {
            System.err.println("Could not read index of " + urlname + ": " + e);
        } finally {
            closeQuietly(br);
            if (httpconn != null)
                httpconn.disconnect();
        }
    }

    /**
     * Replaces every character of {@link #ILLEGAL_FILENAME_CHARS} with '_' so
     * that text taken from the web page cannot introduce path separators or
     * characters that are invalid in Windows file names.
     */
    private static String sanitizeFileName(String name)
    {
        StringBuilder cleaned = new StringBuilder(name.length());
        for (int k = 0; k < name.length(); k++)
        {
            char c = name.charAt(k);
            cleaned.append(ILLEGAL_FILENAME_CHARS.indexOf(c) >= 0 ? '_' : c);
        }
        return cleaned.toString();
    }

    /**
     * Downloads the PDF at {@code urlpath} and writes a page-by-page copy to
     * {@code savepath}. Does nothing when the target file already exists or
     * when the server reports a Content-Length above
     * {@link #MAX_CONTENT_LENGTH}. Any failure is reported on stderr and the
     * incomplete output file is removed, so a later run can retry it.
     *
     * @param urlpath  absolute URL of the PDF
     * @param savepath local target path; its parent directory (the part before
     *                 the last backslash) is created when missing
     */
    public static void getWebFiles(String urlpath, String savepath)
    {
        int separator = savepath.lastIndexOf("\\");
        if (separator > 0)
        {
            File dir = new File(savepath.substring(0, separator));
            if (!dir.exists())
                dir.mkdirs();
        }
        File f = new File(savepath);
        if (f.exists())
            return;

        HttpURLConnection httpconn = null;
        InputStream is = null;
        FileOutputStream out = null;
        boolean completed = false;
        try
        {
            URL url = new URL(urlpath);
            httpconn = (HttpURLConnection) url.openConnection();
            if (httpconn.getContentLength() > MAX_CONTENT_LENGTH)
                return;
            is = httpconn.getInputStream();
            PdfReader reader = new PdfReader(is);
            int n = reader.getNumberOfPages();
            Document document = new Document(reader.getPageSize(1));
            // The output file is only created once the source PDF has been parsed.
            out = new FileOutputStream(f);
            PdfCopy copy = new PdfCopy(document, out);
            document.open();
            for (int i = 1; i <= n; i++)
            {
                // NOTE(review): newPage() is kept from the original code; it looks
                // redundant next to copy.addPage() - confirm before removing.
                document.newPage();
                PdfImportedPage page = copy.getImportedPage(reader, i);
                copy.addPage(page);
            }
            document.close();
            completed = true;
        } catch (Exception e) {
            // Catch-all on purpose: one bad link or broken PDF must not abort the whole batch.
            System.err.println("Skipped " + urlpath + ": " + e);
        } finally {
            closeQuietly(is);
            closeQuietly(out);
            if (httpconn != null)
                httpconn.disconnect();
            // Remove a partially written file; otherwise the exists() check above
            // would treat it as already downloaded on the next run.
            if (out != null && !completed && f.exists())
                f.delete();
        }
    }

    /** Closes the resource if it is non-null, ignoring any IOException from close(). */
    private static void closeQuietly(java.io.Closeable resource)
    {
        if (resource == null)
            return;
        try
        {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done when close() itself fails.
        }
    }
}
需要说明的是,这里需要用itext-2.0.2.jar包和bcprov-jdk15-139.jar,不能使用最新的itext-2.1.4.jar版本,否则会出现错误。可以使用附件里面的文件进行下载,解压后直接双击Start.bat就可进行下载了(默认是保存在C盘里)。对JDK为1.5和1.6的,可以分别使用附件里对应版本的文件。
对本地pdf文件的操作差不多。下面的代码是针对本地pdf文件进行操作的一个简单例子,就是对文件进行复制。
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.PdfCopy;
import com.lowagie.text.pdf.PdfImportedPage;
import com.lowagie.text.pdf.PdfReader;
/**
 * Minimal example that copies a local PDF file page by page with iText's PdfCopy.
 */
public class pdfCopy
{
    /**
     * Copies C:\a.pdf to C:\b.pdf.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String filepath = "C:\\a.pdf";
        String savepath = "C:\\b.pdf";
        copyLocalFiles(filepath, savepath);
    }

    /**
     * Reads the PDF at {@code filepath} and writes all of its pages, in order,
     * to a new PDF at {@code savepath}. The new document is created with the
     * page size of the source's first page. IOException and DocumentException
     * are printed to stderr and not propagated.
     *
     * @param filepath path of the source PDF
     * @param savepath path of the PDF to create (an existing file is overwritten)
     */
    public static void copyLocalFiles(String filepath, String savepath)
    {
        FileOutputStream out = null;
        try
        {
            PdfReader reader = new PdfReader(filepath);
            int n = reader.getNumberOfPages();
            Document document = new Document(reader.getPageSize(1));
            // Keep a handle on the stream so it can be released even when copying fails.
            out = new FileOutputStream(savepath);
            PdfCopy copy = new PdfCopy(document, out);
            document.open();
            for (int i = 1; i <= n; i++)
            {
                // NOTE(review): newPage() is kept from the original code; it looks
                // redundant next to copy.addPage() - confirm before removing.
                document.newPage();
                PdfImportedPage page = copy.getImportedPage(reader, i);
                copy.addPage(page);
            }
            document.close();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (DocumentException e) {
            e.printStackTrace();
        } finally {
            if (out != null)
            {
                try
                {
                    // Closing an already closed FileOutputStream has no effect.
                    out.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done when close() itself fails.
                }
            }
        }
    }
}
上面的代码只是简单的对本地和网络文件进行操作的例子,还算比较简单。
下面是学习iText的几个比较有用的网址。
http://itextdocs.lowagie.com/tutorial/