iText操作pdf文件

最新推荐文章于 2024-04-22 18:27:34 发布

yoyochina

最新推荐文章于 2024-04-22 18:27:34 发布

阅读量232

点赞数

分类专栏： Java 文章标签： Java .net C C++ C#

Java 专栏收录该内容

95 篇文章 0 订阅

订阅专栏

这几天研究了一下对PDF文件的操作。简单一点的，比如怎样用iText来复制一个PDF文件，这是对本地PDF文件的操作。当然，iText还可以处理网络上的PDF文件，比如把网络上的PDF文件下载下来。下面的代码就是专门用来下载 http://www.jms20x.com/dzts/default.html 上的PDF文件的。它可以处理中文，而且能很好地保留原文件的版式。

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;

import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.PdfCopy;
import com.lowagie.text.pdf.PdfImportedPage;
import com.lowagie.text.pdf.PdfReader;

/**
 * Command-line tool that walks the numbered sub-directories 1..100 of
 * {@link #urlname}, reads each directory's index.html, and saves every PDF
 * linked from it under C:\&lt;number&gt;\ using the link text as the file name.
 *
 * Each PDF is re-written page by page through iText's PdfCopy rather than
 * being streamed to disk byte for byte.
 */
public class downloadFiles 
{
	private static final String urlname = "http://www.jms20x.com/dzts/";
	private static ArrayList<String> urllist = new ArrayList<String>();
	private static ArrayList<String> savelist = new ArrayList<String>();

	/** Opening of an anchor tag as it is searched for in the index page. */
	private static final String ANCHOR_OPEN = "<a href=";

	/** Closing of an anchor tag. */
	private static final String ANCHOR_CLOSE = "</a>";

	/** Extension that marks the end of the href of a PDF link. */
	private static final String PDF_SUFFIX = ".pdf";

	/**
	 * Links whose absolute URL is longer than this are ignored.
	 * NOTE(review): the reason for the limit is not visible in this file;
	 * presumably it filters out links that are not chapter PDFs - confirm.
	 */
	private static final int MAX_URL_LENGTH = 60;

	/** Files whose reported Content-Length exceeds this many bytes are skipped. */
	private static final int MAX_FILE_BYTES = 20000000;
	
	/**
	 * Iterates over the numbered directories, collects the PDF links of each
	 * one and downloads them.
	 *
	 * @param args unused
	 */
	public static void main(String[] args)
	{		
		String savepath = "";
		String tempurl = "";
		
		for(int i=1; i<=100; i++)
		{
			// These directory numbers are deliberately skipped.
			if(i==61 || i==63 || i==65 || i==66 || i==70 || i==71 || i==74 || i==82 || 
			   i==87 || i==89 || i==91 || i==93 || i==95 || i==97 )
				continue;
			
			savepath = "C:\\" + i + "\\";
			tempurl = urlname + i + "/";
			
			downloadFiles.getURLS(tempurl, savepath, urllist, savelist);
			if(urllist.size() > 0)
			{
				System.out.println("Creating a new directory : C:\\" + i );
				for(int j=0; j<urllist.size(); j++)
				{
					System.out.println(j);
					downloadFiles.getWebFiles(urllist.get(j), savelist.get(j));
				}
				
				urllist.clear();
				savelist.clear();
			}
		}
	}
	
	/**
	 * Reads {@code urlname + "/index.html"} line by line and, for every line
	 * that holds a PDF link, appends the absolute PDF URL to {@code urllist}
	 * and the matching local target path to {@code savelist}. Both lists
	 * always grow together, so equal indices belong to the same file.
	 *
	 * Only the first anchor on a line is considered. The page is decoded with
	 * the platform default charset. An unreachable or unreadable index page is
	 * reported on System.err and leaves the entries collected so far in place.
	 *
	 * @param urlname  base URL of the directory whose index.html is read
	 * @param savedir  local directory prefix prepended to every file name
	 * @param urllist  receives the absolute URLs of the PDFs found
	 * @param savelist receives the local paths the PDFs are to be saved to
	 */
	public static void getURLS(String urlname, String savedir, ArrayList<String> urllist, ArrayList<String> savelist)
	{		
		HttpURLConnection httpconn = null;
		BufferedReader br = null;
		
		try 
		{
			URL url = new URL(urlname + "/index.html");
			httpconn = (HttpURLConnection)url.openConnection();
			br = new BufferedReader(new InputStreamReader(httpconn.getInputStream()));
			
			String str;
			while((str = br.readLine()) != null)
			{
				String[] link = parsePdfLink(str, urlname, savedir);
				if(link != null)
				{
					urllist.add(link[0]);
					savelist.add(link[1]);
				}
			}
		} catch (IOException e) {
			// Covers MalformedURLException too; a missing index page is not fatal.
			System.err.println("Cannot read index of " + urlname + " : " + e);
		} finally {
			if(br != null)
			{
				try {
					br.close();
				} catch (IOException ignored) {
					// nothing useful can be done if closing fails
				}
			}
			if(httpconn != null)
				httpconn.disconnect();
		}
	}
	
	/**
	 * Extracts a PDF link from one line of HTML.
	 *
	 * The line is expected to look like
	 * {@code <a href="name.pdf" ...>Link text</a>}; the character right after
	 * {@code <a href=} is assumed to be the opening quote and is skipped.
	 *
	 * @return a two element array {absolute URL, local save path}, or null
	 *         when the line holds no usable PDF link
	 */
	private static String[] parsePdfLink(String line, String urlname, String savedir)
	{
		int anchor = line.indexOf(ANCHOR_OPEN);
		if(anchor < 0)
			return null;
		
		// +1 skips the quote that opens the href value.
		int hrefStart = anchor + ANCHOR_OPEN.length() + 1;
		
		// Search for the closing tag behind the href so that an earlier
		// "</a>" on the same line cannot produce an invalid substring range.
		int anchorEnd = line.indexOf(ANCHOR_CLOSE, hrefStart);
		if(anchorEnd < 0)
			return null;
		
		String temp = line.substring(hrefStart, anchorEnd);
		int labelStart = temp.indexOf(">");
		int pdfEnd = temp.indexOf(PDF_SUFFIX);
		
		// The ".pdf" has to be part of the href, i.e. sit before the '>'
		// that ends the opening tag; anything else is not a PDF link.
		if(labelStart < 0 || pdfEnd < 0 || pdfEnd > labelStart)
			return null;
		
		String urlpath = urlname + temp.substring(0, pdfEnd + PDF_SUFFIX.length());
		if(urlpath.length() > MAX_URL_LENGTH)
			return null;
		
		// The link text becomes the file name; both the ASCII and the
		// full-width colon are replaced by an underscore.
		String filename = temp.substring(labelStart + 1) + PDF_SUFFIX;
		filename = filename.replace(':', '_').replace('：', '_');
		
		return new String[] { urlpath, savedir + filename };
	}
	
	/**
	 * Downloads the PDF at {@code urlpath} and writes a page-by-page copy of
	 * it to {@code savepath}.
	 *
	 * Nothing happens when the target file already exists or when the server
	 * reports a Content-Length above {@link #MAX_FILE_BYTES}. If anything
	 * fails, the problem is reported on System.err and the incomplete target
	 * file is removed, so that a later run retries the download instead of
	 * skipping it because the file exists.
	 *
	 * @param urlpath  absolute URL of the PDF
	 * @param savepath local path (backslash separated) of the file to create
	 */
	public static void getWebFiles(String urlpath, String savepath)
	{
		int separator = savepath.lastIndexOf("\\");
		if(separator > -1)
		{
			File dir = new File(savepath.substring(0, separator));
			if(!dir.exists())
				dir.mkdirs();
		}
		
		File f = new File(savepath);
		if(f.exists())		
			return;
		
		HttpURLConnection httpconn = null;
		InputStream is = null;
		FileOutputStream out = null;
		PdfReader reader = null;
		Document document = null;
		boolean opened = false;
		boolean complete = false;
		
		try 
		{
			URL url = new URL(urlpath);
			URLConnection conn = url.openConnection(); 
			httpconn = (HttpURLConnection)conn;
			
			if(httpconn.getContentLength() > MAX_FILE_BYTES)
				return;
			
			is = httpconn.getInputStream();
			reader = new PdfReader(is);			

			int n = reader.getNumberOfPages();
			document = new Document(reader.getPageSize(1));
			out = new FileOutputStream(f);
			PdfCopy copy = new PdfCopy(document, out);
			document.open();
			opened = true;
			
			for(int i=1; i<=n; i++)
			{
				document.newPage(); 
				PdfImportedPage page = copy.getImportedPage(reader, i);
				copy.addPage(page);
			}
			
			document.close();
			complete = true;

		} catch (Exception e) {
			// IOException, DocumentException and iText runtime failures are
			// all handled the same way: report, clean up below, carry on.
			System.err.println("Cannot download " + urlpath + " : " + e);
		} finally {
			if(opened && !complete)
			{
				try {
					document.close();
				} catch (Exception ignored) {
					// the document is being abandoned anyway
				}
			}
			if(out != null)
			{
				// Must be closed before the delete below can succeed on Windows.
				try {
					out.close();
				} catch (IOException ignored) {
					// nothing useful can be done if closing fails
				}
			}
			if(is != null)
			{
				try {
					is.close();
				} catch (IOException ignored) {
					// nothing useful can be done if closing fails
				}
			}
			if(reader != null)
				reader.close();
			if(httpconn != null)
				httpconn.disconnect();
			
			// Never leave a truncated file behind: it would be mistaken for
			// a finished download on the next run.
			if(!complete && f.exists())		
				f.delete();
		}
	}
}

需要说明的是，这里需要用itext-2.0.2.jar包和bcprov-jdk15-139.jar，不能使用最新的itext-2.1.4.jar版本，否则会出现错误。可以使用附件里面的文件进行下载，解压后直接双击Start.bat就可进行下载了（默认是保存在C盘里）。对JDK为1.5和1.6的，可以分别使用附件里对应版本的文件。

对本地pdf文件的操作差不多。下面的代码是针对本地pdf文件进行操作的一个简单例子，就是对文件进行复制。

import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.PdfCopy;
import com.lowagie.text.pdf.PdfImportedPage;
import com.lowagie.text.pdf.PdfReader;

/**
 * Minimal example that copies a local PDF file page by page with iText's
 * PdfCopy.
 */
public class pdfCopy 
{
	/**
	 * Copies C:\a.pdf to C:\b.pdf.
	 *
	 * @param args unused
	 */
	public static void main(String[] args)
	{
		String filepath = "C:\\a.pdf";
		String savepath = "C:\\b.pdf";
		copyLocalFiles(filepath, savepath);
	}
	
	/**
	 * Writes a copy of the PDF at {@code filepath} to {@code savepath},
	 * importing one page after the other. The new document is created with
	 * the page size of the source's first page.
	 *
	 * IOException and DocumentException are printed via printStackTrace and
	 * not rethrown. The output stream, the document and the reader are
	 * released whether or not the copy succeeds.
	 *
	 * @param filepath path of the PDF to read
	 * @param savepath path of the PDF to create
	 */
	public static void copyLocalFiles(String filepath, String savepath)
	{
		PdfReader reader = null;
		FileOutputStream out = null;
		Document document = null;
		boolean opened = false;
		boolean complete = false;
		
		try 
		{
			// The source is opened first, so a missing input file does not
			// create or truncate the target.
			reader = new PdfReader(filepath);
			
			int n = reader.getNumberOfPages();
			document = new Document(reader.getPageSize(1));
			out = new FileOutputStream(savepath);
			PdfCopy copy = new PdfCopy(document, out);
						
			document.open();
			opened = true;
			
			for(int i=1; i<=n; i++)
			{
				document.newPage(); 
				PdfImportedPage page = copy.getImportedPage(reader, i);
				copy.addPage(page);
			}
			
			document.close();
			complete = true;

		} catch (IOException e) {
			e.printStackTrace();
		} catch(DocumentException e) {
			e.printStackTrace();
		} finally {
			if(opened && !complete)
			{
				try {
					document.close();
				} catch (Exception ignored) {
					// the document is being abandoned anyway
				}
			}
			if(out != null)
			{
				try {
					out.close();
				} catch (IOException ignored) {
					// nothing useful can be done if closing fails
				}
			}
			if(reader != null)
				reader.close();
		}
	}	
}

上面的代码只是对本地和网络上的PDF文件进行操作的简单例子。

下面是学习iText的几个比较有用的网址。

http://itextdocs.lowagie.com/tutorial/

http://www.1t3xt.info/api/