PDFBox下载地址:http://sourceforge.net/projects/pdfbox/postdownload?source=dlp
解压下载到的 rar 压缩包，取出其中 bin 文件夹下的四个 dll：
IKVM.GNU.Classpath.dll
IKVM.Runtime.dll
PDFBox-0.7.3.dll
FontBox-0.1.0-dev.dll
在使用的项目中添加这4个dll的引用。
/// <summary>
/// Extracts the text of a PDF document and writes it to a text file.
/// </summary>
/// <param name="file">The PDF file to read.</param>
/// <param name="txtfile">The text file that receives the extracted text.</param>
/// <remarks>
/// The output file is overwritten if it already exists and is written with
/// the gb2312 encoding, so it must be read back using the same encoding.
/// </remarks>
public void pdf_txt(FileInfo file,FileInfo txtfile)
{
    PDDocument doc = PDDocument.load(file.FullName);
    try
    {
        PDFTextStripper pdfStripper = new PDFTextStripper();
        string text = pdfStripper.getText(doc);

        // Second argument (append) is false: existing content is replaced.
        // The using block guarantees the writer is flushed and the file handle
        // released even if Write throws.
        using (StreamWriter swPdfChange = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("gb2312")))
        {
            swPdfChange.Write(text);
        }
    }
    finally
    {
        // Always release the loaded PDF document, including when text
        // extraction or writing fails.
        doc.close();
    }
}
调用方式:
// Despite its name, pdffile is the txt file in which the extracted text is stored.
FileInfo pdffile = new FileInfo(@"E:\work\Analyzer\4_develop\trunk\IsAnalyzer\UploadFile\1.txt");
// NOTE(review): `file` is not defined in this snippet; presumably it is the
// FileInfo of the source PDF supplied by the surrounding code - confirm.
pdf_txt(file, pdffile);
StringBuilder sb=new StringBuilder();
// Read the result back with the same encoding pdf_txt wrote it in; the using
// block ensures the reader (and its file handle) is released.
using (StreamReader sr = new StreamReader(pdffile.FullName, Encoding.GetEncoding("gb2312")))
{
    while (!sr.EndOfStream)
    {
        // ReadLine strips the line terminator, so the lines are concatenated
        // without any separator.
        sb.Append(sr.ReadLine());
    }
}