iText PDF Linux 乱码:iTextSharp 读取 PDF 内容为乱码

83574

/// <summary>
/// Reads the content bytes of every page of a hard-coded PDF file and returns
/// them concatenated as a single string.
/// </summary>
/// <remarks>
/// Every byte returned by <c>PdfReader.GetPageContent</c> is widened to the
/// <see cref="char"/> with the same numeric value; no text decoding is applied.
/// NOTE(review): presumably this is why non-ASCII (e.g. Chinese) text comes out
/// garbled - the bytes are page content data, not decoded text. If readable
/// text is the goal, iTextSharp's <c>PdfTextExtractor.GetTextFromPage</c> looks
/// like the intended API - confirm against the iTextSharp version in use.
/// </remarks>
/// <returns>
/// The byte-for-char content of all pages in page order, or an empty string
/// when the document reports zero pages.
/// </returns>
private string ReadPpf()
{
    string fn = @"E:\PDFReaderTest\article\C#从入门到精通.pdf";

    PdfReader p = new PdfReader(fn);
    try
    {
        // Total number of pages in the document (pages are 1-based).
        int pg = p.NumberOfPages;

        System.Text.StringBuilder sb = new System.Text.StringBuilder();

        for (int i = 1; i <= pg; i++)
        {
            // Content bytes of page i.
            byte[] b = p.GetPageContent(i);

            // Research/debug aid: dumps the current page's bytes to a text file.
            // The file is overwritten on every iteration, so after the loop it
            // holds only the last page.
            System.IO.File.WriteAllBytes(@"E:\PDFReaderTest\article\xct.txt", b);

            // Widen each byte to the char with the same value and append it.
            sb.EnsureCapacity(sb.Length + b.Length);
            foreach (byte value in b)
            {
                sb.Append((char)value);
            }
        }

        // Materialise the string once, after all pages have been appended.
        return sb.ToString();
    }
    finally
    {
        // Always release the underlying file, even if reading a page throws.
        p.Close();
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值