学习的版本iTextSharp.5.5.5。
关于获取PDF中的图片资源
/// <summary>
/// Extracts the first image found on each page of a PDF as raw bytes.
/// </summary>
/// <param name="filePath">Location of the PDF; handed directly to the <c>PdfReader</c> constructor.</param>
/// <returns>
/// One entry per page, in page order. An entry is <c>null</c> when no image
/// XObject was found for that page or the reference did not resolve to a stream.
/// </returns>
public List<byte[]> ConvertImageData(string filePath)
{
    List<byte[]> lst = new List<byte[]>();
    using (PdfReader pdf = new PdfReader(filePath))
    {
        // Pages are addressed starting at 1, hence the inclusive upper bound.
        for (int i = 1; i <= pdf.NumberOfPages; i++)
        {
            byte[] result = null;

            // Use the loop counter: the original referenced an undeclared "pageNum".
            PdfDictionary page = pdf.GetPageN(i);
            PdfObject obj = null;
            if (page != null)
            {
                obj = FindImageInPDFDictionary(page);
            }

            if (obj != null)
            {
                // Resolve the indirect reference straight to the underlying object
                // instead of round-tripping the object number through a string.
                PRStream pdfStream = PdfReader.GetPdfObject(obj) as PRStream;
                if (pdfStream != null)
                {
                    PdfImageObject imageObj = new PdfImageObject(pdfStream);
                    result = imageObj.GetImageAsBytes();
                }
            }

            // Always add an entry (possibly null) so list index + 1 == page number.
            lst.Add(result);
        }
    }
    return lst;
}
/// <summary>
/// Searches the XObject resources of a page (or form) dictionary for the first
/// image XObject, descending into Form/Group XObjects when necessary.
/// </summary>
/// <param name="pg">Dictionary whose <c>/Resources</c> → <c>/XObject</c> entries are searched.</param>
/// <returns>
/// The indirect reference of the first image XObject found, or <c>null</c> when
/// the dictionary has no resources, no XObjects, or no image among them.
/// </returns>
private static PdfObject FindImageInPDFDictionary(PdfDictionary pg)
{
    if (pg == null)
    {
        return null;
    }

    // A dictionary without /Resources previously caused a NullReferenceException.
    PdfDictionary res = pg.GetAsDict(PdfName.RESOURCES);
    if (res == null)
    {
        return null;
    }

    PdfDictionary xobj = res.GetAsDict(PdfName.XOBJECT);
    if (xobj == null)
    {
        return null;
    }

    foreach (PdfName name in xobj.Keys)
    {
        PdfObject obj = xobj.Get(name);
        // Only indirect entries are considered, as in the original implementation.
        if (obj == null || !obj.IsIndirect())
        {
            continue;
        }

        // Skip entries that do not resolve to a dictionary instead of throwing on the cast.
        PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
        if (tg == null)
        {
            continue;
        }

        PdfName type = PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)) as PdfName;
        if (PdfName.IMAGE.Equals(type))
        {
            return obj;
        }

        // Image nested inside a form: keep scanning the remaining XObjects when
        // the form itself contains no image, rather than giving up immediately.
        if (PdfName.FORM.Equals(type) || PdfName.GROUP.Equals(type))
        {
            PdfObject nested = FindImageInPDFDictionary(tg);
            if (nested != null)
            {
                return nested;
            }
        }
    }

    return null;
}