使用 PDFsharp 判断 PDF 文件的每一页是否包含图片、是否包含文本内容。开发环境:C#,PDFsharp 版本 1.50.5147。
/// <summary>
/// Click handler that opens the PDF whose local path is typed into <c>tb_html</c> and,
/// for every page, writes to the debug output whether the page has text content and
/// whether it references at least one image XObject.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void Button_Click(object sender, RoutedEventArgs e)
{
    // this.tb_html.Text holds the local path of the PDF file.
    string pdfPath = this.tb_html.Text.Trim();
    if (pdfPath.Length == 0)
    {
        // Nothing to open; bail out instead of letting PdfReader.Open throw from a UI handler.
        Debug.WriteLine("No PDF path was entered.");
        return;
    }

    using (PdfDocument document = PdfReader.Open(pdfPath, PdfDocumentOpenMode.Modify))
    {
        foreach (PdfPage page in document.Pages)
        {
            try
            {
                bool isImg = PageContainsImage(page);

                // Parse the page content and hand it to ExtractText (defined elsewhere in this file;
                // presumably it yields the text found in the content - verify against its definition).
                CObject content = ContentReader.ReadContent(page);
                var text = ExtractText(content)?.ToList();
                Debug.WriteLine($"当前页是否存在文本内容{text?.Count > 0}#是否存在图片{isImg}");
            }
            catch (Exception ex)
            {
                // One unreadable page must not stop the remaining pages from being analyzed,
                // but the failure is reported instead of being silently dropped.
                Debug.WriteLine($"Failed to analyze PDF page: {ex}");
            }
        }
    }
}

/// <summary>
/// Reports whether the page's own <c>/Resources</c> dictionary lists at least one
/// XObject whose <c>/Subtype</c> is <c>/Image</c>.
/// </summary>
/// <remarks>
/// Only entries stored as indirect references directly under <c>/Resources/XObject</c>
/// are inspected; the contents of other XObjects are not searched.
/// </remarks>
/// <param name="page">The page to inspect.</param>
/// <returns><c>true</c> as soon as the first image XObject is found; otherwise <c>false</c>.</returns>
private static bool PageContainsImage(PdfPage page)
{
    // Resources dictionary of the page.
    PdfDictionary resources = page.Elements.GetDictionary("/Resources");
    if (resources == null)
    {
        return false;
    }

    // External objects (XObjects) declared by the page.
    PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");
    if (xObjects == null)
    {
        return false;
    }

    foreach (PdfItem item in xObjects.Elements.Values)
    {
        PdfReference reference = item as PdfReference;
        if (reference == null)
        {
            continue;
        }

        // Is the referenced external object an image?
        PdfDictionary xObject = reference.Value as PdfDictionary;
        if (xObject != null && xObject.Elements.GetString("/Subtype") == "/Image")
        {
            return true;
        }
    }

    return false;
}