根据客户需求,首先需要实现PDF转图片。查阅资料后了解到,PDF分为单层和双层两种。单层:以图片为基础的PDF文档,鼠标拖动无法选中其中的文字。双层:文字层浮于底层图片之上,鼠标拖动时可以看到文字被框选。
- 单层PDF获取图片
说明:单层PDF本质上就是图片,只需将PDF中的图片按页提取即可。
使用的依赖为PdfPig,可通过NuGet自行下载。
// Single-layer PDF: every page is backed by embedded images, so open the
// document with PdfPig and read the images page by page.
using (var pdf = UglyToad.PdfPig.PdfDocument.Open(path))
{
    foreach (var pdfPage in pdf.GetPages())
    {
        // These two outer variables are re-initialised at the start of every page.
        text_region_xh = new List<List<int>>();
        results = "";

        foreach (var embedded in pdfPage.GetImages())
        {
            // RawBytes is the image data as PdfPig exposes it; it is Base64-encoded as-is.
            // NOTE(review): base64 is overwritten on every iteration, so after the loops
            // only the last image read is retained — confirm this is intended.
            base64 = Convert.ToBase64String(embedded.RawBytes.ToArray());
        }
    }
}
- 双层PDF获取图片
双层PDF需要直接渲染成图片。网上有很多种方式,但大多只适用于特定环境,依赖的组件各不相同,系统版本也会有影响。参考了多位博主的文章后,综合比较下来以下方式最为适用。我使用的是FreeSpire.PDF,请自行通过NuGet下载!
// Double-layer PDF: render each page to an image with (Free)Spire.PDF, keep the
// page as a Base64 string in `base64`, and save it as "Page-N.png" under imgPath.
string imgPath = "E:/双层PDF转图片/";
// Image.Save fails if the target folder is missing, so create it up front
// (this is a no-op when the folder already exists).
Directory.CreateDirectory(imgPath);

// Create a PdfDocument instance and load the file.
Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
doc.LoadFromFile(path);

// Walk through every page of the PDF.
for (int i = 0; i < doc.Pages.Count; i++)
{
    using (Stream imgstream = doc.SaveAsImage(i))
    using (MemoryStream buffer = new MemoryStream())
    {
        // Copy the rendered page into memory. CopyTo reads until end of stream,
        // whereas a single Stream.Read call may return fewer bytes than requested.
        imgstream.Position = 0;
        imgstream.CopyTo(buffer);

        // Base64 form of the page image (optional; drop these two lines if only
        // the PNG files on disk are needed).
        byte[] byData = buffer.ToArray();
        base64 = Convert.ToBase64String(byData);

        // Decode the image while the backing stream is still open: the original
        // closed the stream first, which makes Image.FromStream throw.
        buffer.Position = 0;
        using (System.Drawing.Image bmp = System.Drawing.Image.FromStream(buffer))
        {
            string fileName = string.Format("Page-{0}.png", i + 1);
            bmp.Save(imgPath + fileName, System.Drawing.Imaging.ImageFormat.Png);
        }
    }
}
如需用其他方式处理,可参考以下博主的文章:
1.https://blog.csdn.net/WuLex/article/details/107931491?utm_medium=distribute.pc_aggpage_search_result.none-task-blog-2aggregatepagefirst_rank_ecpm_v1~rank_v31_ecpm-9-107931491.pc_agg_new_rank&utm_term=.net+pdf+%E8%BD%AC%E5%9B%BE%E7%89%87%E7%9A%84&spm=1000.2123.3001.4430
2.https://www.jb51.cc/csharp/101115.html
https://ghostscript.com/releases/gsdnld.html
3.还有一种使用O2S.Components.PDFRender4NE.dll的方式,我没有测试成功,如有需要可以尝试;相关dll已上传,可在本博客内搜索。
以下为无效代码,是走过的弯路,记录一下,万一以后成功了呢!
/*
Magick.NET-Q16-AnyCPU
*/
//MagickReadSettings settings = new MagickReadSettings();
//settings.Density = new Density(300, 300); //设置质量
//using (MagickImageCollection images = new MagickImageCollection())
//{
// try
// {
// images.Read(path, settings);
// for (int i = 0; i < images.Count; i++)
// {
// MagickImage image = (MagickImage)images[i];
// image.Format = MagickFormat.Jpg;
// image.Write(imgPath + (i + 1) + ".jpg");
// }
// }
// catch (Exception ex)
// {
// Console.WriteLine(ex.Message);
// }
//}
/**/
//PDFFile file = PDFFile.Open(path);
//int pageCount = file.PageCount;
//List<string> imgPathList = new List<string>();
//for (int i = 0; i < pageCount; i++)
//{
// string imgName = DateTime.Now.ToString("yyyyMMddHHmmssffffff") + ".jpg";
// Bitmap img = file.GetBWPageImage(i, 200);
// img.Save(AppDomain.CurrentDomain.BaseDirectory + imgPath + imgName, System.Drawing.Imaging.ImageFormat.Jpeg);
// imgPathList.Add(imgPath + imgName);
//}
// //string outImageName = Path.GetFileNameWithoutExtension(InputPDFFile);
// //outImageName = outImageName + "_" + PageNumber.ToString() + ".png";
// //GhostscriptPngDevice dev = new GhostscriptPngDevice(GhostscriptPngDeviceType.Png256);
// //dev.GraphicsAlphaBits = GhostscriptImageDeviceAlphaBits.V_4;
// //dev.TextAlphaBits = GhostscriptImageDeviceAlphaBits.V_4;
// //dev.ResolutionXY = new GhostscriptImageDeviceResolution(290, 290);
// //dev.InputFiles.Add(path);
// //dev.Pdf.FirstPage = i;
// //dev.Pdf.LastPage = i;
// //dev.CustomSwitches.Add("-dDOINTERPOLATE");
// //dev.OutputPath = "E:/紫光启明/转单层/" + xh + ".png";
// //dev.Process();