C# 中将 HTML 文件转换成 PDF 文件

一、Pechkin:html->pdf

1.WinForm中转换为PDF

  a.在项目添加引用,引用 -> 管理NuGet程序包

 

b.在导出PDF按钮中添加方法

 

 1 SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
 2                 .SetMargins(new Margins() { Left = 10, Right = 10, Top = 0, Bottom = 0 }) //设置边距
 3                 .SetPaperOrientation(false) //设置纸张方向为横向
 4                 .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297))); //设置纸张为A4纸大小
 5 
 6             byte[] buf = sc.Convert(new ObjectConfig(), getWebContent());
 7 
 8             if (buf == null)
 9             {
10                 MessageBox.Show("Error converting!");
11                 return;
12             }
13 
14             File.WriteAllBytes(@"d:\google-news123.pdf", buf);
15 
16             try
17             {
18                 string fn = Path.GetTempFileName() + ".pdf";
19                 FileStream fs = new FileStream(fn, FileMode.Create);
20                 fs.Write(buf, 0, buf.Length);
21                 fs.Close();
22 
23                 //MessageBox.Show("操作成功,文件已保存至F盘下", "提示");
24 
25                 Process myProcess = new Process();
26                 myProcess.StartInfo.FileName = fn;
27                 myProcess.Start();
28 
29                 //SaveFileDialog();
30             }
31             catch { }

View Code

 相关方法

 

 1 private int ConvertToHundredthsInch(int millimeter)
 2         {
 3             return (int)((millimeter * 10.0) / 2.54);
 4         }
 5 
 6         /// <summary>
 7         /// 获取网站内容,包含了 HTML+CSS+JS
 8         /// </summary>
 9         /// <returns>String返回网页信息</returns>
10         public string getWebContent()
11         {
12             try
13             {
14                 WebClient MyWebClient = new WebClient();
15                 MyWebClient.Credentials = CredentialCache.DefaultCredentials;
16                 //获取或设置用于向Internet资源的请求进行身份验证的网络凭据
17                 Byte[] pageData = MyWebClient.DownloadData("http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan_new.asp?nstr=jwmlYCBYPDcHJlX2VudHJ5X2lkPTIyMjkyMDE1MDc5MTk1MjcyOSZ0b2lwPTExNA==");
18                 //从指定网站下载数据
19                 string pageHtml = Encoding.UTF8.GetString(pageData);
20                 //如果获取网站页面采用的是GB2312,则使用这句       
21                 bool isBool = isMessyCode(pageHtml);//判断使用哪种编码 读取网页信息
22                 if (!isBool)
23                 {
24                     string pageHtml1 = Encoding.UTF8.GetString(pageData);
25                     pageHtml = pageHtml1;
26                 }
27                 else
28                 {
29                     string pageHtml2 = Encoding.Default.GetString(pageData);
30                     pageHtml = pageHtml2;
31                 }
32                 return pageHtml;
33             }
34 
35             catch (WebException webEx)
36             {
37                 Console.WriteLine(webEx.Message.ToString());
38                 return webEx.Message;
39             }
40         }
41 
/// <summary>
/// Checks whether the text contains garbled characters, detected as the UTF-8 byte
/// sequence EF BF BD (239 191 189), i.e. the Unicode replacement character U+FFFD.
/// </summary>
/// <param name="txt">Text to inspect; null or empty is treated as not garbled.</param>
/// <returns>true if the byte sequence is found anywhere in the text; otherwise false.</returns>
public bool isMessyCode(string txt)
{
    if (string.IsNullOrEmpty(txt))
    {
        return false;
    }

    byte[] bytes = Encoding.UTF8.GetBytes(txt);

    // The loop bound keeps i + 2 inside the array. The previous bound
    // (i < Length - 3) skipped the last possible position and therefore missed a
    // replacement character at the very end of the text.
    for (int i = 0; i + 2 < bytes.Length; i++)
    {
        if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
        {
            return true;
        }
    }
    return false;
}

相关方法

优缺点

  1.只能保存到指定的目录中,并且直接打开文件

  2.网页中的图片导不出来

  3.可能会出现乱码

  4.生成项目的时候需要把相应的DLL拷贝进去,不然不能生成

 

这是另外一种方法:http://www.cnblogs.com/lsgsanxiao/p/4878077.html

 

 

 

2.WEB网站中转换为PDF

  项目Demo http://pan.baidu.com/s/1gfhRR8n

  a.项目相关引用与上面相同

  b.网站中采用JS调用一般处理程序的方式

 

// Opens the CreatePdf.ashx handler in a new browser window. The "html" query
// value is what the handler reads from the request.
function createPdf() {
    var handlerUrl = "CreatePdf.ashx?html=222222222222233324243";
    window.open(handlerUrl);
}

View Code

 

  1 using System;
  2 using System.Drawing.Printing;
  3 using System.IO;
  4 using System.Net;
  5 using System.Text;
  6 using System.Web;
  7 using Pechkin;
  8 using Pechkin.Synchronized;
  9 
 10 namespace WebApplication3
 11 {
 12     /// <summary>
 13     /// CreatePdf 的摘要说明
 14     /// </summary>
 15     public class CreatePdf : IHttpHandler
 16     {
 17 
 18         public void ProcessRequest(HttpContext context)
 19         {
 20             string htmlFile = context.Request["html"];
 21 
 22             string html = getWebContent();
 23             SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
 24                                     .SetMargins(new Margins() { Left = 0, Right = 0, Top = 0, Bottom = 0 }) //设置边距
 25                                     .SetPaperOrientation(false) //设置纸张方向为横向
 26                                     .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297))); //设置纸张大小50mm * 100mm
 27 
 28             byte[] buf = sc.Convert(new ObjectConfig(), html);
 29 
 30             if (buf == null)
 31             {
 32                 context.Response.ContentType = "text/plain";
 33                 context.Response.Write("Error converting!");
 34             }
 35 
 36             try
 37             {
 38                 context.Response.Clear();
 39 
 40 
 41                 //方式1:提示浏览器下载pdf   
 42                 context.Response.AddHeader("content-disposition", "attachment;filename=" + htmlFile + ".pdf");
 43                 context.Response.ContentType = "application/octet-stream";
 44                 context.Response.BinaryWrite(buf);
 45 
 46                 //方式2:直接在浏览器打开pdf
 47                 //context.Response.ContentType = "application/pdf";
 48                 //context.Response.OutputStream.Write(buf, 0, buf.Length);
 49 
 50                 context.Response.End();
 51 
 52             }
 53             catch (Exception e)
 54             {
 55                 context.Response.ContentType = "text/plain";
 56                 context.Response.Write(e.Message);
 57             }
 58         }
 59 
 60         public bool IsReusable
 61         {
 62             get
 63             {
 64                 return false;
 65             }
 66         }
 67 
 68         private int ConvertToHundredthsInch(int millimeter)
 69         {
 70             return (int)((millimeter * 10.0) / 2.54);
 71         }
 72 
 73         /// <summary>
 74         /// 获取网站内容,包含了 HTML+CSS+JS
 75         /// </summary>
 76         /// <returns>String返回网页信息</returns>
 77         public string getWebContent()
 78         {
 79             try
 80             {
 81                 WebClient MyWebClient = new WebClient();
 82                 MyWebClient.Credentials = CredentialCache.DefaultCredentials;
 83                 //获取或设置用于向Internet资源的请求进行身份验证的网络凭据
 84                 Byte[] pageData = MyWebClient.DownloadData("http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan.asp?nstr=AAfFJb_SVvcHJlX2VudHJ5X2lkPTIyMzEyMDE1MDgxMTY0NDUzOSZ0b2lwPTExNA==");
 85                 //从指定网站下载数据
 86                 string pageHtml = Encoding.UTF8.GetString(pageData);
 87                 //如果获取网站页面采用的是GB2312,则使用这句       
 88                 bool isBool = isMessyCode(pageHtml);//判断使用哪种编码 读取网页信息
 89                 if (!isBool)
 90                 {
 91                     string pageHtml1 = Encoding.UTF8.GetString(pageData);
 92                     pageHtml = pageHtml1;
 93                 }
 94                 else
 95                 {
 96                     string pageHtml2 = Encoding.Default.GetString(pageData);
 97                     pageHtml = pageHtml2;
 98                 }
 99                 return pageHtml;
100             }
101 
102             catch (WebException webEx)
103             {
104                 Console.WriteLine(webEx.Message.ToString());
105                 return webEx.Message;
106             }
107         }
108 
109         /// <summary>
110         /// 判断是否有乱码
111         /// </summary>
112         /// <param name="txt"></param>
113         /// <returns></returns>
114         public bool isMessyCode(string txt)
115         {
116             var bytes = Encoding.UTF8.GetBytes(txt);            //239 191 189            
117             for (var i = 0; i < bytes.Length; i++)
118             {
119                 if (i < bytes.Length - 3)
120                     if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
121                     {
122                         return true;
123                     }
124             }
125             return false;
126         }
127     }
128 }

//***************************

Pechkin 开源组件基于 wkhtmltox,可以解析 CSS 样式,将网页转换为 PDF 文件,支持 URL 或者 HTML 字符串。


在 NuGet 管理器中搜索“Pechkin”,请选择 CPechkin For .Net20+。这个组件是作者在 Pechkin 基础上修改的,剔除了 Common.Logging 依赖。描述中说项目必须以 x86 编译,但我发现设置成 Any CPU 也可以;不过如果是 Web 项目,部署到 IIS 后,必须在应用程序池的高级设置中选择“启用 32 位应用程序”,否则运行会报错。

包加载完毕后,会在项目根目录下放置几个DLL,这些是 Pechkin 依赖的,如果想删除,请先将这几个DLL拷贝到bin下

编译后,BIN下面有7个相关的DLL,这就是 Pechkin 的全部

调用代码很简单

// Build the converter: margins, portrait orientation and an A4 page.
SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
    .SetMargins(new Margins() { Left = 50, Right = 50, Top = 50, Bottom = 50 }) // page margins
    .SetPaperOrientation(false) // false = portrait; pass true for landscape
    .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297)) // A4: 210mm x 297mm
);

// Render the HTML string to PDF bytes.
byte[] buf = sc.Convert(new ObjectConfig(), html);

if (buf == null)
{
    MessageBox.Show("Error converting!");
    // Stop here: without this, the code below would try to write a null buffer.
    return;
}

try
{
    // Verbatim string: in a regular literal "\X" is an invalid escape sequence
    // and the code does not compile.
    string fn = @"D:\XXX.pdf";
    File.WriteAllBytes(fn, buf);

    // Open the PDF with whatever application is registered for .pdf files.
    using (Process myProcess = new Process())
    {
        myProcess.StartInfo.FileName = fn;
        myProcess.Start();
    }
}
catch (Exception ex)
{
    MessageBox.Show($"PDF {ex.Message}");
}
生成PDF

 

你可以使用第三方库iTextSharp来将OFD文件转换成PDF文件。首先需要将OFD文件解压缩,然后将解压缩后的XML文件转换成PDF文件。 以下是一个示例代码: ```csharp using iTextSharp.text; using iTextSharp.text.pdf; using System.IO; using System.Xml; public static void ConvertOFDToPDF(string ofdFilePath, string pdfFilePath) { // 解压缩OFD文件 string tempFolderPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); Directory.CreateDirectory(tempFolderPath); ZipFile.ExtractToDirectory(ofdFilePath, tempFolderPath); // 获取OFD文档的页面尺寸 string documentXmlPath = Path.Combine(tempFolderPath, "Documents", "Document.xml"); XmlDocument documentXml = new XmlDocument(); documentXml.Load(documentXmlPath); XmlNamespaceManager nsManager = new XmlNamespaceManager(documentXml.NameTable); nsManager.AddNamespace("ofd", "http://www.ofdspec.org"); XmlNode pageAreaNode = documentXml.SelectSingleNode("/ofd:Document/ofd:Pages/ofd:PageArea", nsManager); float pageWidth = float.Parse(pageAreaNode.Attributes["PageWidth"].Value); float pageHeight = float.Parse(pageAreaNode.Attributes["PageHeight"].Value); // 创建PDF文档 using (FileStream pdfFileStream = new FileStream(pdfFilePath, FileMode.Create)) { using (Document pdfDocument = new Document(new Rectangle(pageWidth, pageHeight))) { using (PdfWriter pdfWriter = PdfWriter.GetInstance(pdfDocument, pdfFileStream)) { pdfDocument.Open(); // 遍历OFD文档的页面,将每个页面转换成PDF页面 XmlNodeList pageNodes = documentXml.SelectNodes("/ofd:Document/ofd:Pages/ofd:Page", nsManager); foreach (XmlNode pageNode in pageNodes) { string pageFileNodeValue = pageNode.Attributes["BaseLoc"].Value; string pageFilePath = Path.Combine(tempFolderPath, pageFileNodeValue); using (FileStream pageFileStream = new FileStream(pageFilePath, FileMode.Open)) { using (MemoryStream pageMemoryStream = new MemoryStream()) { pageFileStream.CopyTo(pageMemoryStream); byte[] pageBytes = pageMemoryStream.ToArray(); // 将OFD页面转换成PDF页面 using (MemoryStream pdfMemoryStream = new MemoryStream()) { using (Document pageDocument = new Document()) { using (PdfWriter pdfPageWriter = 
PdfWriter.GetInstance(pageDocument, pdfMemoryStream)) { pageDocument.Open(); using (MemoryStream pageXmlStream = new MemoryStream(pageBytes)) { XmlDocument pageXml = new XmlDocument(); pageXml.Load(pageXmlStream); XmlNode pageContentNode = pageXml.SelectSingleNode("/ofd:Page/ofd:Content", nsManager); string pageContent = pageContentNode.InnerXml; using (MemoryStream pageContentStream = new MemoryStream()) { using (StreamWriter pageContentWriter = new StreamWriter(pageContentStream)) { pageContentWriter.Write(pageContent); pageContentWriter.Flush(); pageContentStream.Position = 0; XmlParserContext xmlParserContext = new XmlParserContext(null, nsManager, null, XmlSpace.None); XmlReaderSettings xmlReaderSettings = new XmlReaderSettings(); xmlReaderSettings.ConformanceLevel = ConformanceLevel.Fragment; using (XmlReader xmlReader = XmlReader.Create(pageContentStream, xmlReaderSettings, xmlParserContext)) { while (xmlReader.Read()) { if (xmlReader.NodeType == XmlNodeType.Element) { string elementName = xmlReader.Name; if (elementName == "ofd:TextObject") { string fontId = xmlReader.GetAttribute("Font"); string fontSizeString = xmlReader.GetAttribute("Size"); float fontSize = float.Parse(fontSizeString); string fontFamily = null; XmlNode fontNode = documentXml.SelectSingleNode($"/ofd:Document/ofd:Fonts/ofd:Font[@ID='{fontId}']", nsManager); if (fontNode != null) { fontFamily = fontNode.Attributes["FontName"].Value; } if (string.IsNullOrEmpty(fontFamily)) { fontFamily = "宋体"; } BaseFont baseFont = BaseFont.CreateFont($"{fontFamily},Bold", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); Font font = new Font(baseFont, fontSize); string text = xmlReader.ReadElementContentAsString(); pdfDocument.Add(new Paragraph(text, font)); } else if (elementName == "ofd:PathObject") { // 处理OFD路径对象 } else if (elementName == "ofd:ImageObject") { // 处理OFD图像对象 } } } } } } } pageDocument.Close(); } } byte[] pdfBytes = pdfMemoryStream.ToArray(); PdfReader pdfReader = new PdfReader(pdfBytes); 
pdfWriter.DirectContent.AddTemplate(pdfWriter.GetImportedPage(pdfReader, 1), 0, 0); } } } } pdfDocument.Close(); } } } Directory.Delete(tempFolderPath, true); } ``` 请注意,这段代码只是一个示例,可能需要根据你的具体情况进行修改和调整。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值