c#中将HTML文件转换成PDF文件

最新推荐文章于 2024-08-15 11:28:31 发布

luckyone906

最新推荐文章于 2024-08-15 11:28:31 发布

阅读量5.2k

点赞数

分类专栏： WinForm技术文件IO操作笔记

本文链接：https://blog.csdn.net/u011555996/article/details/115469179

版权

WinForm技术同时被 3 个专栏收录

79 篇文章 22 订阅

订阅专栏

文件IO操作

20 篇文章 1 订阅

订阅专栏

笔记

5 篇文章 0 订阅

订阅专栏

一、Pechkin:html->pdf

1.WinForm中转换为PDF

　　a.在项目添加引用，引用 -> 管理NuGet程序包

b.在导出PDF按钮中添加方法

 1 SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
 2                 .SetMargins(new Margins() { Left = 10, Right = 10, Top = 0, Bottom = 0 }) //设置边距
 3                 .SetPaperOrientation(false) //设置纸张方向为横向
 4                 .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297))); //设置纸张为A4纸大小
 5 
 6             byte[] buf = sc.Convert(new ObjectConfig(), getWebContent());
 7 
 8             if (buf == null)
 9             {
10                 MessageBox.Show("Error converting!");
11                 return;
12             }
13 
14             File.WriteAllBytes(@"d:\google-news123.pdf", buf);
15 
16             try
17             {
18                 string fn = Path.GetTempFileName() + ".pdf";
19                 FileStream fs = new FileStream(fn, FileMode.Create);
20                 fs.Write(buf, 0, buf.Length);
21                 fs.Close();
22 
23                 //MessageBox.Show("操作成功，文件已保存至F盘下", "提示");
24 
25                 Process myProcess = new Process();
26                 myProcess.StartInfo.FileName = fn;
27                 myProcess.Start();
28 
29                 //SaveFileDialog();
30             }
31             catch { }

View Code

相关方法

 1 private int ConvertToHundredthsInch(int millimeter)
 2         {
 3             return (int)((millimeter * 10.0) / 2.54);
 4         }
 5 
 6         /// <summary>
 7         /// 获取网站内容，包含了 HTML+CSS+JS
 8         /// </summary>
 9         /// <returns>String返回网页信息</returns>
10         public string getWebContent()
11         {
12             try
13             {
14                 WebClient MyWebClient = new WebClient();
15                 MyWebClient.Credentials = CredentialCache.DefaultCredentials;
16                 //获取或设置用于向Internet资源的请求进行身份验证的网络凭据
17                 Byte[] pageData = MyWebClient.DownloadData("http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan_new.asp?nstr=jwmlYCBYPDcHJlX2VudHJ5X2lkPTIyMjkyMDE1MDc5MTk1MjcyOSZ0b2lwPTExNA==");
18                 //从指定网站下载数据
19                 string pageHtml = Encoding.UTF8.GetString(pageData);
20                 //如果获取网站页面采用的是GB2312，则使用这句       
21                 bool isBool = isMessyCode(pageHtml);//判断使用哪种编码 读取网页信息
22                 if (!isBool)
23                 {
24                     string pageHtml1 = Encoding.UTF8.GetString(pageData);
25                     pageHtml = pageHtml1;
26                 }
27                 else
28                 {
29                     string pageHtml2 = Encoding.Default.GetString(pageData);
30                     pageHtml = pageHtml2;
31                 }
32                 return pageHtml;
33             }
34 
35             catch (WebException webEx)
36             {
37                 Console.WriteLine(webEx.Message.ToString());
38                 return webEx.Message;
39             }
40         }
41 
/// <summary>
/// Checks whether the text contains garbled characters, i.e. whether its UTF-8 encoding
/// contains the byte sequence of the Unicode replacement character U+FFFD.
/// </summary>
/// <param name="txt">Text to inspect.</param>
/// <returns><c>true</c> if the sequence 239 191 189 occurs anywhere in the encoded text; otherwise <c>false</c>.</returns>
public bool isMessyCode(string txt)
{
    // U+FFFD is encoded in UTF-8 as EF BF BD (239 191 189).
    byte[] bytes = Encoding.UTF8.GetBytes(txt);

    // The last valid start index is Length - 3, so loop while i + 2 is still in range.
    // A stricter bound would miss a replacement character at the very end of the text.
    for (int i = 0; i + 2 < bytes.Length; i++)
    {
        if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
        {
            return true;
        }
    }

    return false;
}

相关方法

优缺点

　　1.只能保存到指定的目录中，并且直接打开文件

　　2.网页中的图片导不出来

　　3.可能会出现乱码

　　4.生成项目的时候需要把相应的DLL拷贝进去，不然不能生成

这是另外一种方法：http://www.cnblogs.com/lsgsanxiao/p/4878077.html

2.WEB网站中转换为PDF

　　项目Demo http://pan.baidu.com/s/1gfhRR8n

　　a.项目相关引用与上面相同

　　b.网站中采用JS调用一般处理程序的方式

// Opens the CreatePdf.ashx handler in a new browser window.
function createPdf() {
    var handlerUrl = "CreatePdf.ashx?html=222222222222233324243";
    window.open(handlerUrl);
}

View Code

  1 using System;
  2 using System.Drawing.Printing;
  3 using System.IO;
  4 using System.Net;
  5 using System.Text;
  6 using System.Web;
  7 using Pechkin;
  8 using Pechkin.Synchronized;
  9 
 10 namespace WebApplication3
 11 {
 12     /// <summary>
 13     /// CreatePdf 的摘要说明
 14     /// </summary>
 15     public class CreatePdf : IHttpHandler
 16     {
 17 
 18         public void ProcessRequest(HttpContext context)
 19         {
 20             string htmlFile = context.Request["html"];
 21 
 22             string html = getWebContent();
 23             SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
 24                                     .SetMargins(new Margins() { Left = 0, Right = 0, Top = 0, Bottom = 0 }) //设置边距
 25                                     .SetPaperOrientation(false) //设置纸张方向为横向
 26                                     .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297))); //设置纸张大小50mm * 100mm
 27 
 28             byte[] buf = sc.Convert(new ObjectConfig(), html);
 29 
 30             if (buf == null)
 31             {
 32                 context.Response.ContentType = "text/plain";
 33                 context.Response.Write("Error converting!");
 34             }
 35 
 36             try
 37             {
 38                 context.Response.Clear();
 39 
 40 
 41                 //方式1：提示浏览器下载pdf   
 42                 context.Response.AddHeader("content-disposition", "attachment;filename=" + htmlFile + ".pdf");
 43                 context.Response.ContentType = "application/octet-stream";
 44                 context.Response.BinaryWrite(buf);
 45 
 46                 //方式2：直接在浏览器打开pdf
 47                 //context.Response.ContentType = "application/pdf";
 48                 //context.Response.OutputStream.Write(buf, 0, buf.Length);
 49 
 50                 context.Response.End();
 51 
 52             }
 53             catch (Exception e)
 54             {
 55                 context.Response.ContentType = "text/plain";
 56                 context.Response.Write(e.Message);
 57             }
 58         }
 59 
 60         public bool IsReusable
 61         {
 62             get
 63             {
 64                 return false;
 65             }
 66         }
 67 
 68         private int ConvertToHundredthsInch(int millimeter)
 69         {
 70             return (int)((millimeter * 10.0) / 2.54);
 71         }
 72 
 73         /// <summary>
 74         /// 获取网站内容，包含了 HTML+CSS+JS
 75         /// </summary>
 76         /// <returns>String返回网页信息</returns>
 77         public string getWebContent()
 78         {
 79             try
 80             {
 81                 WebClient MyWebClient = new WebClient();
 82                 MyWebClient.Credentials = CredentialCache.DefaultCredentials;
 83                 //获取或设置用于向Internet资源的请求进行身份验证的网络凭据
 84                 Byte[] pageData = MyWebClient.DownloadData("http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan.asp?nstr=AAfFJb_SVvcHJlX2VudHJ5X2lkPTIyMzEyMDE1MDgxMTY0NDUzOSZ0b2lwPTExNA==");
 85                 //从指定网站下载数据
 86                 string pageHtml = Encoding.UTF8.GetString(pageData);
 87                 //如果获取网站页面采用的是GB2312，则使用这句       
 88                 bool isBool = isMessyCode(pageHtml);//判断使用哪种编码 读取网页信息
 89                 if (!isBool)
 90                 {
 91                     string pageHtml1 = Encoding.UTF8.GetString(pageData);
 92                     pageHtml = pageHtml1;
 93                 }
 94                 else
 95                 {
 96                     string pageHtml2 = Encoding.Default.GetString(pageData);
 97                     pageHtml = pageHtml2;
 98                 }
 99                 return pageHtml;
100             }
101 
102             catch (WebException webEx)
103             {
104                 Console.WriteLine(webEx.Message.ToString());
105                 return webEx.Message;
106             }
107         }
108 
109         /// <summary>
110         /// 判断是否有乱码
111         /// </summary>
112         /// <param name="txt"></param>
113         /// <returns></returns>
114         public bool isMessyCode(string txt)
115         {
116             var bytes = Encoding.UTF8.GetBytes(txt);            //239 191 189            
117             for (var i = 0; i < bytes.Length; i++)
118             {
119                 if (i < bytes.Length - 3)
120                     if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
121                     {
122                         return true;
123                     }
124             }
125             return false;
126         }
127     }
128 }

//***************************

Pechkin开源组件使用wkhtmltox（wkhtmltopdf 的库），可以解析CSS样式，将网页转换为PDF文件，支持URL，或者HTML字符串。

在 Nuget 管理器中搜索“Pechkin”，请选择 CPechkin For .Net20+，这个组件是作者在 Pechkin 基础上修改的，剔除了 Common.Logging 依赖，描述中说项目必须是x86编译，但我发现，设置成 Any CPU也可以，但是如果是Web项目，部署到IIS后，应用程序池高级设置中必须选择启用32位应用程序，否则运行会报错。

包加载完毕后，会在项目根目录下放置几个DLL，这些是 Pechkin 依赖的，如果想删除，请先将这几个DLL拷贝到bin下

编译后，BIN下面有7个相关的DLL，这就是 Pechkin 的全部

调用代码很简单

// Build a converter for a portrait A4 page with margins of 50 on every side.
SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
    .SetMargins(new Margins() { Left = 50, Right = 50, Top = 50, Bottom = 50 }) // page margins
    .SetPaperOrientation(false) // false = portrait (the argument is the "landscape" flag)
    .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297)) // paper size 210mm x 297mm (A4)
);

byte[] buf = sc.Convert(new ObjectConfig(), html);

if (buf == null)
{
    MessageBox.Show("Error converting!");
    // Nothing to write; continuing would dereference the null buffer.
    return;
}

try
{
    // Verbatim string: in a regular literal "\X" is an invalid escape sequence and does not compile.
    string fn = @"D:\XXX.pdf";
    using (FileStream fs = new FileStream(fn, FileMode.Create))
    {
        fs.Write(buf, 0, buf.Length);
    }

    // Open the PDF with whatever application is registered for it.
    using (Process myProcess = new Process())
    {
        myProcess.StartInfo.FileName = fn;
        myProcess.Start();
    }
}
catch (Exception ex)
{
    MessageBox.Show($"PDF {ex.Message}");
}
生成PDF