打开HtmlAgilityPack.1.4.0.Source工程
找到并打开HtmlWeb.cs文件,修改下面方法中的一小段代码:
private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc, IWebProxy proxy,
ICredentials creds)函数中的如下代码:
Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding)
? Encoding.GetEncoding(resp.ContentEncoding)
: null;
/*修改成下面的即可*/
/*王..修改 中文乱码问题*/
//Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding)
// ? Encoding.GetEncoding(resp.ContentEncoding)
// : null;
// Resolve the text encoding used to decode the response body.
// Candidates are tried in the same order as before:
//   1. resp.ContentEncoding, if it happens to name a valid text encoding;
//   2. resp.CharacterSet (the charset taken from the Content-Type header),
//      except "ISO-8859-1", which is mapped to GB2312;
//   3. GB2312 as the final default (this patch targets Chinese pages).
// Encoding.GetEncoding throws ArgumentException for names it does not
// recognise, so each lookup is guarded and falls through to the next
// candidate instead of aborting the whole request.
System.Text.Encoding respenc = null;

if (!string.IsNullOrEmpty(resp.ContentEncoding))
{
    try
    {
        respenc = System.Text.Encoding.GetEncoding(resp.ContentEncoding);
    }
    catch (System.ArgumentException)
    {
        // Content-Encoding normally carries a compression scheme such as
        // "gzip" or "deflate", not a charset name; ignore it and
        // continue with the charset from Content-Type.
    }
}

if (respenc == null)
{
    string charset = resp.CharacterSet;

    // NOTE(review): HttpWebResponse appears to report "ISO-8859-1" when the
    // server sends no explicit charset, which is why it is treated here as
    // "unknown" and replaced by GB2312 - confirm against the target framework.
    bool isLatin1 = string.Equals(charset, "ISO-8859-1", System.StringComparison.OrdinalIgnoreCase);

    if (!string.IsNullOrEmpty(charset) && !isLatin1)
    {
        try
        {
            respenc = System.Text.Encoding.GetEncoding(charset);
        }
        catch (System.ArgumentException)
        {
            // Unknown or malformed charset name: use the default below.
        }
    }
}

if (respenc == null)
{
    respenc = System.Text.Encoding.GetEncoding("GB2312");
}
修改之后,重新编译一下,再使用新生成的dll,就不会出现中文乱码了。
也可以直接下载我修改过的文件: