截取到的网页数据是 JS 加载完成以后的内容：
// URL of the page to parse.
string _url = "http://news.baidu.com/";
var webClient = new HtmlWeb();
// Load the page once so the document can report its encoding.
var html1 = webClient.Load(_url);
// Body name of the encoding reported by the loaded document.
string end3 = html1.Encoding.BodyName;
// Fetch the page again through GetHtmlSource with that encoding set explicitly,
// to avoid garbled text.
System.Text.Encoding pageEncoding = System.Text.Encoding.GetEncoding(end3);
string _htmlSource = GetHtmlSource(_url, pageEncoding);
/// <summary>
/// Requests <paramref name="url"/> over HTTP and returns the response body as text.
/// </summary>
/// <param name="url">Absolute URL to request.</param>
/// <param name="charset">Encoding used to decode a successful response body.</param>
/// <returns>
/// The decoded body on success. If the request fails with a <see cref="WebException"/>
/// that carries a response, the body of that error response (decoded with the
/// <see cref="StreamReader"/> default encoding). In every other failure case, the
/// exception's message.
/// </returns>
public static string GetHtmlSource(string url, Encoding charset)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // Dispose the response itself, not just its stream, so the underlying
        // connection is released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, charset))
        {
            return reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        // Response is null when no HTTP response was received at all (refused
        // connection, DNS failure, timeout). Dereferencing it here would throw a
        // NullReferenceException that the catch block below does not cover.
        if (ex.Response == null)
        {
            return ex.Message;
        }

        using (WebResponse errorResponse = ex.Response)
        using (Stream errorStream = errorResponse.GetResponseStream())
        using (StreamReader errorReader = new StreamReader(errorStream))
        {
            return errorReader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        return ex.Message;
    }
}