// Parse the HTML file from disk into a document object.
var document = new HtmlDocument();
document.Load(@"C:\Documents and Settings\Kenny\My Documents\project\document.html");

// Look up the element whose id attribute is "table5".
var table = document.GetElementbyId("table5");

// Keep only the element-type children of that node (other node types are
// filtered out), then drop the first two of the remaining nodes.
var tableRows = (from cn in table.ChildNodes
                 where cn.NodeType == HtmlNodeType.Element
                 select cn)
    .Skip(2);
这个组件侧重于网页内容的分析;如果只是获取网页内容,还有更简单的方法:
// Download the whole page body as a string.
// WebClient implements IDisposable, so it is scoped to a using block and
// released as soon as the download finishes (or throws).
string downloadString;
using (WebClient client = new WebClient())
{
    // Host name corrected from the misspelled "gooogle.com".
    downloadString = client.DownloadString("http://www.google.com");
}
或
// Fetch the page with WebRequest and read the full response body as text.
WebRequest request = WebRequest.Create("http://www.google.com");
string html;

// The response, its stream, and the reader are all disposable; stacking the
// using statements guarantees each is released even if reading throws.
// Leaving the WebResponse undisposed keeps the underlying connection open.
using (WebResponse response = request.GetResponse())
using (Stream data = response.GetResponseStream())
using (StreamReader sr = new StreamReader(data))
{
    html = sr.ReadToEnd();
}
或
// Opens the resource at the given url with WebClient and returns everything
// read from its stream as a single string.
string ReadTextFromUrl(string url)
{
    using (var webClient = new WebClient())
    {
        using (var responseStream = webClient.OpenRead(url))
        {
            // Decode as UTF-8 by default, but let a byte-order mark at the
            // start of the stream override that. Any charset declared by the
            // response itself is not consulted here.
            using (var reader = new StreamReader(responseStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true))
            {
                var text = reader.ReadToEnd();
                return text;
            }
        }
    }
}