/// <summary>
/// Click handler for <c>button1</c>: downloads an HTML page, parses it with
/// HtmlAgilityPack, and adds the <c>href</c> value of every anchor whose
/// target starts with "http" to <c>listBox1</c>.
/// </summary>
/// <remarks>
/// Nothing is caught here, so any exception raised by the download or the
/// parser propagates to the caller; the <c>using</c> blocks still release the
/// web client and the stream in that case.
/// </remarks>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button1_Click(object sender, EventArgs e)
{
    // NOTE(review): the URL literal was lost in the original source (the string
    // was left unterminated and fused with its trailing comment). The address
    // below is only a placeholder - TODO replace it with the intended page URL.
    const string pageUrl = @"http://example.com/";

    // Parse the HTML document with the open-source HtmlAgilityPack parser.
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();

    using (WebClient webClient = new WebClient())
    {
        // The page is downloaded as raw bytes here; it could also be downloaded as a string.
        byte[] buffer = webClient.DownloadData(pageUrl);

        using (MemoryStream ms = new MemoryStream(buffer))
        {
            // Load takes a stream; LoadHtml would take the markup as a string.
            htmlDoc.Load(ms);
        }
    }

    // Select, via XPath, every <a> element that carries an href attribute.
    HtmlNodeCollection collection = htmlDoc.DocumentNode.SelectNodes(".//a[@href]");
    if (collection == null)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        return;
    }

    foreach (var item in collection)
    {
        HtmlAttribute att = item.Attributes["href"];

        // Keep only links with an "http..." target. Ordinal comparison is used
        // because this is a protocol prefix, not linguistic text.
        if (att != null && att.Value.StartsWith("http", StringComparison.Ordinal))
        {
            listBox1.Items.Add(att.Value);
        }
    }
}
html解析
最新推荐文章于 2024-02-20 17:55:32 发布