C#使用Html Agility Pack(HAP)的XPath解析HTML

安装

Html Agility Pack(HAP)是一个用C#编写的开源HTML解析库,支持使用XPath查询节点。
官网:https://html-agility-pack.net/
使用NuGet安装:在NuGet包管理器中搜索 HtmlAgilityPack,或在项目目录执行 dotnet add package HtmlAgilityPack,如图:
1

HtmlDocument.Load加载文件

using System;
using HtmlAgilityPack;

/// <summary>
/// Demonstrates <c>HtmlDocument.Load</c>: writes a sample page to disk, loads it back
/// from that file and prints the <c>body</c> element.
/// </summary>
public class Program
{
    // Single definition of the sample file path so the writer and the reader cannot drift apart.
    private const string HtmlFilePath = @"test.html";

    /// <summary>
    /// Creates the sample file, loads it with <c>HtmlDocument.Load</c> and writes the
    /// outer HTML of the node matched by <c>//body</c> to standard output.
    /// </summary>
    public static void Main()
    {
        SaveHtmlFile();
        var doc = new HtmlDocument();
        doc.Load(HtmlFilePath);
        var node = doc.DocumentNode.SelectSingleNode("//body");

        // SelectSingleNode yields null when the XPath matches nothing; report it
        // instead of failing with a NullReferenceException.
        if (node == null)
        {
            Console.Error.WriteLine("No <body> element found in " + HtmlFilePath);
            return;
        }

        Console.WriteLine(node.OuterHtml);
    }

    /// <summary>
    /// Parses a fixed HTML sample and saves it to <see cref="HtmlFilePath"/>.
    /// </summary>
    private static void SaveHtmlFile()
    {
        var html =
        @"<!DOCTYPE html>
        <html>
        <body>
	        <h1>This is <b>bold</b> heading</h1>
	        <p>This is <u>underlined</u> paragraph</p>
	        <h2>This is <i>italic</i> heading</h2>
        </body>
        </html> ";
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(html);
        htmlDoc.Save(HtmlFilePath);
    }
}

1

HtmlDocument.LoadHtml加载字符串

using System;
using HtmlAgilityPack;

/// <summary>
/// Demonstrates <c>HtmlDocument.LoadHtml</c>: parses an in-memory HTML string and
/// prints the node matched by <c>//body</c>.
/// </summary>
public class Program
{
    /// <summary>
    /// Loads the sample markup from a string and writes the outer HTML of the
    /// selected <c>body</c> node to standard output.
    /// </summary>
    public static void Main()
    {
        const string BodyXPath = "//body";

        string markup = @"<!DOCTYPE html>
        <html>
        <body>
	        <h1>This is <b>bold</b> heading</h1>
	        <p>This is <u>underlined</u> paragraph</p>
	        <h2>This is <i>italic</i> heading</h2>
        </body>
        </html> ";

        // Parse the string, then walk from the document root to the body node.
        HtmlDocument document = new HtmlDocument();
        document.LoadHtml(markup);

        HtmlNode root = document.DocumentNode;
        HtmlNode body = root.SelectSingleNode(BodyXPath);

        string rendered = body.OuterHtml;
        Console.WriteLine(rendered);
    }
}

1

HtmlWeb.Load通过URL加载HTML

using HtmlAgilityPack;
using System;

namespace ConsoleApp1
{
    /// <summary>
    /// Demonstrates <c>HtmlWeb.Load</c>: downloads a page by URL and prints the
    /// node matched by <c>//head/title</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Fetches the page, selects its title node and writes the node's outer HTML
        /// to standard output.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // This is the address to fetch, not markup, so it is named accordingly.
            var url = @"https://www.baidu.com/";
            var web = new HtmlWeb();
            var doc = web.Load(url);
            var node = doc.DocumentNode.SelectSingleNode("//head/title");

            // The remote page is outside our control; SelectSingleNode yields null
            // when the response has no head/title, so guard before dereferencing.
            if (node == null)
            {
                Console.Error.WriteLine("No <title> element found at " + url);
                return;
            }

            Console.WriteLine(node.OuterHtml);
        }
    }
}

SelectNodes()选择多个节点

// @nuget: HtmlAgilityPack

using System;
using System.Linq;
using HtmlAgilityPack;

/// <summary>
/// Demonstrates <c>SelectNodes</c>: selects every node matching <c>//td/input</c>
/// and prints the <c>value</c> attribute of the first one.
/// </summary>
public class Program
{
    /// <summary>
    /// Parses the sample fragment, queries it with <c>SelectNodes</c> and writes the
    /// first match's <c>value</c> attribute to standard output.
    /// </summary>
    public static void Main()
    {
        var html =
        @"<TD class=texte width=""50%"">
			<DIV align=right>Name :<B> </B></DIV>
		</TD>
		<TD width=""50%"">
    		<INPUT class=box value=John maxLength=16 size=16 name=user_name>
    		<INPUT class=box value=Tony maxLength=16 size=16 name=user_name>
    		<INPUT class=box value=Jams maxLength=16 size=16 name=user_name>
		</TD>
		<TR vAlign=center>";

        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(html);

        var inputs = htmlDoc.DocumentNode.SelectNodes("//td/input");

        // SelectNodes returns null (not an empty collection) when nothing matches,
        // so calling First() on the result directly would throw.
        if (inputs == null)
        {
            Console.Error.WriteLine("No <input> elements found under <td>.");
            return;
        }

        // GetAttributeValue falls back to the supplied default when the attribute is
        // absent, whereas Attributes["value"].Value would dereference null.
        string name = inputs.First().GetAttributeValue("value", string.Empty);

        Console.WriteLine(name);
    }
}

SelectSingleNode(String)选择第一个节点

// @nuget: HtmlAgilityPack

using System;

using HtmlAgilityPack;

/// <summary>
/// Demonstrates <c>SelectSingleNode</c>: queries <c>//td/input</c> and prints the
/// <c>value</c> attribute of the node that is returned.
/// </summary>
public class Program
{
	/// <summary>
	/// Parses the sample fragment, selects a single input node and writes its
	/// <c>value</c> attribute to standard output.
	/// </summary>
	public static void Main()
	{
		const string InputXPath = "//td/input";
		const string AttributeName = "value";

		string markup =
		@"<TD class=texte width=""50%"">
			<DIV align=right>Name :<B> </B></DIV>
		</TD>
		<TD width=""50%"">
    		<INPUT class=box value=第一 maxLength=16 size=16 name=user_name>
    		<INPUT class=box value=第二 maxLength=16 size=16 name=user_name>
    		<INPUT class=box value=第三 maxLength=16 size=16 name=user_name>
		</TD>
		<TR vAlign=center>";

		HtmlDocument document = new HtmlDocument();
		document.LoadHtml(markup);

		// Step through the lookup one hop at a time: node, then attribute, then text.
		HtmlNode input = document.DocumentNode.SelectSingleNode(InputXPath);
		HtmlAttribute attribute = input.Attributes[AttributeName];
		string text = attribute.Value;

		Console.WriteLine(text);
	}
}

获取节点属性(InnerHtml、OuterHtml、InnerText、ParentNode)

// @nuget: HtmlAgilityPack

using System;
using System.Xml;
using HtmlAgilityPack;

/// <summary>
/// Demonstrates reading node properties: for every node matching <c>//body/h1</c>
/// prints its <c>InnerHtml</c>, <c>OuterHtml</c>, <c>InnerText</c> and parent node name.
/// </summary>
public class Program
{
    /// <summary>
    /// Parses the sample fragment and writes the properties of each selected
    /// <c>h1</c> node to standard output, followed by a separator line.
    /// </summary>
    public static void Main()
    {
        var html =
        @"<body>
            <h1>This is <b>bold</b> heading</h1>
            <p>This is <u>underlined</u> paragraph</p>
			
			<h1>This is <i>italic</i> heading</h1>
			<p>This is <u>underlined</u> paragraph</p>
        </body>";

        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(html);

        var htmlNodes = htmlDoc.DocumentNode.SelectNodes("//body/h1");

        // SelectNodes returns null (not an empty collection) when nothing matches,
        // which would make the foreach below throw.
        if (htmlNodes == null)
        {
            Console.Error.WriteLine("No <h1> elements found under <body>.");
            return;
        }

        foreach (var node in htmlNodes)
        {
            Console.WriteLine("InnerHtml:" + node.InnerHtml);
            Console.WriteLine("OuterHtml:" + node.OuterHtml);
            Console.WriteLine("InnerText:" + node.InnerText);
            // The label now carries the same ':' separator as the lines above;
            // without it the output ran together as "ParentNodebody".
            Console.WriteLine("ParentNode:" + node.ParentNode.Name);
            Console.WriteLine("===========");
        }
    }
}

1

参考

https://html-agility-pack.net/parser

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小龙在山东

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值