HTML源码解析帮助类

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace Zhuyi.Utility
{
    /// <summary>
    /// Helper for extracting elements from HTML source text with regular expressions.
    /// </summary>
    /// <remarks>
    /// Every lookup returns the complete outer markup of the element (opening tag,
    /// content and closing tag). Nested elements with the same tag name are handled
    /// with a .NET balancing group, so the closing tag that is returned is the one
    /// that pairs with the opening tag. Elements without a closing tag (for example
    /// <c>&lt;img&gt;</c> or <c>&lt;br&gt;</c>) are never returned because the
    /// patterns require a closing tag.
    /// </remarks>
    public class HtmlUtil
    {
        // Singleline lets "." span line breaks inside an element; IgnoreCase makes tag
        // names, attribute names and the back-referenced closing tag case-insensitive.
        private const RegexOptions Options = RegexOptions.Singleline | RegexOptions.IgnoreCase;

        // Tag name used when the caller does not restrict the tag. Digits and hyphens
        // are allowed after the first letter so that h1..h6 are captured in full.
        private const string AnyTagName = @"[a-z][a-z0-9-]*";

        // Asserts that the tag name ends here, so "b" does not match "<body" or "<br".
        private const string TagNameEnd = @"(?![\w-])";

        // Element content plus the closing tag. Group "o" is pushed for every nested
        // opening tag with the same name (\1) and popped for every matching closing
        // tag; (?(o)(?!)) rejects the match while the stack is non-empty, so the
        // final </\1> is the closing tag that balances the captured opening tag.
        private const string BalancedTail =
            @"(?>(?<o><\1" + TagNameEnd + @"[^>]*>)|(?<-o></\1>)|(?:(?!</?\1" + TagNameEnd + @").))*(?(o)(?!))</\1>";

        /// <summary>
        /// Gets the first element whose <c>id</c> attribute equals <paramref name="id"/>.
        /// </summary>
        /// <param name="html">HTML source text.</param>
        /// <param name="id">Value of the id attribute to look for.</param>
        /// <returns>
        /// The outer markup of the first matching element, or an empty string when
        /// nothing matches or when <paramref name="html"/> or <paramref name="id"/>
        /// is null or empty.
        /// </returns>
        public static string GetElementById(string html, string id)
        {
            if (string.IsNullOrEmpty(html) || string.IsNullOrEmpty(id))
            {
                return "";
            }

            string pattern = BuildPattern(AnyTagName, "id", id);
            Match match = Regex.Match(html, pattern, Options);
            return match.Success ? match.Value : "";
        }

        /// <summary>
        /// Gets all elements whose <c>class</c> attribute equals <paramref name="className"/>.
        /// </summary>
        /// <param name="html">HTML source text.</param>
        /// <param name="className">Value of the class attribute to look for.</param>
        /// <returns>The outer markup of every matching element, in document order.</returns>
        public static string[] GetElementsByClass(string html, string className)
        {
            return GetElements(html, "", className);
        }

        /// <summary>
        /// Gets all elements with the given tag name.
        /// </summary>
        /// <param name="html">HTML source text.</param>
        /// <param name="tagName">Tag name, for example <c>div</c>.</param>
        /// <returns>The outer markup of every matching element, in document order.</returns>
        public static string[] GetElementsByTagName(string html, string tagName)
        {
            return GetElements(html, tagName, "");
        }

        /// <summary>
        /// Gets all elements that have both the given tag name and the given class value.
        /// </summary>
        /// <param name="html">HTML source text.</param>
        /// <param name="tagName">Tag name, for example <c>div</c>.</param>
        /// <param name="className">Value of the class attribute to look for.</param>
        /// <returns>The outer markup of every matching element, in document order.</returns>
        public static string[] GetElementsByTagAndClass(string html, string tagName, string className)
        {
            return GetElements(html, tagName, className);
        }

        /// <summary>
        /// Shared implementation of the tag/class lookups. A null or empty
        /// <paramref name="tagName"/> or <paramref name="className"/> means
        /// "do not filter on this"; when both are missing, or when
        /// <paramref name="html"/> is null or empty, an empty array is returned.
        /// </summary>
        private static string[] GetElements(string html, string tagName, string className)
        {
            bool hasTag = !string.IsNullOrEmpty(tagName);
            bool hasClass = !string.IsNullOrEmpty(className);

            if (string.IsNullOrEmpty(html) || (!hasTag && !hasClass))
            {
                return new string[0];
            }

            string tagPattern = hasTag ? Regex.Escape(tagName) : AnyTagName;
            string pattern = BuildPattern(tagPattern, hasClass ? "class" : null, className);

            List<string> list = new List<string>();
            // Matches are non-overlapping: an element nested inside a returned
            // element is part of that match and is not reported separately.
            foreach (Match match in Regex.Matches(html, pattern, Options))
            {
                list.Add(match.Value);
            }

            return list.ToArray();
        }

        /// <summary>
        /// Builds the regular expression for one element lookup.
        /// </summary>
        /// <param name="tagNamePattern">Regex fragment for the tag name; it becomes capture group 1.</param>
        /// <param name="attributeName">Attribute to filter on, or null for no attribute filter.</param>
        /// <param name="attributeValue">Literal attribute value; ignored when <paramref name="attributeName"/> is null.</param>
        private static string BuildPattern(string tagNamePattern, string attributeName, string attributeValue)
        {
            StringBuilder pattern = new StringBuilder();
            pattern.Append("<(").Append(tagNamePattern).Append(")").Append(TagNameEnd);

            if (attributeName != null)
            {
                // The attribute must be preceded by whitespace so that e.g. "data-id="
                // is not taken for "id=", and text such as class="grid" earlier in the
                // tag does not prevent the match. Group 2 is the optional opening
                // quote; \2 requires the same quote directly after the value, so a
                // quoted attribute value has to equal attributeValue exactly.
                pattern.Append(@"[^<>]*?\s")
                       .Append(attributeName)
                       .Append(@"\s*=\s*([""']?)")
                       .Append(Regex.Escape(attributeValue))
                       .Append(@"\2[^>]*>");
            }
            else
            {
                pattern.Append(@"[^<>]*>");
            }

            pattern.Append(BalancedTail);
            return pattern.ToString();
        }
    }
}

Zhuyi.Utility:

包括缓存操作、配置文件操作、Cookie操作、日期时间操作、文本文件操作、HTML源码解析、图片裁剪/缩放、常用正则验证、加密/解密、HTTP请求、XML读写、对数组的排序。

下载地址:http://download.csdn.net/source/2277988


作者:朱会震


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值