// Regex helper class

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace Public
{
   public  class HtmlHelper
    {
        /// <summary>
        /// 返回匹配的字符串集合
        /// </summary>
        /// <param name="strHTML">源代码</param>
        /// <param name="rule">规则</param>
        /// <returns></returns>
        public static List<string> GetMatchString(string strHTML, string rule)
        {
           // rule = "<dd>((?:(?!id=1766690474)[\\d\\D])+?)dd>";
            if (strHTML == null || strHTML == "" || rule == null || rule == "")
            {
                return null;
            }
            try
            {
                Regex r = new Regex(CreatePattern(rule), RegexOptions.IgnoreCase);
                MatchCollection mc = r.Matches(strHTML);
                if (mc != null && mc.Count > 0)
                {
                    List<string> list = new List<string>();
                    string value;
                    foreach (Match m in mc)
                    {
                        GroupCollection gc = m.Groups;
                        if (gc != null && gc.Count > 1)
                        {
                            for (int i = 1; i < gc.Count; i++)
                            {
                                value = gc[i].Value.Trim();
                                if (value.Length > 0)
                                {
                                    list.Add(value);
                                }
                            }
                        }
                    }
                    return list;
                }
            }
            catch (Exception ex){ }
           
            return null;
        }
       /// <summary>
       /// 判断字符串是否匹配,并返回匹配组的值
       /// </summary>
       /// <param name="strHTML"></param>
       /// <param name="rule"></param>
       /// <param name="matchValue"></param>
       /// <returns></returns>
       public static bool Match(string strHTML, string rule, out string groupValue)
       {
           groupValue = "";
           if (strHTML == null || strHTML == "" || rule == null || rule == "")
           {
               return false;
           }
           Regex r = new Regex(CreatePattern(rule), RegexOptions.IgnoreCase);
           Match m = r.Match(strHTML);
           if (m.Success)
           {
               string value;
               GroupCollection gc = m.Groups;
               if (gc != null && gc.Count > 1)
               {
                   for (int i = 1; i < gc.Count; i++)
                   {
                       value = gc[i].Value.Trim();
                       if (value.Length > 0)
                       {
                           groupValue =value ;
                           break;
                       }
                   }
               }
               return true;
           }
           return false;
       }
        /// <summary>
        /// 返回匹配的键值对
        /// </summary>
        /// <param name="strHTML">源代码</param>
        /// <param name="rule">规则</param>
        /// <returns></returns>
        public static string GetKeyValue(string strHTML, string rule,out string value)
        {
            value = "";
            if (strHTML == null || strHTML == "" || rule == null || rule == "")
            {
                return "";
            }
            Regex r = new Regex(CreatePattern(rule), RegexOptions.IgnoreCase);
            Match m = r.Match(strHTML);
            if (m!=null && m.Success)
            {
                GroupCollection gc = m.Groups;
                if (gc != null && gc.Count > 1)
                {
                    try
                    {
                        value = gc["value"].Value.Trim();
                        return gc["key"].Value.Trim();
                    }
                    catch (Exception)
                    {
                           return "";
                    }
                }
            }
            return "";
        }
       /// <summary>
       /// 分割字符串
       /// </summary>
       /// <param name="str"></param>
       /// <param name="split"></param>
       /// <returns></returns>
       public static  string[] SplitString(string str, string split)
       {
           if (str != null && split != null)
           {
               return Regex.Split(str, split);
           }
           return null;
       }

       /// <summary>
       /// 是否匹配
       /// </summary>
       /// <param name="strHTML"></param>
       /// <param name="rule"></param>
       /// <returns></returns>
       public static bool IsMatch(string strHTML, string rule)
       {
           if (strHTML == null || strHTML == "")
           {
               return false;
           }
           Regex r = new Regex(CreatePattern(rule), RegexOptions.IgnoreCase);
           return r.IsMatch(strHTML);
       }

       /// <summary>
       /// 是否匹配
       /// </summary>
       /// <param name="strHTML"></param>
       /// <param name="rule"></param>
       /// <returns></returns>
       public static string  Replace(string orgStr, string rule,string str)
       {
           if (orgStr == null || orgStr == "")
           {
               return "";
           }
           Regex r = new Regex(rule, RegexOptions.IgnoreCase);
           return r.Replace(orgStr,str);
       }
        /// <summary>
        /// 创建正则模型
        /// </summary>
        /// <param name="rule"></param>
        /// <returns></returns>
        private static string CreatePattern(string rule)
        {
          //  return rule;
            rule = rule.Trim();

            if (rule.IndexOf("#content#") >= 0)
            {
                //rule = rule.Replace("#content#", "((?:(?!" + Regex.Split(rule, "#content#")[1] + ").)+)");        // 不包含结束标签的html
                rule = rule.Replace("#content#", "(.+?)");
            }

            if (rule.IndexOf("#url#") >= 0)
            {
                rule = rule.Replace("#url#", "([^<>\"\']+)");
            }
            if (rule.IndexOf("#title#") >= 0)
            {
                rule = rule.Replace("#title#", "((?:(?!" + Regex.Split(rule, "#title#")[1] + ").)+)");
            }
            if (rule.IndexOf("#keyword#") >= 0)
            {
                rule = rule.Replace("#keyword#", "((?:(?!" + Regex.Split(rule, "#keyword#")[1] + ").)+)");
            }
            if (rule.IndexOf("#parameter#") >= 0)
            {
                rule = rule.Replace("#parameter#", "([^\\}]+)");
            }
            return rule;
        }
        /// <summary>
        /// 获取指定编码后的字符串
        /// </summary>
        /// <param name="str"></param>
        /// <param name="encoding"></param>
        /// <returns></returns>
        public static string GetEncoding(string str, string encoding)
        {
            if (encoding.ToLower() == "utf-8")
            {
                return UTF8(str);
            }
            return GB2312(str);
        }
        public static string GB2312(string str)
        {
            str = ReplaceSpecialChar(str);
            StringBuilder sb = new StringBuilder();
            Encoding en = Encoding.GetEncoding("GB2312");
            for (int i = 0; i < str.Length; i++)
            {
                byte[] byteCode = en.GetBytes(str[i].ToString());
                if (byteCode.Length == 2)
                {
                    sb.Append("%" + Convert.ToString(byteCode[0], 16) + "%" + Convert.ToString(byteCode[1], 16));
                }
                else
                {
                    sb.Append(str[i]);
                }
            }

            return sb.ToString();
        }


        /// <summary>
        /// 把汉字按utf-8 编码
        /// </summary>
        /// <param name="str"></param>
        /// <returns></returns>
        public static string UTF8(string str)
        {
            str = ReplaceSpecialChar(str) ;
            StringBuilder sb = new StringBuilder();
            Encoding en = Encoding.GetEncoding("UTF-8");
            for (int i = 0; i < str.Length; i++)
            {
                byte[] byteCode = en.GetBytes(str[i].ToString());
                if (byteCode.Length == 3)
                {
                    sb.Append("%" + Convert.ToString(byteCode[0], 16) + "%" + Convert.ToString(byteCode[1], 16) + "%" + Convert.ToString(byteCode[2], 16));
                }
                else
                {
                    sb.Append(str[i]);
                }
            }
            return sb.ToString();
        }
        /// <summary>
        /// 替换特殊字符
        /// </summary>
        /// <returns></returns>
        public static string ReplaceChar(string str)
        {
            return str.Replace("%", "%25").Replace(" ", "%20").Replace("&", "%26").Replace("?", "%3F").Replace("=", "%3D");
        }
       /// <summary>
       /// 替换特殊字符
       /// </summary>
       /// <returns></returns>
       public static string ReplaceSpecialChar(string str)
       {
           return str.Replace("%", "%25").Replace(" ", "%20").Replace("&", "%26").Replace("?", "%3F").Replace(":", "%3A").Replace("=", "%3D").Replace("/", "%2F").Replace("+", "%2B").Replace("@", "%40");
       }
        /// <summary>
        /// 处理问答列表区域、问答标题、最佳答案
        /// 返回指定规则的HTML内容
        /// </summary>
        /// <param name="strHTML">源代码</param>
        /// <param name="rule">规则</param>
        /// <returns></returns>
        public static string GetHtmlContent(string strHTML, string rule)
        {
            if (strHTML == null || strHTML == "" || rule == null || rule == "")
            {
                return "";
            }
           
            Regex r = new Regex(CreatePattern(rule), RegexOptions.IgnoreCase);
            Match m = r.Match(strHTML);
            if (m.Success)
            {
                GroupCollection gc = m.Groups;
                if (gc != null && gc.Count > 1)
                {
                    for (int i = 1; i < gc.Count; i++)
                    {
                        if (gc[i].Value.Length > 0)
                        {
                            return  gc[i].Value.Trim();
                        }
                    }
                }
            }
            return "";
        }

        /// <summary>
        /// 过滤html 标签
        /// </summary>
        /// <param name="strHTML"></param>
        /// <returns></returns>
        public static string FilerHTML(string strHTML)
        {
            if (strHTML == null || strHTML.Length ==0)
            {
                return "";
            }
            Regex r = new Regex("<(a|(?:script)|(?:style))[^>]*?>[\\d\\D]*?</\\1>", RegexOptions.IgnoreCase);
            string str = r.Replace(strHTML, "");
            r = new Regex("<[^<>]+?>", RegexOptions.IgnoreCase);
            return r.Replace(str, "");

        }

       /// <summary>
       /// 验证域名是否合法
       /// </summary>
       /// <param name="dm"></param>
       /// <returns></returns>
       public static bool ValidDomainName(string dm)
       {
           Regex r = new Regex("^[\\w-]+(\\.[\\w-]+)+$", RegexOptions.IgnoreCase);
           return r.IsMatch(dm);
       }
       /// <summary>
       /// 验证电话是否合法
       /// </summary>
       /// <param name="dm"></param>
       /// <returns></returns>
       public static bool ValidPhone(string phone)
       {
           Regex r = new Regex("^[\\d]{3,5}-[\\d]{7,8}(-[\\d]{3,6})?$", RegexOptions.IgnoreCase);
           return r.IsMatch(phone);
       }
       /// <summary>
       /// 验证移动电话是否合法
       /// </summary>
       /// <param name="dm"></param>
       /// <returns></returns>
       public static bool ValidMobile(string mobile)
       {
           Regex r = new Regex("^1[3-9]\\d{9}$", RegexOptions.IgnoreCase);
           return r.IsMatch(mobile);
       }
       /// <summary>
       /// 验证QQ是否合法
       /// </summary>
       /// <param name="dm"></param>
       /// <returns></returns>
       public static bool ValidQQ(string qq)
       {
           Regex r = new Regex("^\\d{5,11}$", RegexOptions.IgnoreCase);
           return r.IsMatch(qq);
       }

       /// <summary>
       /// 验证网址是否合法
       /// </summary>
       /// <param name="dm"></param>
       /// <returns></returns>
       public static bool ValidUrl(string qq)
       {
           Regex r = new Regex("^https?://([\\w-]+\\.)+[\\w-]+(/[\\w-.\\/?%&=]*)?$", RegexOptions.IgnoreCase);
           return r.IsMatch(qq);
       }

       /// <summary>
       /// html 代码转UBB
       /// </summary>
       /// <returns></returns>
       public static string HtmlToUBB(string html)
       {
           if (html == null || html.Length == 0) { return ""; }
           string ubb = Regex.Replace(html,"<img.+?src=\\s*[\"']?([^\"' ]+)[\"']?[\\d\\D]*?>", "[img]$1[/img]", RegexOptions.IgnoreCase);
           ubb = Regex.Replace(ubb, "<a.+?href=\\s*[\"']?([^\"']+)[\"' ]?[\\d\\D]*?>([\\d\\D]+?)</a>", "[url=$1]$2[/url]", RegexOptions.IgnoreCase);
           ubb = Regex.Replace(ubb, "<([bui])>([\\d\\D]+?)</\\1>", "[$1]$2[/$1]", RegexOptions.IgnoreCase);
           ubb = Regex.Replace(ubb, "<(h[1-6])[\\d\\D]+?>([\\d\\D]+?)</\\1>", "[$1]$2[/$1]", RegexOptions.IgnoreCase);
           ubb = Regex.Replace(ubb, "<div[\\d\\D]+?>([\\d\\D]+?)</div>", "[CODE]$1[/CODE]", RegexOptions.IgnoreCase);
           return FilerHTML(ubb);
       }

       /// <summary>
       /// 获取主域名
       /// </summary>
       /// <param name="domainName"></param>
       /// <returns></returns>
       public static string GetMainDomainName(string domainName)
       {
           if (domainName == null | domainName.Length ==0)
           {
               return "";
           }
           string mainDom = HtmlHelper.GetHtmlContent(domainName, "([^.]+(?:\\.(?:com|cn|org|net|edu|info|gov|cc|tv|hk|me))+$)");
           if (mainDom.Length == 0)
           {
               return domainName;
           }
           return mainDom;
       }
    }
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值